Commit 02567b09 authored by Carlos GO's avatar Carlos GO
Browse files

done

parent d1a4b85a
No preview for this file type
No preview for this file type
% Journal article. Acronyms in the title are braced so that styles which
% sentence-case titles (e.g. plain) do not lowercase them.
@article{ge2018novo,
  author    = {Ge, Ping and Islam, Shahidul and Zhong, Cuncong and Zhang, Shaojie},
  title     = {De novo discovery of structural motifs in {RNA} {3D} structures through clustering},
  journal   = {Nucleic Acids Research},
  volume    = {46},
  number    = {9},
  pages     = {4783--4793},
  year      = {2018},
  publisher = {Oxford University Press}
}
% Journal article. {RNA} is braced to survive title recasing; the journal name
% is spelled "RNA" to match the other entries from the same journal.
@article{leontis2001geometric,
  author    = {Leontis, Neocles B and Westhof, Eric},
  title     = {Geometric nomenclature and classification of {RNA} base pairs},
  journal   = {RNA},
  volume    = {7},
  number    = {4},
  pages     = {499--512},
  year      = {2001},
  publisher = {Cambridge University Press}
}
% PhD thesis. {RNAs} is braced to survive title recasing. The school field
% names the degree-granting institution rather than its library repository.
@phdthesis{reinharz2016algorithmic,
  author = {Reinharz, Vladimir},
  title  = {Algorithmic Properties of Evolved Structured {RNAs}},
  school = {McGill University},
  year   = {2016}
}
% bioRxiv preprint; the pages field holds the bioRxiv identifier.
% {RNA} is braced to survive title recasing.
@article{oliver2019necessary,
  author    = {Oliver, Carlos G and Reinharz, Vladimir and Waldisp{\"u}hl, J{\'e}r{\^o}me},
  title     = {The necessary emergence of structural complexity in self-replicating {RNA} populations},
  journal   = {bioRxiv},
  pages     = {218990},
  year      = {2019},
  publisher = {Cold Spring Harbor Laboratory}
}
% NOTE(review): this entry carries the same bioRxiv identifier (pages 218990)
% and year as oliver2019necessary but a different title, and the slides cite
% this key as a manuscript accepted at the RNA journal. Confirm the final
% venue, volume and pages and update the fields below accordingly.
% {RNA} is braced to survive title recasing.
@article{oliver2017necessary,
  author    = {Oliver, Carlos G and Reinharz, Vladimir and Waldisp{\"u}hl, J{\'e}r{\^o}me},
  title     = {On the emergence of structural complexity in {RNA} replicators},
  journal   = {bioRxiv},
  pages     = {218990},
  year      = {2019},
  publisher = {Cold Spring Harbor Laboratory}
}
% Journal article. The tool name {DOCK} and the acronym {RNA} are braced to
% survive title recasing; the journal name is spelled "RNA" consistently.
@article{lang2009dock,
  author    = {Lang, P Therese and Brozell, Scott R and Mukherjee, Sudipto and Pettersen, Eric F and Meng, Elaine C and Thomas, Veena and Rizzo, Robert C and Case, David A and James, Thomas L and Kuntz, Irwin D},
  title     = {{DOCK} 6: Combining techniques to model {RNA}--small molecule complexes},
  journal   = {RNA},
  volume    = {15},
  number    = {6},
  pages     = {1219--1230},
  year      = {2009},
  publisher = {Cold Spring Harbor Lab}
}
% arXiv preprint. The journal field is kept because the classic article type
% requires one and classic styles ignore eprint fields; eprint/archiveprefix
% additionally record the identifier in machine-readable form.
% The proper nouns in the title are braced to survive title recasing.
@article{xu2019gromov,
  author        = {Xu, Hongteng and Luo, Dixin and Zha, Hongyuan and Carin, Lawrence},
  title         = {{Gromov-Wasserstein} learning for graph matching and node embedding},
  journal       = {arXiv preprint arXiv:1901.06003},
  year          = {2019},
  eprint        = {1901.06003},
  archiveprefix = {arXiv}
}
% Conference paper. Springer is recorded as the publisher; the organization
% field is meant for a sponsoring body, not the publishing house.
@inproceedings{singh2007pairwise,
  author    = {Singh, Rohit and Xu, Jinbo and Berger, Bonnie},
  title     = {Pairwise global alignment of protein interaction networks by matching neighborhood topology},
  booktitle = {Annual International Conference on Research in Computational Molecular Biology},
  pages     = {16--31},
  year      = {2007},
  publisher = {Springer}
}
% Conference paper. The tool name {WAVE} is braced to survive title recasing;
% Springer is recorded as the publisher rather than as a sponsoring organization.
@inproceedings{sun2015simultaneous,
  author    = {Sun, Yihan and Crawford, Joseph and Tang, Jie and Milenkovi{\'c}, Tijana},
  title     = {Simultaneous optimization of both node and edge conservation in network alignment via {WAVE}},
  booktitle = {International Workshop on Algorithms in Bioinformatics},
  pages     = {16--39},
  year      = {2015},
  publisher = {Springer}
}
% Journal article. The tool name {L-GRAAL} and the proper adjective
% {Lagrangian} are braced to survive title recasing.
@article{malod2015graal,
  author    = {Malod-Dognin, No{\"e}l and Pr{\v{z}}ulj, Nata{\v{s}}a},
  title     = {{L-GRAAL}: {Lagrangian} graphlet-based network aligner},
  journal   = {Bioinformatics},
  volume    = {31},
  number    = {13},
  pages     = {2182--2189},
  year      = {2015},
  publisher = {Oxford University Press}
}
% PhD thesis (French title). The acronym {ARN} is braced to survive title recasing.
@phdthesis{djelloul2009algorithmes,
  author = {Djelloul, Mahassine},
  title  = {Algorithmes de graphes pour la recherche de motifs r{\'e}currents dans les structures tertiaires d'{ARN}},
  school = {Universit{\'e} Paris Sud-Paris XI},
  year   = {2009}
}
% Journal article. Acronyms and the resource name {RNA 3D Motif Atlas} are
% braced to survive title recasing; the journal name is spelled "RNA" consistently.
@article{petrov2013automated,
  author    = {Petrov, Anton I and Zirbel, Craig L and Leontis, Neocles B},
  title     = {Automated classification of {RNA} {3D} motifs and the {RNA 3D Motif Atlas}},
  journal   = {RNA},
  volume    = {19},
  number    = {10},
  pages     = {1327--1340},
  year      = {2013},
  publisher = {Cold Spring Harbor Lab}
}
% Journal article. {RNA} is braced to survive title recasing.
@article{reinharz2018mining,
  author    = {Reinharz, Vladimir and Soul{\'e}, Antoine and Westhof, Eric and Waldisp{\"u}hl, J{\'e}r{\^o}me and Denise, Alain},
  title     = {Mining for recurrent long-range interactions in {RNA} structures reveals embedded hierarchies in network families},
  journal   = {Nucleic Acids Research},
  volume    = {46},
  number    = {8},
  pages     = {3841--3851},
  year      = {2018},
  publisher = {Oxford University Press}
}
% bioRxiv preprint; the pages field holds the bioRxiv identifier.
% {RNA} is braced to survive title recasing. The second author's compound
% surname is written as in sarrazin2019automated so both entries agree.
@article{oliver2019extended,
  author    = {Oliver, Carlos G and Sarrazin-Gendron, Roman and Moitessier, Nicolas and Mallet, Vincent and Reinharz, Vladimir and Waldisp{\"u}hl, J{\'e}r{\^o}me},
  title     = {Extended {RNA} base pairing networks imprint small molecule binding preferences},
  journal   = {bioRxiv},
  pages     = {701326},
  year      = {2019},
  publisher = {Cold Spring Harbor Laboratory}
}
% Journal article. {3D} and the tool name {BayesPairing} are braced to survive
% title recasing.
@article{sarrazin2019automated,
  author    = {Sarrazin-Gendron, Roman and Reinharz, Vladimir and Oliver, Carlos G and Moitessier, Nicolas and Waldisp{\"u}hl, J{\'e}r{\^o}me},
  title     = {Automated, customizable and efficient identification of {3D} base pair modules with {BayesPairing}},
  journal   = {Nucleic Acids Research},
  volume    = {47},
  number    = {7},
  pages     = {3321--3332},
  year      = {2019},
  publisher = {Oxford University Press}
}
% Journal article. The tool name {LigandRNA} and the acronym {RNA} are braced
% to survive title recasing.
@article{philips2013ligandrna,
  author    = {Philips, Anna and Milanowska, Kaja and {\L}ach, Grzegorz and Bujnicki, Janusz M},
  title     = {{LigandRNA}: computational predictor of {RNA}--ligand interactions},
  journal   = {RNA},
  volume    = {19},
  number    = {12},
  pages     = {1605--1616},
  year      = {2013},
  publisher = {Cold Spring Harbor Lab}
}
% Journal article. {RNA} is braced to survive title recasing.
@article{leontis2006building,
  author    = {Leontis, Neocles B and Lescoute, Aurelie and Westhof, Eric},
  title     = {The building blocks and motifs of {RNA} architecture},
  journal   = {Current Opinion in Structural Biology},
  volume    = {16},
  number    = {3},
  pages     = {279--287},
  year      = {2006},
  publisher = {Elsevier}
}
% Conference paper. Springer is recorded as the publisher rather than as a
% sponsoring organization. The name particle "van den" is lowercase so BibTeX
% parses it as the von part instead of folding it into the surname.
@inproceedings{schlichtkrull2018modeling,
  author    = {Schlichtkrull, Michael and Kipf, Thomas N and Bloem, Peter and van den Berg, Rianne and Titov, Ivan and Welling, Max},
  title     = {Modeling relational data with graph convolutional networks},
  booktitle = {European Semantic Web Conference},
  pages     = {593--607},
  year      = {2018},
  publisher = {Springer}
}
% Journal article. {RNA} is braced to survive title recasing.
% NOTE(review): pages holds only a single page number -- confirm whether a
% full page range is available.
@article{warner2018principles,
  author    = {Warner, Katherine Deigan and Hajdin, Christine E and Weeks, Kevin M},
  title     = {Principles for targeting {RNA} with drug-like small molecules},
  journal   = {Nature Reviews Drug Discovery},
  volume    = {17},
  number    = {8},
  pages     = {547},
  year      = {2018},
  publisher = {Nature Publishing Group}
}
% Journal article. {RNA} is braced to survive title recasing.
% NOTE(review): pages holds only a single page number -- confirm whether a
% full page range is available.
@article{stelzer2011discovery,
  author    = {Stelzer, Andrew C and Frank, Aaron T and Kratz, Jeremy D and Swanson, Michael D and Gonzalez-Hernandez, Marta J and Lee, Janghyun and Andricioaei, Ioan and Markovitz, David M and Al-Hashimi, Hashim M},
  title     = {Discovery of selective bioactive small molecules by targeting an {RNA} dynamic ensemble},
  journal   = {Nature Chemical Biology},
  volume    = {7},
  number    = {8},
  pages     = {553},
  year      = {2011},
  publisher = {Nature Publishing Group}
}
......@@ -26,11 +26,12 @@
\tikzstyle{greentagstyle} = [rectangle, fill = green!30, draw = black, drop shadow, font={\sffamily\bfseries}, text=black]
\tikzstyle{redtagstyle} = [rectangle, fill = red!30, draw = black, drop shadow, font={\sffamily\bfseries}, text=black]
\tikzstyle{bluetagstyle} = [rectangle, fill = blue!30, draw = black, drop shadow, font={\sffamily\bfseries}, text=black]
\newcommand{\ideatag}{\tikz{\node[greentagstyle] {Idea};}\xspace}
\newcommand{\problemtag}{\tikz{\node[redtagstyle] {Problem};}\xspace}
\newcommand{\customtag}[1]{\tikz{\node[bluetagstyle] {#1};}\xspace}
......@@ -95,7 +96,7 @@
\begin{itemize}
\item RNA can fulfill virtually all needs of life (information carrying, catalytic activity). All organisms have RNA.
\item Folds hierarchically from a sequence to a 2D scaffold, to a full 3D structure.
\item Folds hierarchically from a {\bf sequence} to a 2D {\bf scaffold}, to a full 3D structure.
\end{itemize}
\begin{figure}[h!]
......@@ -112,6 +113,7 @@
\begin{itemize}
\item Looking closer at looping regions we see more pairwise interactions.
\item These interactions specify the final 3D structure.
\end{itemize}
\begin{columns}
......@@ -178,6 +180,11 @@
\begin{frame}
\frametitle{Multi-loop distribution}
\begin{itemize}
\item Stable multi-branched structures are enriched in mutational landscape.
\item Evolutionary algorithm (\maternal) is able to find nearby multi-loops.
\end{itemize}
\begin{figure}
\includegraphics[width=\textwidth]{matmut.png}
\end{figure}
......@@ -191,7 +198,7 @@
\begin{itemize}
\item Thermodynamic ensembles could have shaped the discovery of complex structures.
\item More complex evolutionary explorations needed to explain full distribution.
\item Manuscript accepted at RNA journal. \cite{oliver2017necessary}
\item Manuscript accepted at RNA journal \cite{oliver2017necessary}.
\end{itemize}
\item Next Steps
\begin{itemize}
......@@ -216,10 +223,11 @@
\end{frame}
\begin{frame}
\frametitle{{\bf Project II:} \rnamigos functional roles for complex structures}
\frametitle{{\bf Project II:} \rnamigos, functional roles for complex structures}
\begin{itemize}
\item RNA function can be modulated through binding with small organic compounds (ligands).
\item Strong potential for novel drug-based therapies~\cite{warner2018principles}.
\end{itemize}
\begin{columns}
......@@ -229,7 +237,7 @@
\includegraphics[height=0.35\textheight]{rna1.png}
\includegraphics[height=0.35\textheight]{rna2.png}
\includegraphics[height=0.3\textheight]{rna10.png}
\caption{3D crystal structure}
\caption{3D crystal structure co-crystallized with ligands.}
\end{figure}
\end{centering}
\end{column}
......@@ -244,11 +252,11 @@
\item The majority of RNA ligands are discovered through phenotypic screens and later attributed to RNA binding (e.g.\ ribocil).
\item Computational approaches focus on direct docking.
\begin{itemize}
\item DOCK 6.0 (2015) \cite{lang2009dock}
\item Molecular Forecaster (2006) (Moitessier, \textcolor{red}{Collaborators})
\item LigandRNA (Bujnicki)
\item DOCK 6.0 \cite{lang2009dock}
\item Molecular Forecaster for RNA (Moitessier, work in progress \textcolor{red}{Collaborators})
\item LigandRNA ~\cite{philips2013ligandrna}
\end{itemize}
\item This is costly given the number of potential drugs is $\sim 10^{23}$.
\end{itemize}
% \setbeamercolor{block body}{bg=green!20}
......@@ -256,19 +264,18 @@
\begin{figure}
\centering
\includegraphics[width=\textwidth]{ensemble.png}
\footnote{Steltzer et. al., 2011}
\includegraphics[width=\textwidth]{ensemble.png} \cite{stelzer2011discovery}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{RNAMIGOS}
\frametitle{\rnamigos}
\begin{itemize}
\item \problemtag given a new RNA, what drugs would likely bind to it?
\item \problemtag we don't have an energy model for non-canonical interactions.
\item \problemtag given a new RNA, can we narrow the search space?
\item \problemtag we don't have an energy model for non-canonical interactions (unlike \maternal).
\item \ideatag treat RNA and ligand as input-output pair for ML.
\end{itemize}
\begin{figure}
......@@ -302,21 +309,12 @@
\end{frame}
\begin{frame}
\frametitle{Results}
\begin{itemize}
\item Using the 3D structure graph we can increase the probability of retrieving the true ligand.
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=\textwidth]{tree_ligs.pdf}
\caption{Performance of \rnamigos on different ligand classes. The tree represents a hierarchical clustering over the ligands in the dataset to visualize similarity relationships. Performance on each class is shown with a colored tile at the leaves of the tree.}
\label{fig:tree}
\end{figure}
\frametitle{We can use \rnamigos to find binding sites}
\includegraphics[width=\textwidth]{pocket_finding.png}
\end{frame}
\begin{frame}
\frametitle{Conclusions \& Next Steps}
......@@ -349,13 +347,13 @@
\frametitle{{\bf Project III:} \vernal, discovering new functional structures}
\begin{itemize}
\item \problemtag Novel functional RNA sites can be found in crystal structures
\item \ideatag: represent crystal structures as graphs and identify recurrent subgraphs.
\item \problemtag Can we find novel functional RNA sites without supervision? (not \rnamigos)
\item \ideatag Interesting structures should be conserved across different RNAs.
\end{itemize}
\begin{figure}
\includegraphics[width=0.8\textwidth]{carnaval.png}
\includegraphics[width=0.8\textwidth]{motifs.png}\cite{leontis2006building}
\end{figure}
\end{frame}
......@@ -363,6 +361,8 @@
\frametitle{Related work}
\begin{itemize}
\item Graphs allow more control over meaningful clustering. But\ldots
\item Current graph-based tools are either limited by the type of interactions scanned, or use rigid matching.
\item We want a fuzzy motif finding tool that works on graphs.
\end{itemize}
......@@ -370,11 +370,11 @@
\resizebox{\textwidth}{!}{%
\begin{tabular}{lllll}
\hline
& flexible motifs & all ss & graph-based \\ \hline
rna3dmotif ~\cite{djelloul2009algorithmes} & \xmark & \xmark & \cmark \\ \hline
& fuzzy & all ss & graph-based \\ \hline
\texttt{rna3dmotif} ~\cite{djelloul2009algorithmes} & \xmark & \xmark & \cmark \\ \hline
RNA 3D Motif Atlas ~\cite{petrov2013automated} & $\sim$ & \xmark & $\sim$ \\ \hline
CaRNAval ~\cite{reinharz2018mining} & \xmark & \xmark & \cmark \\ \hline
RNAMSC \cite{ge2018novo} & \cmark & \xmark & \xmark \\ \hline
\texttt{CaRNAval} ~\cite{reinharz2018mining} & \xmark & \xmark & \cmark \\ \hline
\texttt{RNAMSC} \cite{ge2018novo} & \cmark & \xmark & \xmark \\ \hline
{\bf \vernal} & \cmark & \cmark & \cmark
\end{tabular}%
}
......@@ -386,10 +386,11 @@
\begin{frame}
\frametitle{VERNAL}
\begin{itemize}
\item \problemtag Quickly identify similar subgraphs from a large set of large graphs.
\item \problemtag Quickly identify {\it similar} subgraphs from a large set of large graphs.
\item \ideatag Encode each node's structure in vector space and cluster.
\item $Z \in \mathbb{R}^{n \times d}$ embedding matrix.
\item $K \in \mathbb{R}^{n \times n}$ similarity matrix.
\item $Z \in \mathbb{R}^{n \times d}$ embedding matrix RGCN ~\cite{schlichtkrull2018modeling}.
\item $K \in \mathbb{R}^{n \times n}$ similarity matrix (user-defined).
\end{itemize}
\begin{figure}
\includegraphics[width=\textwidth]{vernal_0.pdf}
......@@ -398,7 +399,7 @@
\begin{frame}{Node similarity function}
\begin{itemize}
\item \ideatag Similar rings will have similar distributions of edge types.
\item \ideatag Similar nodes will have similar distributions of edge types in their neighbourhood.
\end{itemize}
\begin{equation}
......@@ -438,11 +439,11 @@
\frametitle{Motif learning}
\begin{itemize}
\item \ideatag assign each node to $m << n$ `motif' centers.
\item \ideatag ask model to `classify' each node to $m \ll n$ `motif' centers.
\item $\Sigma \in [0,1]^{n \times m}$ soft assignment of each node ($n$) to a motif ($m$)
\item $E \in [0,1]^{m \times d}$ dictionary matrix.
\item $\Sigma^T Z \in \mathbb{R}^{m \times d}$ at cluster $i$ and dimension $d$ is an average embedding weighted by motif attribution.
\item $\Sigma^T Z \in \mathbb{R}^{m \times d}$ at cluster $i$ and dimension $j$ is an average embedding weighted by motif attribution.
\begin{equation}
(\Sigma^T Z)_{ij} = \frac{\sum_{k=1}^{n} \sigma_{ki} z_{kj}}{\norm{\sigma_i}}
......@@ -452,7 +453,7 @@
\end{itemize}
\begin{figure}
\includegraphics[width=0.65\textwidth]{motif_illustration.pdf}
\includegraphics[width=0.6\textwidth]{motif_illustration.pdf}
\end{figure}
\end{frame}
......@@ -472,6 +473,11 @@
\begin{itemize}
\item Nodes belonging to motif $\mu \in \mathcal{M}$ should get the same `label' across all graphs
\item i.e. distribution over labels inside motif instances should have zero entropy
\begin{figure}
\centering
\includegraphics[width=0.5\textwidth]{carnvalidate.pdf}
\end{figure}
\item Let $P = \mathbb{P}[\Phi(v) \vert \Phi, v \in \mu]$, $\bar{P} = \mathbb{P}[\Phi(v) \vert \Phi, v \notin \mu]$
\begin{equation}
......@@ -481,10 +487,7 @@
\item $\Phi(v)$ is the motif assignment given by model $\Phi$ on node $v$
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=0.5\textwidth]{carnvalidate.pdf}
\end{figure}
\end{frame}
......@@ -500,6 +503,25 @@
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Current Status \& Next Steps}
Current Status
\begin{itemize}
\item Implementation complete
\item Learning confirmed on node-level clustering.
\end{itemize}
Next Steps
\begin{itemize}
\item Tune motif finding module.
\item Full validation on RNA graphs.
\item {\bf Target:} Submission to ISMB in January.
\end{itemize}
\end{frame}
\section{{\bf Project IV:} \garl}
\begin{frame}
......@@ -533,17 +555,22 @@
\frametitle{Some RNA-specific considerations}
\begin{figure}
\includegraphics[width=\textwidth]{align.pdf}
\includegraphics[width=.9\textwidth]{align.pdf}
\end{figure}
\begin{itemize}
\item Motif-level conservation?
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Related Work}
\begin{itemize}
\item Existing methods focus on strict edge conservation.
\item Existing methods start with a fixed cost function and focus on strict edge conservation.
\item Mostly applied to aligning protein-protein interaction networks.
\item Major tools rely on {\it a priori} node and edge conservation heuristics.
\end{itemize}
......@@ -609,16 +636,21 @@
Structural complexity in RNA poses many computational challenges which we addressed with the following:
\begin{enumerate}
\item \maternal lets us explore evolutionary scenarios that can permit the formation of complex structures
\item \rnamigos shows that graphical representations of RNA structure holds important functional signals
\item \vernal helps us identify potentially novel related or functional RNAs
\item \garl can highlight key interactions in groups of related RNA.
\item \customtag{\maternal} lets us explore evolutionary scenarios that can permit the formation of complex structures
\item \customtag{\rnamigos} shows that graphical representations of RNA structure hold important functional signals
\item \customtag{\vernal} helps us identify potentially novel related or functional RNAs
\item \customtag{\garl} can highlight key interactions in groups of related RNA.
\end{enumerate}
\end{frame}
\begin{frame}
\frametitle{Timeline}
\begin{itemize}
\item \problemtag finish PhD.
\item \ideatag resubmit \rnamigos in the Fall, focus on \vernal and \garl for the next year.
\end{itemize}
\begin{figure}[h]
\setupchronology{startyear=2016,color=blue,stopyear=2021,dates=false,arrow=false}
......@@ -671,8 +703,9 @@
\item J\'er\^ome Waldisp\"uhl
\item Vincent Mallet
\item Roman Sarrazin Gendron
\item Hua Ting Yao
\item Jacques Boitreaud
\item Hua-Ting Yao
\end{itemize}
......@@ -697,6 +730,7 @@
\bigskip
{\bf Helpful Feedback}
\begin{itemize}
\item Mathieu Blanchette
\item \'Eric Westhof (University of Strasbourg)
\item Vladimir Reinharz (Center for Soft and Living Matter, Ulsan, South Korea)
\end{itemize}
......@@ -712,15 +746,10 @@
\begin{frame}[allowframebreaks]
\frametitle{References}
\bibliographystyle{plain}
\bibliography{biblio}
\bibliography{talk}
\end{frame}
\begin{frame}
\frametitle{We can use \rnamigos to find binding sites}
\includegraphics[width=0.3\textwidth]{3d0x_find.png}
\includegraphics[width=0.3\textwidth]{3ox0_find.png}
\includegraphics[width=0.3\textwidth]{3suy_find.png}
\end{frame}
\begin{frame}
\frametitle{Distribution of RNA ligands}
......@@ -731,6 +760,22 @@
\end{frame}
\begin{frame}
\frametitle{Results}
\begin{itemize}
\item We observe good performance across different types of ligand.
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=\textwidth]{tree_ligs.pdf}
\caption{Performance of \rnamigos on different ligand classes. The tree represents a hierarchical clustering over the ligands in the dataset to visualize similarity relationships. Performance on each class is shown with a colored tile at the leaves of the tree.}
\label{fig:tree}
\end{figure}
\end{frame}
\begin{frame}{Picking $d$: Graphlet Distance}
Idea: Compare structure of nodes in each ring with alignments.
\begin{equation}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment