Commit 6c9f808b authored by Carlos GO's avatar Carlos GO
Browse files

done for real

parent 02567b09
......@@ -178,5 +178,25 @@
year={2011},
publisher={Nature Publishing Group}
}
% Braced words in the title keep their capitalisation under sentence-casing
% bibliography styles; the quotation marks are written as TeX quotes.
@article{neveu2013strong,
  author    = {Neveu, Marc and Kim, Hyo-Joong and Benner, Steven A.},
  title     = {The {``Strong''} {RNA} World Hypothesis: Fifty Years Old},
  journal   = {Astrobiology},
  volume    = {13},
  number    = {4},
  pages     = {391--403},
  year      = {2013},
  publisher = {Mary Ann Liebert, Inc.},
  address   = {New Rochelle, NY, USA},
}
% {GraphClust} and {RNA} are braced so that sentence-casing bibliography
% styles do not lowercase the tool name or the acronym.
@article{heyne2012graphclust,
  author    = {Heyne, Steffen and Costa, Fabrizio and Rose, Dominic and Backofen, Rolf},
  title     = {{GraphClust}: alignment-free structural clustering of local {RNA} secondary structures},
  journal   = {Bioinformatics},
  volume    = {28},
  number    = {12},
  pages     = {i224--i232},
  year      = {2012},
  publisher = {Oxford University Press},
}
......@@ -92,11 +92,28 @@
\section{Background}
\begin{frame}
\frametitle{RNA}
\frametitle{Why do we care about RNA? -- Classical View}
\begin{itemize}
\item RNA can fulfill virtually all needs of life (information carrying, catalytic activity). All organisms have RNA.
\item Folds hierarchically from a {\bf sequence} to a 2D {\bf scaffold}, to a full 3D structure.
\item All organisms rely on RNA.
\item Classically seen as an information carrier between \textcolor{blue}{DNA} (information storage) and \textcolor{green}{Protein} (functional units).
\end{itemize}
\begin{figure}[h!]
\centering
\includegraphics[width=0.7\textwidth]{dogma.png} \footnote{\url{biologycorner.com}}
\caption{This is an incomplete picture}
\label{fig:rna}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Why do we care about RNA? -- A closer look}
\begin{itemize}
\item Folds hierarchically from a {\bf sequence} to a stable 2D {\bf scaffold}, to a full 3D structure.
\item RNA is also an independently functional unit.
\end{itemize}
\begin{figure}[h!]
......@@ -107,6 +124,7 @@
\end{frame}
\begin{frame}
\frametitle{3D interactions}
......@@ -133,6 +151,10 @@
\begin{frame}
\frametitle{Overview of Projects}
\begin{itemize}
\item We propose 4 projects to better decode the structural signals in RNA.
\end{itemize}
\begin{figure}
\includegraphics[width=\textwidth]{phd_s.pdf}
\end{figure}
......@@ -144,8 +166,8 @@
\frametitle{{\bf Project I:} \maternal, How do complex structures evolve?}
\begin{itemize}
\item \problemtag How can populations reach energetically unfavorable structures?
\item \ideatag Model the distribution of mutants.
\item \problemtag {\bf RNA world~\cite{neveu2013strong}:} How can populations reach energetically favorable yet complex structures?
\end{itemize}
\begin{figure}
......@@ -166,7 +188,7 @@
\frametitle{Sampling methods.}
\begin{itemize}
\item Model probability of $k$ mutant sequence-structure $(\omega, s)\in \mathbb{S}_{\omega_0}^k$ with Boltzmann distribution:
\item \ideatag Model probability of $k$ mutant sequence-structure $(\omega, s)\in \mathbb{S}_{\omega_0}^k$ with Boltzmann distribution:
\begin{equation}
\mathbb{P}(\omega, s) = \frac{e^{\frac{-E(\omega, s)}{RT}}}{\mathcal Z},\qquad \mathcal Z = \sum_{(\omega', s')\in \mathbb{S}_{\omega_0}^k}e^{\frac{-E(\omega', s')}{RT}}.
\end{equation}
......@@ -249,6 +271,7 @@
\begin{frame}
\frametitle{Related Work}
\begin{itemize}
\item \problemtag given a new RNA, can we find which ligand it binds to?
\item The majority of RNA ligands are discovered through phenotypic screens and later attributed to RNA binding. (e.g. ribocil)
\item Computational approaches focus on direct docking.
\begin{itemize}
......@@ -274,7 +297,7 @@
\frametitle{\rnamigos}
\begin{itemize}
\item \problemtag given a new RNA, can we narrow the search space?
\item \problemtag can we narrow the search space?
\item \problemtag we don't have an energy model for non-canonical interactions (unlike \maternal).
\item \ideatag treat RNA and ligand as input-output pair for ML.
\end{itemize}
......@@ -309,7 +332,7 @@
\end{frame}
\begin{frame}
\frametitle{We can use \rnamigos to find binding sites}
\frametitle{We can use \rnamigos to scan full structures for binding sites.}
\includegraphics[width=\textwidth]{pocket_finding.png}
\end{frame}
......@@ -353,7 +376,7 @@
\end{itemize}
\begin{figure}
\includegraphics[width=0.8\textwidth]{motifs.png}\cite{leontis2006building}
\includegraphics[width=0.9\textwidth]{motifs.png}\cite{leontis2006building}
\end{figure}
\end{frame}
......@@ -375,6 +398,8 @@
RNA 3D Motif Atlas ~\cite{petrov2013automated} & $\sim$ & \xmark & $\sim$ \\ \hline
\texttt{CaRNAval} ~\cite{reinharz2018mining} & \xmark & \xmark & \cmark \\ \hline
\texttt{RNAMSC} \cite{ge2018novo} & \cmark & \xmark & \xmark \\ \hline
\texttt{GraphClust} \cite{heyne2012graphclust} & \cmark & \xmark & \cmark \\ \hline
{\bf \vernal} & \cmark & \cmark & \cmark
\end{tabular}%
}
......@@ -384,13 +409,13 @@
\end{frame}
\begin{frame}
\frametitle{VERNAL}
\frametitle{\vernal}
\begin{itemize}
\item \problemtag Quickly identify {\it similar} subgraphs from a large set of large graphs.
\item \ideatag Encode each node's structure in vector space and cluster.
\item $Z \in \mathbb{R}^{n \times d}$ embedding matrix RGCN ~\cite{schlichtkrull2018modeling}.
\item $K \in \mathbb{R}^{n \times n}$ similarity matrix (user-defined).
\item \problemtag Quickly identify {\it similar} subgraphs from a large set of large graphs, caring only about {\bf edge type}.
\item \ideatag Encode each node's structure in vector space and cluster (vector comparison faster than graph comparison).
\item $Z \in \mathbb{R}^{n \times d}$ node embedding matrix from a Relational Graph Convolutional Network (RGCN)~\cite{schlichtkrull2018modeling}.
\item $K \in \mathbb{R}^{n \times n}$ node similarity matrix (user-defined).
\end{itemize}
\begin{figure}
\includegraphics[width=\textwidth]{vernal_0.pdf}
......@@ -439,21 +464,21 @@
\frametitle{Motif learning}
\begin{itemize}
\item \ideatag ask model to `classify' each node to $m \ll n$ `motif' centers.
\item \ideatag ask model to `classify' each node to $m \ll n$ `motif' centers.
\item $\Sigma \in [0,1]^{n \times m}$ soft assignment of each node ($n$) to a motif ($m$)
\item $E \in [0,1]^{m \times d}$ dictionary matrix.
\item $\Sigma^T Z \in \mathbb{R}^{m \times d}$ at cluster $i$ and dimension $j$ is an average embedding weighted by motif attribution.
\item $\Sigma^T Z \in \mathbb{R}^{m \times d}$ at cluster $i$ and dimension $j$ is an {\bf average} embedding weighted by motif attribution.
\begin{equation}
(\Sigma^T Z)_{ij} = \frac{\sum_{k=1}^{n} \sigma_{ki} z_{kj}}{\norm{\sigma_i}}
\end{equation}
\item Forces the model to `summarize' the structures.
\end{itemize}
\begin{figure}
\includegraphics[width=0.6\textwidth]{motif_illustration.pdf}
\includegraphics[width=0.5\textwidth]{motif_illustration.pdf}
\end{figure}
\end{frame}
......@@ -543,6 +568,7 @@
\item Predictive models from distances (\rnamigos ~\cite{oliver2019extended})
\end{itemize}
\item Graph alignment $\sim$ sequential graph editing or node matching.
\item Editing operations assigned a weight by a cost function which reflects the strength of the perturbation.
\end{itemize}
\begin{figure}
......@@ -559,7 +585,7 @@
\end{figure}
\begin{itemize}
\item Motif-level conservation?
\item Motif-level cost?
\end{itemize}
\end{frame}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment