Commit 2408c7f1 authored by Vladimir Reinharz's avatar Vladimir Reinharz
Browse files

table

parent aeb6eeaf
import matplotlib
from matplotlib import pyplot as plt
import networkx as nx
from itertools import combinations
import arnhack
def _dataset_tables():
    """Build the three parallel per-dataset lists used by the figure code.

    Each row below is (dataset identifier, RFAM family identifier, panel
    title).  Keeping the three values on one row guarantees that the
    lists returned here stay index-aligned.

    Returns a tuple ``(rdat_files, msa_files, titles)`` of lists.
    """
    rows = (
        ('5SRRNA_SHP_0002', 'RF00001', '5S'),
        ('CIDGMP_SHP_0002', 'RF01051', 'c-di-GMP'),
        ('RNAPZ6_1M7_0002', 'RF00174', 'Cobalamin'),
        ('ADDRSW_SHP_0002', 'RF00167', 'Adenine_2'),
        ('ADDRSW_SHP_0003', 'RF00167', 'Adenine_3'),
        ('ADDRSW_SHP_0004', 'RF00167', 'Adenine_4'),
        ('TRNAPH_SHP_0002', 'RF00005', 'tRNA'),
        ('GLYCFN_SHP_0002', 'RF00504', 'Glycosine_2'),
        ('GLYCFN_SHP_0003', 'RF00504', 'Glycosine_3'),
        ('GLYCFN_SHP_0004', 'RF00504', 'Glycosine_4'),
        ('GLYCFN_SHP_0005', 'RF00504', 'Glycosine_5'),
    )
    rdat_files = []
    msa_files = []
    titles = []
    for dataset_id, rfam_id, label in rows:
        rdat_files.append('../Data/%s.rdat' % dataset_id)
        msa_files.append('../Data/%s_%s.stockholm.txt' % (dataset_id, rfam_id))
        titles.append(label)
    return rdat_files, msa_files, titles


# rdat_path[k], msa_path[k] and title[k] all describe the same dataset.
rdat_path, msa_path, title = _dataset_tables()
def make_shape_fig():
    """Draw one SHAPE-perturbation curve per dataset and save the figure.

    Every entry of ``rdat_path`` gets its own panel in a 2x6 grid, filled
    row by row.  The curve plots ``get_shape_dist(k)`` for each index ``k``
    of the object's ``wt_shape``; the x tick labels are the items of
    ``get_ss_msa_on_wt()`` (presumably one secondary-structure character
    per position -- confirm against ``arnhack``).  The bottom-right panel
    is hidden and the result is written to 'Figure_shape_dist.pdf'.
    """
    n_cols = 6
    _, panels = plt.subplots(2, n_cols, figsize=(20, 5))
    for idx, rdat_file in enumerate(rdat_path):
        # First n_cols datasets go on the top row, the rest on the bottom.
        row = 1 if idx >= n_cols else 0
        col = idx % n_cols
        print('%d %d' % (row, col))

        hack = arnhack.Arnhack(rdat_file)
        hack.add_msa(msa_path[idx])

        positions = range(len(hack.wt_shape))
        perturbation = [hack.get_shape_dist(pos) for pos in positions]

        panel = panels[row, col]
        panel.plot(perturbation, linewidth=2)
        panel.set_ylim([0, 3.6])
        panel.set_title(title[idx])
        panel.set_xticks(positions)
        panel.set_xticklabels(list(hack.get_ss_msa_on_wt()))

    # The grid has one more slot than the 11 datasets listed in rdat_path.
    panels[-1, -1].axis('off')
    panels[0, 0].set_ylabel('SHAPE perturbation', fontsize=14)
    panels[1, 3].set_xlabel('Mutation position', fontsize=14)
    plt.tight_layout()
    plt.savefig('Figure_shape_dist.pdf')
def make_dist_fig():
    """Histogram the pairwise distances between components, per dataset.

    Only the six datasets whose title is listed in ``wanted`` are drawn,
    on a 2x3 grid filled row by row.  For every pair of components
    returned by ``make_components`` the largest shortest-path length in
    the ``SSE_graph()`` graph is recorded, repeated once for every pair of
    members that belong to only one of the two components.  The figure is
    written to 'Figure_ss_dist.pdf'.
    """
    wanted = ('5S', 'tRNA', 'c-di-GMP', 'Adenine_2', 'Cobalamin', 'Glycosine_2')
    n_cols = 3
    figure, panels = plt.subplots(2, n_cols, figsize=(15, 6))

    # Lazily walk the dataset indices whose title was selected.
    chosen = (k for k in range(len(rdat_path)) if title[k] in wanted)
    for slot, k in enumerate(chosen):
        row = 1 if slot >= n_cols else 0
        col = slot % n_cols
        print('%d %d' % (row, col))

        hack = arnhack.Arnhack(rdat_path[k])
        hack.add_msa(msa_path[k])
        structure = hack.get_ss_msa_on_wt()
        graph = hack.SSE_graph()
        components = hack.make_components(hack.make_tree(structure))

        dists = []
        for first, second in combinations(components, 2):
            # A pair of nodes lying in both components contributes 0;
            # otherwise its shortest path is measured.  Note that the
            # length is the number of nodes on the path, not of edges.
            farthest = max(
                0 if ((u in first and v in first) and
                      (u in second and v in second))
                else len(nx.shortest_path(graph, u, v))
                for u in first for v in second)
            only_first = set(first) - set(second)
            only_second = set(second) - set(first)
            dists.extend([farthest] * (len(only_first) * len(only_second)))

        panel = panels[row, col]
        panel.hist(dists, linewidth=2)
        # Drop the '_<n>' suffix of the dataset title.
        panel.set_title(title[k].split('_')[0])

    panels[0, 0].set_ylabel('Nb. of pairs of nts.', fontsize=14)
    panels[1, 1].set_xlabel(r'Secondary structure elements distances $\gamma$', fontsize=14)
    figure.suptitle('Distribution of pairwise distances', fontsize=15)
    # Leave room at the top for the suptitle.
    plt.subplots_adjust(top=0.85)
    plt.savefig('Figure_ss_dist.pdf')
def check_ali_quality():
    """Interactively re-align each selected dataset with ``infernal_align``.

    For every dataset whose title is in ``wanted`` this prints the grid
    coordinates the dataset has in the distance figure together with its
    title, loads it, calls ``add_msa(..., infernal_align=True)`` and then
    blocks until the user presses Enter, so the output of each alignment
    can be inspected before moving on.

    The histogram code that used to follow the prompt has been removed: it
    sat behind an unconditional ``continue`` and relied on ``ax`` / ``fig``
    objects that this function never creates.
    """
    try:
        wait_for_user = raw_input  # Python 2
    except NameError:
        wait_for_user = input  # Python 3

    wanted = ('5S', 'tRNA', 'c-di-GMP', 'Adenine_2', 'Cobalamin', 'Glycosine_2')
    z = -1
    for zz, x in enumerate(rdat_path):
        if title[zz] not in wanted:
            continue
        z += 1
        # Same 2x3 layout as the distance figure; used for the log line only.
        j = z % 3
        i = 0 if z < 3 else 1
        print('%d %d %s' % (i, j, title[zz]))
        a = arnhack.Arnhack(x)
        a.add_msa(msa_path[zz], infernal_align=True)
        wait_for_user()
# Script entry point: only the alignment check is enabled; the two figure
# generators are left commented out.
if __name__ == '__main__':
    #make_shape_fig()
    #make_dist_fig()
    check_ali_quality()
......@@ -388,33 +388,28 @@ The whole implementation is freely available at:
\subsection{Dataset}
To evaluate the efficiency of our method, data with the desired properties was available for {\color{red}six} RNAs~\cite{Cordero:2012aa}. These required properties are a mutate-and-map experiment data set, a determined three-dimensional structure interacting with other chain(s), and an \rfam alignment. Those {\color{red}six} RNAs are the 5S ribosomal RNA, {\color{red}the phenylalanine tRNA, the c-di-GMP riboswitch, the cobalamin riboswitch (Puzzle 6), the adenine riboswitch and the glycine riboswitch. See Table~\ref{table:datasetinfo} for details.}
To evaluate the efficiency of our method, data with the desired properties was available for {\color{red}six} RNAs~\cite{Cordero:2012aa}. These required properties are a mutate-and-map experiment data set, a determined three-dimensional structure interacting with other chain(s), and an \rfam alignment. Those {\color{red}six} RNAs are the 5S ribosomal RNA, {\color{red} the c-di-GMP riboswitch, the cobalamin riboswitch (Puzzle 6), the adenine riboswitch, the phenylalanine tRNA and the glycine riboswitch.}
{\color{red} REF Table SUP MAT FOR ALL BINDING SITES}
{\color{red}The latter gave poor results, potentially due to an artificial hairpin, introduced in the sequence, that binds to a small protein to help the crystallisation. The protein was missing in the MaM experiments. The glycine riboswitch was thus omitted from the present analysis. The results are shown in supplementary material (Fig.~S7).}
\begin{table}[t]
\rotatebox{90}{
\centering
\colorbox{red}{
\begin{tabular}{lllll}
RNA & Binding to & RFAM & PDB(s) &Binding Positions on PDB \\\hline\hline
5S & Prots. & RF00001 & 2WWQ & $7-13,27-33,38,41-57,59-60,70,73-84,88-104,112-116$\\
& & & 3OAS & $6-12,26-33,37-38,41-52,54-57,59,70,73-84,88-104,112-116$\\
& & & 3OFC & $6-12,27-31,33,37-38,41-52,54-59,73-84,88-104,112-117$\\
& & & 3ORB & $6-12,27-31,33,37-38,41-52,54-59,73-84,88-104,112-116$\\\hline
tRNA & Prots. and DNA & RF00005 &1EHZ & $1, 19, 34-36, 56-57, 73-76$\\\hline
c-di-GMP ribo. & c-di-GMP & RF01051 & 3IWN & $8-10,28,38,53-64,66-72,82$\\
& & & 3MUT & $18-20,38,48,61-64,75,92$\\
& & & 3MUV & $18-20,34,38,48,60-64,75,92$\\
& & & 3MXH & $18-20,38,48,61-64,75,92$\\\hline
cobalamin ribo. & B1Z & RF00174 & 4GXY & $41-43,64-66,72-78,106,108-109,124,148-150,155-157,159-162$\\\hline
adenine ribo. & adenine & RF00167 & 1Y26 & $21-22,47,50-52,73-75$\\\hline
glycine ribo. & glycine & RF00504 & 3P49 & $35-39, 46, 48-42, 110-114,137, 139-143$\\
\begin{tabular}{lc}
\multicolumn{1}{l|}{RNA} & Total relative entropy \\\hline
5S & \phantom{0}70.210 \\
c-di-GMP ribo. & \phantom{0}56.898 \\
cobalamin ribo. & 112.690 \\
adenine ribo. & \phantom{0}60.180 \\
tRNA & \phantom{0}56.303 \\
glycine ribo. & \phantom{0}58.410 \\
\end{tabular}
}
}
\caption{{\color{red}For each RNA some info}}
\label{table:datasetinfo}
\caption{{\color{red}Total relative entropy of each sequence when aligned to its RFAM sequence, obtained from \texttt{infernal}}}
\label{table:ali_entropy}
\end{table}
The 5S ribosomal RNA is the family \texttt{RF00001} on \rfam. Its seed alignment consists of $713$ sequences. The family also provides the consensus structure. The mutate-and-map protocol was applied to the consensus sequence of $4$ structures which have as PDB identifiers \texttt{2WWQ}~\cite{2WWQ}, \texttt{3OAS} and \texttt{3OFC}~\cite{3OAS_3OFC}, and \texttt{3ORB}~\cite{3ORB}. We present in Fig.~\ref{fig:shape}a, for every position $i$, the value of $\Delta(S, S_i)$, with the aligned \rfam consensus secondary structure below. Those four determined structures have almost the same sequence with slight differences in the length of their $5'$ and $3'$ extremities.
......@@ -528,13 +523,13 @@ structural differences. We present in Fig.~\ref{fig:dist} the distribution of pa
\begin{figure*}[ht!]
\begin{figure*}[t!]
\centering
\colorbox{red}{
\includegraphics[width=0.96\textwidth]{Figure7.png}
}
\caption{{\bf Distance distribution for pairs of secondary structure elements}, weighted by the numbers of non-shared nucleotides}
\label{fig:dist}
\label{fig:dist}%
\end{figure*}
......
......@@ -224,5 +224,27 @@ The results for c-di-GMP having as positive positions the ones interacting in th
\label{fig:cdigmp_hairpinornot}
\end{figure}
\section{Dataset binding positions}
\begin{table}[ht!]
\begin{tabular}{lllll}
RNA & Binding to & RFAM & PDB(s) &Binding Positions on PDB \\\hline\hline
5S & Prots. & RF00001 & 2WWQ & $7-13,27-33,38,41-57,59-60,70,73-84,88-104,112-116$\\
& & & 3OAS & $6-12,26-33,37-38,41-52,54-57,59,70,73-84,88-104,112-116$\\
& & & 3OFC & $6-12,27-31,33,37-38,41-52,54-59,73-84,88-104,112-117$\\
& & & 3ORB & $6-12,27-31,33,37-38,41-52,54-59,73-84,88-104,112-116$\\\hline
tRNA & Prots. and DNA & RF00005 &1EHZ & $1, 19, 34-36, 56-57, 73-76$\\\hline
c-di-GMP ribo. & c-di-GMP & RF01051 & 3IWN & $8-10,28,38,53-64,66-72,82$\\
& & & 3MUT & $18-20,38,48,61-64,75,92$\\
& & & 3MUV & $18-20,34,38,48,60-64,75,92$\\
& & & 3MXH & $18-20,38,48,61-64,75,92$\\\hline
cobalamin ribo. & B1Z & RF00174 & 4GXY & $41-43,64-66,72-78,106,108-109,124,148-150,155-157,159-162$\\\hline
adenine ribo. & adenine & RF00167 & 1Y26 & $21-22,47,50-52,73-75$\\\hline
glycine ribo. & glycine & RF00504 & 3P49 & $35-39, 46, 48-42, 110-114,137, 139-143$\\
\end{tabular}
\caption{{\color{red}For each RNA, its binding partner, RFAM family, PDB identifier(s) and binding positions on the PDB structure}}
\label{table:datasetinfo}
\end{table}
\end{document}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment