Commit 96f4a373 authored by yannponty's avatar yannponty
Browse files

Merge branch 'master' of jwgitlab.cs.mcgill.ca:vreinharz/arnhack

Conflicts:
	TeX/NAR/main_NAR.tex
parents 0bfdbba9 7ad05b4e
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -8,7 +8,7 @@ import numpy as np
import cPickle
from time import time
from itertools import combinations, product
import matplotlib
#import matplotlib
#matplotlib.use('PDF')
#matplotlib.rc('text', usetex=True)
#from matplotlib import pyplot as plt
......@@ -20,34 +20,61 @@ import networkx as nx
from arnhack import Arnhack
#rdat_path = ['../Data/5SRRNA_SHP_0002.rdat', '../Data/CIDGMP_SHP_0002.rdat']
#msa_path = ['../Data/5SRRNA_SHP_0002_RF00001.stockholm.txt','../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt']
#rdat_path = ['../Data/GLYCFN_SHP_0002.rdat', '../Data/GLYCFN_SHP_0003.rdat',
# '../Data/GLYCFN_SHP_0004.rdat', '../Data/GLYCFN_SHP_0005.rdat',
# '../Data/TRNAPH_SHP_0002.rdat']
#msa_path = ['../Data/RF00504.stockholm.txt', '../Data/RF00504.stockholm.txt',
# '../Data/RF00504.stockholm.txt', '../Data/RF00504.stockholm.txt',
# '../Data/RF00005.stockholm.txt']
#rdat_path = ['../Data/ADDRSW_SHP_0002.rdat', '../Data/ADDRSW_SHP_0003.rdat',
# '../Data/ADDRSW_SHP_0004.rdat']
#msa_path = ['../Data/RF00167.stockholm.txt', '../Data/RF00167.stockholm.txt',
# '../Data/RF00167.stockholm.txt']
rdat_path = ['../Data/RNAPZ6_1M7_0002.rdat',
'../Data/RNAPZ8_1M7_0001.rdat',
'../Data/RNAPZ8_CMCT_0001.rdat',
'../Data/RNAPZ8_DMS_0001.rdat',
'../Data/RNAPZ8_NMD_0001.rdat']
msa_path = ['../Data/RF00174.stockholm.txt',
'../Data/RF00162.stockholm.txt',
'../Data/RF00162.stockholm.txt',
'../Data/RF00162.stockholm.txt',
'../Data/RF00162.stockholm.txt']
OUT_PATH = '../Data/analysed.txt'
NB_PROCS = 4
"""
rdat_path = ['../Data/5SRRNA_SHP_0002.rdat',
'../Data/CIDGMP_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',
]
msa_path = ['../Data/5SRRNA_SHP_0002_RF00001.stockholm.txt',
'../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',
'../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt']
OUT_PATH = '../Data/analyzed_everything.txt'
rdat_path = ['../Data/CIDGMP_SHP_0002.rdat',
'../Data/CIDGMP_SHP_0002.only_cidgmp.rdat']
msa_path = ['../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',
'../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',]
rdat_path = ['../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',
'../Data/TRNAPH_SHP_0002.rdat']
msa_path = ['../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt',
'../Data/TRNAPH_SHP_0002_RF00005.stockholm.txt']
"""
rdat_path = ['../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',
'../Data/ADDRSW_SHP_0002.rdat',
'../Data/ADDRSW_SHP_0003.rdat',
'../Data/ADDRSW_SHP_0004.rdat']
msa_path = ['../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt',
'../Data/ADDRSW_SHP_0002_RF00167.stockholm.txt',
'../Data/ADDRSW_SHP_0003_RF00167.stockholm.txt',
'../Data/ADDRSW_SHP_0004_RF00167.stockholm.txt']
#rdat_path = ['../Data/RNAPZ6_1M7_0002.rdat',]
#msa_path = ['../Data/RNAPZ6_1M7_0002_RF00174.stockholm.txt']
#RESTRICTIONS = ['analyze_loc']
RESTRICTIONS = ['analyze_loc', 'analyze_max', 'analyze_sse', 'analyze_all']
#OUT_PATH = '../Data/analyzed_glyc_trna.txt'
#OUT_PATH = '../Data/analyzed_glyc_add.txt'
OUT_PATH = '../Data/analyzed_glyc_add_l2.txt'
NB_PROCS = 20
......@@ -63,31 +90,43 @@ class Analyze(Arnhack):
('3OAS', 'B', 0),
('3OFC', 'B', 0),
('3ORB', 'B', 0)],
'ADDRSW_SHP_0002':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0003':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0004':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0002':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0003':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0004':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'CIDGMP_SHP_0002':[('3MXH', 'R', -8),
('3IWN', 'A', 2),
('3MUV', 'R', -8),
('3MUT', 'R', -8)],
'GLYCFN_SHP_0002':[('3P49', 'A', 0)],
'GLYCFN_SHP_0003':[('3P49', 'A', 0)],
'GLYCFN_SHP_0004':[('3P49', 'A', 0)],
'GLYCFN_SHP_0005':[('3P49', 'A', 0)],
'GLYCFN_SHP_0002':[('3PGM', 'A', 0), #gly+mg
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0003':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0004':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0005':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'TRNAPH_SHP_0002':[('1EHZ', 'A', -1)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),
('4GB1', 'A', -3),
('4GBI', 'A', -3),
('4GBM', 'A', -3),
('4GIM', 'A', -3),
('4GIR', 'A', -3),
('4GMG', 'A', -3)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),#all
('4GB1', 'A', -3),#B12
('4GBI', 'A', -3),#B12 + IRI
('4GBM', 'A', -3),#B12 + MG
('4GIM', 'A', -3),#IRI + MG
('4GIR', 'A', -3),#IRI
('4GMG', 'A', -3)],#MG
}
......@@ -191,8 +230,6 @@ class Analyze(Arnhack):
truth[pdb_id] = {'TP':TP, 'FP':FP, 'P':len(P),
'TN':TN, 'FN':FN, 'N':len(N)}
return truth
def get_sen_spe_truth(self, shape_delta, gamma, zeta):
......@@ -229,7 +266,16 @@ class Analyze(Arnhack):
data = self.get_roc(shape_delta, gamma, zetas_min, zetas_max)
roc = []
for pdb_id in data:
rna = os.path.basename(self.path).rsplit('.')[0]
for pdb_id,chain, offset in sorted(self.d[rna], key=lambda x:x[0]):
if pdb_id not in data:
roc.append(np.nan)
continue
if data[pdb_id][-1] != (1, 1):
data[pdb_id].append(1, 1)
if data[pdb_id][0] != (0, 0):
data[pdb_id] = [(0,0)] + data[pdb_id]
to_plot = np.array(data[pdb_id])
roc.append(np.sum((to_plot[:-1,1]+to_plot[1:,1])*(to_plot[1:,0]-to_plot[:-1,0]))/2)
"""
......@@ -279,24 +325,43 @@ class Analyze_all_shape_dists(Analyze):
('3OAS', 'B', 0),
('3OFC', 'B', 0),
('3ORB', 'B', 0)],
'ADDRSW_SHP_0002':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0003':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0004':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0002':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0003':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0004':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'CIDGMP_SHP_0002':[('3MXH', 'R', -8),
('3IWN', 'A', 2),
('3MUV', 'R', -8),
('3MUT', 'R', -8)],
'GLYCFN_SHP_0002':[('3P49', 'A', 0)],
'GLYCFN_SHP_0003':[('3P49', 'A', 0)],
'GLYCFN_SHP_0004':[('3P49', 'A', 0)],
'GLYCFN_SHP_0005':[('3P49', 'A', 0)],
'GLYCFN_SHP_0002':[('3PGM', 'A', 0), #gly+mg
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0003':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0004':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0005':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'TRNAPH_SHP_0002':[('1EHZ', 'A', -1)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),#all
('4GB1', 'A', -3),#B12
('4GBI', 'A', -3),#B12 + IRI
('4GBM', 'A', -3),#B12 + MG
('4GIM', 'A', -3),#IRI + MG
('4GIR', 'A', -3),#IRI
('4GMG', 'A', -3)],#MG
}
......@@ -314,7 +379,7 @@ class Analyze_all_shape_dists(Analyze):
class Analyze_max_shape_dists(Analyze):
def __init__(self, *args, **kwargs):
"""Init with Arnhack"""
super(Analyze_all_shape_dists, self).__init__(*args, **kwargs)
super(Analyze_max_shape_dists, self).__init__(*args, **kwargs)
self.get_shape_dist = self.max_shape_dist
......@@ -323,24 +388,43 @@ class Analyze_max_shape_dists(Analyze):
('3OAS', 'B', 0),
('3OFC', 'B', 0),
('3ORB', 'B', 0)],
'ADDRSW_SHP_0002':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0003':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0004':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0002':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0003':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0004':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'CIDGMP_SHP_0002':[('3MXH', 'R', -8),
('3IWN', 'A', 2),
('3MUV', 'R', -8),
('3MUT', 'R', -8)],
'GLYCFN_SHP_0002':[('3P49', 'A', 0)],
'GLYCFN_SHP_0003':[('3P49', 'A', 0)],
'GLYCFN_SHP_0004':[('3P49', 'A', 0)],
'GLYCFN_SHP_0005':[('3P49', 'A', 0)],
'GLYCFN_SHP_0002':[('3PGM', 'A', 0), #gly+mg
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0003':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0004':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0005':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'TRNAPH_SHP_0002':[('1EHZ', 'A', -1)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),#all
('4GB1', 'A', -3),#B12
('4GBI', 'A', -3),#B12 + IRI
('4GBM', 'A', -3),#B12 + MG
('4GIM', 'A', -3),#IRI + MG
('4GIR', 'A', -3),#IRI
('4GMG', 'A', -3)],#MG
}
......@@ -352,8 +436,6 @@ class Analyze_max_shape_dists(Analyze):
if mut_pos not in self.shape:
return None
l_bnd = max(0, mut_pos-delta)
u_bnd = min(mut_pos+delta+1, len(self.wt))
return max(l2(self.shape[mut_pos][max(0, i-delta):min(i+delta+1, len(self.wt))],
self.wt_shape[max(0, i-delta):min(i+delta+1, len(self.wt))])
for i in range(len(self.wt)))
......@@ -370,24 +452,43 @@ class Analyze_sse_shape_dists(Analyze):
('3OAS', 'B', 0),
('3OFC', 'B', 0),
('3ORB', 'B', 0)],
'ADDRSW_SHP_0002':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0003':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0004':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0002':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0003':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0004':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'CIDGMP_SHP_0002':[('3MXH', 'R', -8),
('3IWN', 'A', 2),
('3MUV', 'R', -8),
('3MUT', 'R', -8)],
'GLYCFN_SHP_0002':[('3P49', 'A', 0)],
'GLYCFN_SHP_0003':[('3P49', 'A', 0)],
'GLYCFN_SHP_0004':[('3P49', 'A', 0)],
'GLYCFN_SHP_0005':[('3P49', 'A', 0)],
'GLYCFN_SHP_0002':[('3PGM', 'A', 0), #gly+mg
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0003':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0004':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0005':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'TRNAPH_SHP_0002':[('1EHZ', 'A', -1)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),#all
('4GB1', 'A', -3),#B12
('4GBI', 'A', -3),#B12 + IRI
('4GBM', 'A', -3),#B12 + MG
('4GIM', 'A', -3),#IRI + MG
('4GIR', 'A', -3),#IRI
('4GMG', 'A', -3)],#MG
}
......@@ -410,28 +511,29 @@ class Analyze_sse_shape_dists(Analyze):
def slave_roc(args):
roc = []
shape_delta, gamma, z_min, z_max = args
classes = {'analyze_loc':Analyze, 'analyze_all':Analyze_all_shape_dists,
'analyze_max':Analyze_max_shape_dists,
'analyze_sse':Analyze_sse_shape_dists}
for i, rpath in enumerate(rdat_path):
mpath = msa_path[i]
ana = Analyze(rpath)
ana.add_msa(msa_path[i])
ana.msa_npmi()
t = time()
roc.append(ana.graph_roc(shape_delta, gamma, z_min, z_max))
tmp_l = []
for c in classes:
if c not in RESTRICTIONS:
continue
ana = classes[c](rpath)
ana.add_msa(msa_path[i])
ana.msa_npmi()
t = time()
tmp_l.append(tuple([c] + list(
ana.graph_roc(shape_delta, gamma, z_min, z_max))))
print c, 'done'
roc.append(tuple(tmp_l))
print 'to compute one truth', time() - t
return tuple([(x, shape_delta, gamma) for x in roc if roc])
if __name__ == '__main__':
for i, rpath in enumerate(rdat_path):
print rpath
mpath = msa_path[i]
print mpath
ana = Analyze(rpath)
ana.add_msa(msa_path[i], infernal_align=True)
print ana.resi_close()
sys.exit()
args = ((shape_delta, gamma, 0, 100) for shape_delta in range(80, 99) for gamma in range(1,15))
args = ((shape_delta, gamma, 0, 100) for shape_delta in range(80, 100) for gamma in range(1,35))
pool = Pool(processes=NB_PROCS)
out = []
for x in pool.imap_unordered(slave_roc, args):
......
......@@ -235,8 +235,8 @@ class Arnhack(object):
tmp_fasta.close()
try:
call(['cmalign', '--mapali', ali_path, tmp_cm.name, tmp_fasta.name],
stdout=open(rdat_name, 'w'))
call(['cmalign', '-o', rdat_name, '--verbose', '--mapali', ali_path, tmp_cm.name, tmp_fasta.name], )
#stdout=open(rdat_name, 'w'))
except:
print "Error with cmalign, from Infernal"
sys.exit(1)
......
import matplotlib
from matplotlib import pyplot as plt
import networkx as nx
from itertools import combinations
import arnhack
rdat_path = ['../Data/5SRRNA_SHP_0002.rdat',
'../Data/CIDGMP_SHP_0002.rdat',
'../Data/RNAPZ6_1M7_0002.rdat',
'../Data/ADDRSW_SHP_0002.rdat',
'../Data/ADDRSW_SHP_0003.rdat',
'../Data/ADDRSW_SHP_0004.rdat',
'../Data/TRNAPH_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',]
msa_path = ['../Data/5SRRNA_SHP_0002_RF00001.stockholm.txt',
'../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',
'../Data/RNAPZ6_1M7_0002_RF00174.stockholm.txt',
'../Data/ADDRSW_SHP_0002_RF00167.stockholm.txt',
'../Data/ADDRSW_SHP_0003_RF00167.stockholm.txt',
'../Data/ADDRSW_SHP_0004_RF00167.stockholm.txt',
'../Data/TRNAPH_SHP_0002_RF00005.stockholm.txt',
'../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt']
title = ['5S',
'c-di-GMP',
'Cobalamin',
'Adenine_2',
'Adenine_3',
'Adenine_4',
'tRNA',
'Glycosine_2',
'Glycosine_3',
'Glycosine_4',
'Glycosine_5',
]
def make_shape_fig():
i = 0
j = 0
fig, ax = plt.subplots(2,6,figsize=(20, 5))
for z, x in enumerate(rdat_path):
j = z % 6
i = 0 if z < 6 else 1
#plt.subplot(2,6,i+1)
print i, j
a = arnhack.Arnhack(x)
a.add_msa(msa_path[z])
ax[i,j].plot([a.get_shape_dist(x) for x in range(len(a.wt_shape))], linewidth=2)
ax[i,j].set_ylim([0,3.6])
ax[i,j].set_title(title[z])
#ax[i,j].set_xticks(range(len(a.wt_shape)), list(a.get_ss_msa_on_wt()))
ax[i,j].set_xticks(range(len(a.wt_shape)))
ax[i,j].set_xticklabels(list(a.get_ss_msa_on_wt()))
ax[-1,-1].axis('off')
ax[0,0].set_ylabel('SHAPE perturbation', fontsize=14)
ax[1, 3].set_xlabel('Mutation position', fontsize=14)
#plt.ylabel('SHAPE reactivity', fontsize=14)
#plt.xlabel('Sequence position', fontsize=14)
plt.tight_layout()
plt.savefig('Figure_shape_dist.pdf')
def make_dist_fig():
i = 0
j = 0
fig, ax = plt.subplots(2,3,figsize=(15, 6))
z = -1
for zz, x in enumerate(rdat_path):
if title[zz] not in ('5S', 'tRNA', 'c-di-GMP', 'Adenine_2', 'Cobalamin', 'Glycosine_2'):
continue
z += 1
j = z % 3
i = 0 if z < 3 else 1
#plt.subplot(2,6,i+1)
print i, j
a = arnhack.Arnhack(x)
a.add_msa(msa_path[zz])
ss = a.get_ss_msa_on_wt()
g = a.SSE_graph()
components = a.make_components(a.make_tree(ss))
dists = []
for c1, c2 in combinations(components, 2):
dists.extend([max(len(nx.shortest_path(g, x, y)) if
(not(x in c1 and y in c1)) or
(not(x in c2 and y in c2)) else 0
for x in c1 for y in c2)]*len(set(c1)-set(c2))*len(set(c2)-set(c1)))
ax[i,j].hist(dists, linewidth=2)
#ax[i,j].set_ylim([0,2])
ax[i,j].set_title(title[zz].split('_')[0])
#ax[i,j].set_xticks(range(len(a.wt_shape)), list(a.get_ss_msa_on_wt()))
#ax[i,j].set_xticks(range(len(a.wt_shape)))
#ax[i,j].set_xticklabels(list(a.get_ss_msa_on_wt()))
ax[0,0].set_ylabel('Nb. of pairs of nts.', fontsize=14)
#print dir(ax[0,0].yaxis)
#ax[0,0].xaxis.set_offset_position(-10)
ax[1, 1].set_xlabel(r'Secondary structure elements distances $\gamma$', fontsize=14)
#plt.ylabel('SHAPE reactivity', fontsize=14)
#plt.xlabel('Sequence position', fontsize=14)
fig.suptitle('Distribution of pairwise distances', fontsize=15)
plt.subplots_adjust(top=0.85)
#plt.tight_layout()
plt.savefig('Figure_ss_dist.pdf')
def check_ali_quality():
i = 0
j = 0
#fig, ax = plt.subplots(2,3,figsize=(15, 6))
z = -1
for zz, x in enumerate(rdat_path):
if title[zz] not in ('5S', 'tRNA', 'c-di-GMP', 'Adenine_2', 'Cobalamin', 'Glycosine_2'):
continue
z += 1
j = z % 3
i = 0 if z < 3 else 1
#plt.subplot(2,6,i+1)
print i, j, title[zz]
a = arnhack.Arnhack(x)
a.add_msa(msa_path[zz], infernal_align=True)
continue
ss = a.get_ss_msa_on_wt()
g = a.SSE_graph()
components = a.make_components(a.make_tree(ss))
dists = []
for c1, c2 in combinations(components, 2):
dists.extend([max(len(nx.shortest_path(g, x, y)) if
(not(x in c1 and y in c1)) or
(not(x in c2 and y in c2)) else 0
for x in c1 for y in c2)]*len(set(c1)-set(c2))*len(set(c2)-set(c1)))
ax[i,j].hist(dists, linewidth=2)
#ax[i,j].set_ylim([0,2])
ax[i,j].set_title(title[zz].split('_')[0])
#ax[i,j].set_xticks(range(len(a.wt_shape)), list(a.get_ss_msa_on_wt()))
#ax[i,j].set_xticks(range(len(a.wt_shape)))
#ax[i,j].set_xticklabels(list(a.get_ss_msa_on_wt()))
#ax[0,0].set_ylabel('Nb. of pairs of nts.', fontsize=14)
#print dir(ax[0,0].yaxis)
#ax[0,0].xaxis.set_offset_position(-10)
#ax[1, 1].set_xlabel(r'Secondary structure elements distances $\gamma$', fontsize=14)
#plt.ylabel('SHAPE reactivity', fontsize=14)
#plt.xlabel('Sequence position', fontsize=14)
fig.suptitle('Distribution of pairwise distances', fontsize=15)
plt.subplots_adjust(top=0.85)