Commit 59468891 authored by Vladimir Reinharz
Browse files

table all data in TeX

parent 9eb39650
This diff is collapsed.
......@@ -8,7 +8,7 @@ import numpy as np
import cPickle
from time import time
from itertools import combinations, product
import matplotlib
#import matplotlib
#matplotlib.use('PDF')
#matplotlib.rc('text', usetex=True)
#from matplotlib import pyplot as plt
......@@ -20,34 +20,61 @@ import networkx as nx
from arnhack import Arnhack
# --- Dataset configuration -------------------------------------------------
# Only the LAST assignment of each name (rdat_path, msa_path, OUT_PATH,
# NB_PROCS, RESTRICTIONS) takes effect; earlier ones are kept as history.
#rdat_path = ['../Data/5SRRNA_SHP_0002.rdat', '../Data/CIDGMP_SHP_0002.rdat']
#msa_path = ['../Data/5SRRNA_SHP_0002_RF00001.stockholm.txt','../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt']
#rdat_path = ['../Data/GLYCFN_SHP_0002.rdat', '../Data/GLYCFN_SHP_0003.rdat',
# '../Data/GLYCFN_SHP_0004.rdat', '../Data/GLYCFN_SHP_0005.rdat',
# '../Data/TRNAPH_SHP_0002.rdat']
#msa_path = ['../Data/RF00504.stockholm.txt', '../Data/RF00504.stockholm.txt',
# '../Data/RF00504.stockholm.txt', '../Data/RF00504.stockholm.txt',
# '../Data/RF00005.stockholm.txt']
#rdat_path = ['../Data/ADDRSW_SHP_0002.rdat', '../Data/ADDRSW_SHP_0003.rdat',
# '../Data/ADDRSW_SHP_0004.rdat']
#msa_path = ['../Data/RF00167.stockholm.txt', '../Data/RF00167.stockholm.txt',
# '../Data/RF00167.stockholm.txt']
# NOTE(review): the four assignments below are overwritten further down
# (rdat_path, msa_path, OUT_PATH and NB_PROCS are all assigned again).
rdat_path = ['../Data/RNAPZ6_1M7_0002.rdat',
'../Data/RNAPZ8_1M7_0001.rdat',
'../Data/RNAPZ8_CMCT_0001.rdat',
'../Data/RNAPZ8_DMS_0001.rdat',
'../Data/RNAPZ8_NMD_0001.rdat']
msa_path = ['../Data/RF00174.stockholm.txt',
'../Data/RF00162.stockholm.txt',
'../Data/RF00162.stockholm.txt',
'../Data/RF00162.stockholm.txt',
'../Data/RF00162.stockholm.txt']
OUT_PATH = '../Data/analysed.txt'
NB_PROCS = 4
# Alternative dataset configurations, disabled by wrapping them in a bare
# string literal (the string is never assigned, so it has no effect).
"""
rdat_path = ['../Data/5SRRNA_SHP_0002.rdat',
'../Data/CIDGMP_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',
]
msa_path = ['../Data/5SRRNA_SHP_0002_RF00001.stockholm.txt',
'../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',
'../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt']
OUT_PATH = '../Data/analyzed_everything.txt'
rdat_path = ['../Data/CIDGMP_SHP_0002.rdat',
'../Data/CIDGMP_SHP_0002.only_cidgmp.rdat']
msa_path = ['../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',
'../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',]
rdat_path = ['../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',
'../Data/TRNAPH_SHP_0002.rdat']
msa_path = ['../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt',
'../Data/TRNAPH_SHP_0002_RF00005.stockholm.txt']
"""
# Active configuration. slave_roc walks rdat_path with enumerate() and reads
# msa_path[i] for the same i, so the two lists must stay index-aligned and
# msa_path must be at least as long as rdat_path.
rdat_path = ['../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',
'../Data/ADDRSW_SHP_0002.rdat',
'../Data/ADDRSW_SHP_0003.rdat',
'../Data/ADDRSW_SHP_0004.rdat']
msa_path = ['../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt',
'../Data/ADDRSW_SHP_0002_RF00167.stockholm.txt',
'../Data/ADDRSW_SHP_0003_RF00167.stockholm.txt',
'../Data/ADDRSW_SHP_0004_RF00167.stockholm.txt']
#rdat_path = ['../Data/RNAPZ6_1M7_0002.rdat',]
#msa_path = ['../Data/RNAPZ6_1M7_0002_RF00174.stockholm.txt']
# Names of the analysis variants slave_roc may run; any key of its `classes`
# table that is not listed here is skipped.
#RESTRICTIONS = ['analyze_loc']
RESTRICTIONS = ['analyze_loc', 'analyze_max', 'analyze_sse', 'analyze_all']
#OUT_PATH = '../Data/analyzed_glyc_trna.txt'
#OUT_PATH = '../Data/analyzed_glyc_add.txt'
OUT_PATH = '../Data/analyzed_glyc_add_l2.txt'
# Number of worker processes handed to multiprocessing.Pool in the main block.
NB_PROCS = 20
......@@ -63,31 +90,43 @@ class Analyze(Arnhack):
('3OAS', 'B', 0),
('3OFC', 'B', 0),
('3ORB', 'B', 0)],
'ADDRSW_SHP_0002':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0003':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0004':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0002':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0003':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0004':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'CIDGMP_SHP_0002':[('3MXH', 'R', -8),
('3IWN', 'A', 2),
('3MUV', 'R', -8),
('3MUT', 'R', -8)],
'GLYCFN_SHP_0002':[('3P49', 'A', 0)],
'GLYCFN_SHP_0003':[('3P49', 'A', 0)],
'GLYCFN_SHP_0004':[('3P49', 'A', 0)],
'GLYCFN_SHP_0005':[('3P49', 'A', 0)],
'GLYCFN_SHP_0002':[('3PGM', 'A', 0), #gly+mg
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0003':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0004':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0005':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'TRNAPH_SHP_0002':[('1EHZ', 'A', -1)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),
('4GB1', 'A', -3),
('4GBI', 'A', -3),
('4GBM', 'A', -3),
('4GIM', 'A', -3),
('4GIR', 'A', -3),
('4GMG', 'A', -3)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),#all
('4GB1', 'A', -3),#B12
('4GBI', 'A', -3),#B12 + IRI
('4GBM', 'A', -3),#B12 + MG
('4GIM', 'A', -3),#IRI + MG
('4GIR', 'A', -3),#IRI
('4GMG', 'A', -3)],#MG
}
......@@ -191,8 +230,6 @@ class Analyze(Arnhack):
truth[pdb_id] = {'TP':TP, 'FP':FP, 'P':len(P),
'TN':TN, 'FN':FN, 'N':len(N)}
return truth
def get_sen_spe_truth(self, shape_delta, gamma, zeta):
......@@ -229,7 +266,16 @@ class Analyze(Arnhack):
# One ROC point list per PDB id is expected from get_roc; points are
# presumably (x, y) tuples, since they are compared to (0, 0) / (1, 1)
# below -- TODO confirm against get_roc.
data = self.get_roc(shape_delta, gamma, zetas_min, zetas_max)
roc = []
# Key into self.d: the input file name up to its first dot.
rna = os.path.basename(self.path).rsplit('.')[0]
# Walk the reference structures in sorted PDB-id order so that every call
# emits its AUC values in the same positions.
for pdb_id, chain, offset in sorted(self.d[rna], key=lambda x: x[0]):
    if pdb_id not in data:
        # Keep a placeholder so positions still line up with the sorted ids.
        roc.append(np.nan)
        continue
    # Close the curve at (1, 1). list.append accepts exactly one argument,
    # so the point must be passed as a single tuple (append(1, 1) raises
    # TypeError).
    if data[pdb_id][-1] != (1, 1):
        data[pdb_id].append((1, 1))
    # Anchor the curve at the origin.
    if data[pdb_id][0] != (0, 0):
        data[pdb_id] = [(0, 0)] + data[pdb_id]
    to_plot = np.array(data[pdb_id])
    # Area under the curve by the trapezoidal rule: column 0 is the x axis,
    # column 1 the y axis.
    roc.append(np.sum((to_plot[:-1, 1] + to_plot[1:, 1])
                      * (to_plot[1:, 0] - to_plot[:-1, 0])) / 2)
"""
......@@ -279,24 +325,43 @@ class Analyze_all_shape_dists(Analyze):
('3OAS', 'B', 0),
('3OFC', 'B', 0),
('3ORB', 'B', 0)],
'ADDRSW_SHP_0002':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0003':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0004':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0002':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0003':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0004':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'CIDGMP_SHP_0002':[('3MXH', 'R', -8),
('3IWN', 'A', 2),
('3MUV', 'R', -8),
('3MUT', 'R', -8)],
'GLYCFN_SHP_0002':[('3P49', 'A', 0)],
'GLYCFN_SHP_0003':[('3P49', 'A', 0)],
'GLYCFN_SHP_0004':[('3P49', 'A', 0)],
'GLYCFN_SHP_0005':[('3P49', 'A', 0)],
'GLYCFN_SHP_0002':[('3PGM', 'A', 0), #gly+mg
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0003':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0004':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0005':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'TRNAPH_SHP_0002':[('1EHZ', 'A', -1)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),#all
('4GB1', 'A', -3),#B12
('4GBI', 'A', -3),#B12 + IRI
('4GBM', 'A', -3),#B12 + MG
('4GIM', 'A', -3),#IRI + MG
('4GIR', 'A', -3),#IRI
('4GMG', 'A', -3)],#MG
}
......@@ -314,7 +379,7 @@ class Analyze_all_shape_dists(Analyze):
class Analyze_max_shape_dists(Analyze):
def __init__(self, *args, **kwargs):
    """Initialise through the parent class and select max_shape_dist.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    # The two-argument super() must name *this* class. Naming the sibling
    # class Analyze_all_shape_dists raises TypeError, because an instance of
    # this class is not an instance of that one.
    super(Analyze_max_shape_dists, self).__init__(*args, **kwargs)
    # Route every get_shape_dist call to this class's max-based distance.
    self.get_shape_dist = self.max_shape_dist
......@@ -323,24 +388,43 @@ class Analyze_max_shape_dists(Analyze):
('3OAS', 'B', 0),
('3OFC', 'B', 0),
('3ORB', 'B', 0)],
'ADDRSW_SHP_0002':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0003':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0004':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0002':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0003':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0004':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'CIDGMP_SHP_0002':[('3MXH', 'R', -8),
('3IWN', 'A', 2),
('3MUV', 'R', -8),
('3MUT', 'R', -8)],
'GLYCFN_SHP_0002':[('3P49', 'A', 0)],
'GLYCFN_SHP_0003':[('3P49', 'A', 0)],
'GLYCFN_SHP_0004':[('3P49', 'A', 0)],
'GLYCFN_SHP_0005':[('3P49', 'A', 0)],
'GLYCFN_SHP_0002':[('3PGM', 'A', 0), #gly+mg
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0003':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0004':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0005':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'TRNAPH_SHP_0002':[('1EHZ', 'A', -1)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),#all
('4GB1', 'A', -3),#B12
('4GBI', 'A', -3),#B12 + IRI
('4GBM', 'A', -3),#B12 + MG
('4GIM', 'A', -3),#IRI + MG
('4GIR', 'A', -3),#IRI
('4GMG', 'A', -3)],#MG
}
......@@ -352,8 +436,6 @@ class Analyze_max_shape_dists(Analyze):
if mut_pos not in self.shape:
return None
l_bnd = max(0, mut_pos-delta)
u_bnd = min(mut_pos+delta+1, len(self.wt))
return max(l2(self.shape[mut_pos][max(0, i-delta):min(i+delta+1, len(self.wt))],
self.wt_shape[max(0, i-delta):min(i+delta+1, len(self.wt))])
for i in range(len(self.wt)))
......@@ -370,24 +452,43 @@ class Analyze_sse_shape_dists(Analyze):
('3OAS', 'B', 0),
('3OFC', 'B', 0),
('3ORB', 'B', 0)],
'ADDRSW_SHP_0002':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0003':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0004':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0002':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0003':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0004':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'CIDGMP_SHP_0002':[('3MXH', 'R', -8),
('3IWN', 'A', 2),
('3MUV', 'R', -8),
('3MUT', 'R', -8)],
'GLYCFN_SHP_0002':[('3P49', 'A', 0)],
'GLYCFN_SHP_0003':[('3P49', 'A', 0)],
'GLYCFN_SHP_0004':[('3P49', 'A', 0)],
'GLYCFN_SHP_0005':[('3P49', 'A', 0)],
'GLYCFN_SHP_0002':[('3PGM', 'A', 0), #gly+mg
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0003':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0004':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0005':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'TRNAPH_SHP_0002':[('1EHZ', 'A', -1)],
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),#all
('4GB1', 'A', -3),#B12
('4GBI', 'A', -3),#B12 + IRI
('4GBM', 'A', -3),#B12 + MG
('4GIM', 'A', -3),#IRI + MG
('4GIR', 'A', -3),#IRI
('4GMG', 'A', -3)],#MG
}
......@@ -410,28 +511,29 @@ class Analyze_sse_shape_dists(Analyze):
def slave_roc(args):
    """Run every enabled analysis variant on every dataset for one setting.

    args is a 4-tuple ``(shape_delta, gamma, z_min, z_max)``; it is a single
    argument because the function is mapped over a multiprocessing pool.

    For each path in ``rdat_path`` (paired by index with ``msa_path``), each
    variant whose name appears in ``RESTRICTIONS`` is instantiated, given the
    alignment, and asked for ``graph_roc(shape_delta, gamma, z_min, z_max)``.

    Returns a tuple with one ``(per_dataset, shape_delta, gamma)`` entry per
    dataset, where ``per_dataset`` is a tuple of
    ``(variant_name, value, ...)`` tuples.
    """
    shape_delta, gamma, z_min, z_max = args
    classes = {'analyze_loc': Analyze,
               'analyze_all': Analyze_all_shape_dists,
               'analyze_max': Analyze_max_shape_dists,
               'analyze_sse': Analyze_sse_shape_dists}
    roc = []
    for i, rpath in enumerate(rdat_path):
        mpath = msa_path[i]
        # Fallback start time so the timing line below cannot hit an
        # unbound name when no variant is enabled for this dataset.
        t = time()
        tmp_l = []
        for c in classes:
            if c not in RESTRICTIONS:
                continue
            ana = classes[c](rpath)
            ana.add_msa(mpath)
            ana.msa_npmi()
            # Timing deliberately starts after the alignment work, so only
            # graph_roc of the last variant run is measured.
            t = time()
            tmp_l.append(tuple([c] + list(
                ana.graph_roc(shape_delta, gamma, z_min, z_max))))
            # Single-argument print: same output on Python 2 and Python 3.
            print('%s done' % c)
        roc.append(tuple(tmp_l))
        print('to compute one truth %s' % (time() - t))
    return tuple([(x, shape_delta, gamma) for x in roc])
if __name__ == '__main__':
for i, rpath in enumerate(rdat_path):
print rpath
mpath = msa_path[i]
print mpath
ana = Analyze(rpath)
ana.add_msa(msa_path[i], infernal_align=True)
print ana.resi_close()
sys.exit()
args = ((shape_delta, gamma, 0, 100) for shape_delta in range(80, 99) for gamma in range(1,15))
args = ((shape_delta, gamma, 0, 100) for shape_delta in range(80, 100) for gamma in range(1,35))
pool = Pool(processes=NB_PROCS)
out = []
for x in pool.imap_unordered(slave_roc, args):
......
import __main__
__main__.pymol_argv = ['pymol','-qc']
#import matplotlib
#matplotlib.use('PDF')
#matplotlib.rc('text', usetex=True)
#import __main__
#__main__.pymol_argv = ['pymol','-qc']
import sys
import os
......@@ -8,10 +11,7 @@ import numpy as np
import cPickle
from time import time
from itertools import combinations, product
import matplotlib
matplotlib.use('PDF')
matplotlib.rc('text', usetex=True)
from matplotlib import pyplot as plt
#from matplotlib import pyplot as plt
from pprint import pprint
from multiprocessing import Pool
from subprocess import check_output
......@@ -19,15 +19,48 @@ from tempfile import NamedTemporaryFile as NTF
#import pymol
import networkx as nx
from numpy import average
from analyze import Analyze
# Path of the remuRNA executable that is run through check_output.
# NOTE(review): both values are machine-specific absolute paths; the second
# assignment overrides the first.
PATH_REMU = '/Users/vreinharz/Applications/remuRNA/remuRNA'
PATH_REMU = '/home/mcb/vreinh/Applications/remuRNA/remuRNA'
# Earlier dataset configurations, disabled by wrapping them in a bare string
# literal (the string is never assigned, so it has no effect).
"""
rdat_path = ['../Data/5SRRNA_SHP_0002.rdat', '../Data/CIDGMP_SHP_0002.rdat']
msa_path = ['../Data/5SRRNA_SHP_0002_RF00001.stockholm.txt','../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt']
OUT_PATH = '../Data/analysed_remu.txt'
NB_PROCS = 4
OUT_PATH = '../Data/analyzed_remushape.txt'
rdat_path = ['../Data/CIDGMP_SHP_0002.rdat',
'../Data/CIDGMP_SHP_0002.only_cidgmp.rdat']
msa_path = ['../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',
'../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',]
"""
# Full dataset list; the two lists are presumably consumed index-by-index
# (entry i of rdat_path with entry i of msa_path) -- verify against the code
# that reads them.
rdat_path = ['../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',
'../Data/ADDRSW_SHP_0002.rdat',
'../Data/ADDRSW_SHP_0003.rdat',
'../Data/ADDRSW_SHP_0004.rdat',
'../Data/TRNAPH_SHP_0002.rdat',
'../Data/RNAPZ6_1M7_0002.rdat',]
msa_path = ['../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt',
'../Data/ADDRSW_SHP_0002_RF00167.stockholm.txt',
'../Data/ADDRSW_SHP_0003_RF00167.stockholm.txt',
'../Data/ADDRSW_SHP_0004_RF00167.stockholm.txt',
'../Data/TRNAPH_SHP_0002_RF00005.stockholm.txt',
'../Data/RNAPZ6_1M7_0002_RF00174.stockholm.txt']
# NOTE(review): the next two assignments replace the nine-entry lists above,
# so only the TRNAPH dataset is actually used.
rdat_path = ['../Data/TRNAPH_SHP_0002.rdat']
msa_path = ['../Data/TRNAPH_SHP_0002_RF00005.stockholm.txt']
OUT_PATH = '../Data/analyzed_trna_remu.txt'
# presumably the multiprocessing pool size -- confirm at the Pool(...) call
NB_PROCS = 5
class Remu(Analyze):
"""Class to plot and analyze data from Arnhack"""
......@@ -42,25 +75,44 @@ class Remu(Analyze):
('3OAS', 'B', 0),
('3OFC', 'B', 0),
('3ORB', 'B', 0)],
'ADDRSW_SHP_0002':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0003':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0004':[('1Y26', 'X', -12),
('1Y27', 'X', -12),
('2G9C', 'A', -12)],
'ADDRSW_SHP_0002':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0003':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'ADDRSW_SHP_0004':[('1YAD', 'X', -12),
('1YMG', 'X', -12),
('1YAL', 'X', -12)],
'CIDGMP_SHP_0002':[('3MXH', 'R', -8),
('3IWN', 'A', 2),
('3MUV', 'R', -8),
('3MUT', 'R', -8)],
'GLYCFN_SHP_0002':[('3P49', 'A', 0)],
'GLYCFN_SHP_0003':[('3P49', 'A', 0)],
'GLYCFN_SHP_0004':[('3P49', 'A', 0)],
'GLYCFN_SHP_0005':[('3P49', 'A', 0)],
'GLYCFN_SHP_0002':[('3PGM', 'A', 0), #gly+mg
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0003':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0004':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'GLYCFN_SHP_0005':[('3PGM', 'A', 0),
('3PGL', 'A', 0), #gly
('3PGP', 'A', 0), #gly + prot
('3PAL', 'A', 0)], #gly + mg + prot
'TRNAPH_SHP_0002':[('1EHZ', 'A', -1)],
}
'RNAPZ6_1M7_0002':[('4GAL', 'A', -3),#all
('4GB1', 'A', -3),#B12
('4GBI', 'A', -3),#B12 + IRI
('4GBM', 'A', -3),#B12 + MG
('4GIM', 'A', -3),#IRI + MG
('4GIR', 'A', -3),#IRI
('4GMG', 'A', -3)],#MG
}
......@@ -79,7 +131,7 @@ class Remu(Analyze):
try:
out = check_output([PATH_REMU, tmp_file.name])
except:
print 'wtf'
print 'remuRNA problem:', sys.exc_info()[0]
os.remove(tmp_file.name)
return
os.remove(tmp_file.name)
......@@ -99,9 +151,19 @@ class Remu(Analyze):
# One ROC point list per PDB id is expected from get_roc; points are
# presumably (x, y) tuples, since they are compared to (0, 0) / (1, 1)
# below -- TODO confirm against get_roc.
data = self.get_roc(shape_delta, gamma, zetas_min, zetas_max)
roc = []
# Key into self.d: the input file name up to its first dot.
rna = os.path.basename(self.path).rsplit('.')[0]
# Iterate the reference structures sorted by PDB id so the returned AUC
# values always appear in the same order.
for pdb_id, chain, offset in sorted(self.d[rna], key=lambda x: x[0]):
    if pdb_id not in data:
        # No curve for this structure: record NaN to keep positions aligned.
        roc.append(np.nan)
        continue
    curve = data[pdb_id]
    # Close the curve at (1, 1). The point must be appended as ONE tuple;
    # append(1, 1) raises TypeError because list.append takes one argument.
    if curve[-1] != (1, 1):
        curve.append((1, 1))
    # Anchor the curve at the origin; rebinding keeps data[pdb_id] updated
    # exactly as the original assignment did.
    if curve[0] != (0, 0):
        curve = [(0, 0)] + curve
        data[pdb_id] = curve
    to_plot = np.array(curve)
    # Area under the curve by the trapezoidal rule: column 0 is the x axis,
    # column 1 the y axis.
    roc.append(np.sum((to_plot[:-1, 1] + to_plot[1:, 1])
                      * (to_plot[1:, 0] - to_plot[:-1, 0])) / 2)
"""
plt.plot(to_plot[:,0], to_plot[:,1], color=colors[pdb_id], lw=3, label=pdb_id)
plt.plot([0,1], [0,1], '-k')
plt.ylabel('Sensitivity', fontsize=16)
......@@ -111,8 +173,13 @@ class Remu(Analyze):
plt.title("Best AUC %.3f" % max(roc) if roc else 0, fontsize=16)
plt.legend(loc='lower right')
plt.show()
plt.savefig('%s_remu_%s_%s' % (os.path.basename(self.path).split('_',1)[0],shape_delta, gamma))
return tuple(roc) if roc else 0
plt.savefig('%s_%s_%s' % (os.path.basename(self.path).split('_',1)[0],shape_delta, gamma))
plt.clf()
"""
#return max(roc) if roc else 0
return tuple(roc) if roc else (0,)
class RemuSHAPE(Analyze):
"""Class to plot and analyze data from Arnhack"""
......@@ -187,9 +254,19 @@ class RemuSHAPE(Analyze):
data = self.get_roc(shape_delta, gamma, zetas_min, zetas_max)
roc = []
for pdb_id in data:
rna = os.path.basename(self.path).rsplit('.')[0]
for pdb_id,chain, offset in sorted(self.d[rna], key=lambda x:x[0]):
if pdb_id not in data: