Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Vladimir Reinharz
aRNhAck
Commits
59468891
Commit
59468891
authored
Feb 10, 2016
by
Vladimir Reinharz
Browse files
table all data in TeX
parent
9eb39650
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Data/analyzed_final.txt
View file @
59468891
This diff is collapsed.
Click to expand it.
Src/analyze.py
View file @
59468891
...
...
@@ -8,7 +8,7 @@ import numpy as np
import
cPickle
from
time
import
time
from
itertools
import
combinations
,
product
import
matplotlib
#
import matplotlib
#matplotlib.use('PDF')
#matplotlib.rc('text', usetex=True)
#from matplotlib import pyplot as plt
...
...
@@ -20,34 +20,61 @@ import networkx as nx
from
arnhack
import
Arnhack
#rdat_path = ['../Data/5SRRNA_SHP_0002.rdat', '../Data/CIDGMP_SHP_0002.rdat']
#msa_path = ['../Data/5SRRNA_SHP_0002_RF00001.stockholm.txt','../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt']
#rdat_path = ['../Data/GLYCFN_SHP_0002.rdat', '../Data/GLYCFN_SHP_0003.rdat',
# '../Data/GLYCFN_SHP_0004.rdat', '../Data/GLYCFN_SHP_0005.rdat',
# '../Data/TRNAPH_SHP_0002.rdat']
#msa_path = ['../Data/RF00504.stockholm.txt', '../Data/RF00504.stockholm.txt',
# '../Data/RF00504.stockholm.txt', '../Data/RF00504.stockholm.txt',
# '../Data/RF00005.stockholm.txt']
#rdat_path = ['../Data/ADDRSW_SHP_0002.rdat', '../Data/ADDRSW_SHP_0003.rdat',
# '../Data/ADDRSW_SHP_0004.rdat']
#msa_path = ['../Data/RF00167.stockholm.txt', '../Data/RF00167.stockholm.txt',
# '../Data/RF00167.stockholm.txt']
rdat_path
=
[
'../Data/RNAPZ6_1M7_0002.rdat'
,
'../Data/RNAPZ8_1M7_0001.rdat'
,
'../Data/RNAPZ8_CMCT_0001.rdat'
,
'../Data/RNAPZ8_DMS_0001.rdat'
,
'../Data/RNAPZ8_NMD_0001.rdat'
]
msa_path
=
[
'../Data/RF00174.stockholm.txt'
,
'../Data/RF00162.stockholm.txt'
,
'../Data/RF00162.stockholm.txt'
,
'../Data/RF00162.stockholm.txt'
,
'../Data/RF00162.stockholm.txt'
]
OUT_PATH
=
'../Data/analysed.txt'
NB_PROCS
=
4
"""
rdat_path = ['../Data/5SRRNA_SHP_0002.rdat',
'../Data/CIDGMP_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',
]
msa_path = ['../Data/5SRRNA_SHP_0002_RF00001.stockholm.txt',
'../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',
'../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt']
OUT_PATH = '../Data/analyzed_everything.txt'
rdat_path = ['../Data/CIDGMP_SHP_0002.rdat',
'../Data/CIDGMP_SHP_0002.only_cidgmp.rdat']
msa_path = ['../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',
'../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',]
rdat_path = ['../Data/GLYCFN_SHP_0002.rdat',
'../Data/GLYCFN_SHP_0003.rdat',
'../Data/GLYCFN_SHP_0004.rdat',
'../Data/GLYCFN_SHP_0005.rdat',
'../Data/TRNAPH_SHP_0002.rdat']
msa_path = ['../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt',
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt',
'../Data/TRNAPH_SHP_0002_RF00005.stockholm.txt']
"""
rdat_path
=
[
'../Data/GLYCFN_SHP_0002.rdat'
,
'../Data/GLYCFN_SHP_0003.rdat'
,
'../Data/GLYCFN_SHP_0004.rdat'
,
'../Data/GLYCFN_SHP_0005.rdat'
,
'../Data/ADDRSW_SHP_0002.rdat'
,
'../Data/ADDRSW_SHP_0003.rdat'
,
'../Data/ADDRSW_SHP_0004.rdat'
]
msa_path
=
[
'../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt'
,
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt'
,
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt'
,
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt'
,
'../Data/ADDRSW_SHP_0002_RF00167.stockholm.txt'
,
'../Data/ADDRSW_SHP_0003_RF00167.stockholm.txt'
,
'../Data/ADDRSW_SHP_0004_RF00167.stockholm.txt'
]
#rdat_path = ['../Data/RNAPZ6_1M7_0002.rdat',]
#msa_path = ['../Data/RNAPZ6_1M7_0002_RF00174.stockholm.txt']
#RESTRICTIONS = ['analyze_loc']
RESTRICTIONS
=
[
'analyze_loc'
,
'analyze_max'
,
'analyze_sse'
,
'analyze_all'
]
#OUT_PATH = '../Data/analyzed_glyc_trna.txt'
#OUT_PATH = '../Data/analyzed_glyc_add.txt'
OUT_PATH
=
'../Data/analyzed_glyc_add_l2.txt'
NB_PROCS
=
20
...
...
@@ -63,31 +90,43 @@ class Analyze(Arnhack):
(
'3OAS'
,
'B'
,
0
),
(
'3OFC'
,
'B'
,
0
),
(
'3ORB'
,
'B'
,
0
)],
'ADDRSW_SHP_0002'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0002'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'CIDGMP_SHP_0002'
:[(
'3MXH'
,
'R'
,
-
8
),
(
'3IWN'
,
'A'
,
2
),
(
'3MUV'
,
'R'
,
-
8
),
(
'3MUT'
,
'R'
,
-
8
)],
'GLYCFN_SHP_0002'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0003'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0004'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0005'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0002'
:[(
'3PGM'
,
'A'
,
0
),
#gly+mg
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0003'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0004'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0005'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'TRNAPH_SHP_0002'
:[(
'1EHZ'
,
'A'
,
-
1
)],
'RNAPZ6_1M7_0002'
:[(
'4GAL'
,
'A'
,
-
3
),
(
'4GB1'
,
'A'
,
-
3
),
(
'4GBI'
,
'A'
,
-
3
),
(
'4GBM'
,
'A'
,
-
3
),
(
'4GIM'
,
'A'
,
-
3
),
(
'4GIR'
,
'A'
,
-
3
),
(
'4GMG'
,
'A'
,
-
3
)],
'RNAPZ6_1M7_0002'
:[(
'4GAL'
,
'A'
,
-
3
),
#all
(
'4GB1'
,
'A'
,
-
3
),
#B12
(
'4GBI'
,
'A'
,
-
3
),
#B12 + IRI
(
'4GBM'
,
'A'
,
-
3
),
#B12 + MG
(
'4GIM'
,
'A'
,
-
3
),
#IRI + MG
(
'4GIR'
,
'A'
,
-
3
),
#IRI
(
'4GMG'
,
'A'
,
-
3
)],
#MG
}
...
...
@@ -191,8 +230,6 @@ class Analyze(Arnhack):
truth
[
pdb_id
]
=
{
'TP'
:
TP
,
'FP'
:
FP
,
'P'
:
len
(
P
),
'TN'
:
TN
,
'FN'
:
FN
,
'N'
:
len
(
N
)}
return
truth
def
get_sen_spe_truth
(
self
,
shape_delta
,
gamma
,
zeta
):
...
...
@@ -229,7 +266,16 @@ class Analyze(Arnhack):
data
=
self
.
get_roc
(
shape_delta
,
gamma
,
zetas_min
,
zetas_max
)
roc
=
[]
for
pdb_id
in
data
:
rna
=
os
.
path
.
basename
(
self
.
path
).
rsplit
(
'.'
)[
0
]
for
pdb_id
,
chain
,
offset
in
sorted
(
self
.
d
[
rna
],
key
=
lambda
x
:
x
[
0
]):
if
pdb_id
not
in
data
:
roc
.
append
(
np
.
nan
)
continue
if
data
[
pdb_id
][
-
1
]
!=
(
1
,
1
):
data
[
pdb_id
].
append
(
1
,
1
)
if
data
[
pdb_id
][
0
]
!=
(
0
,
0
):
data
[
pdb_id
]
=
[(
0
,
0
)]
+
data
[
pdb_id
]
to_plot
=
np
.
array
(
data
[
pdb_id
])
roc
.
append
(
np
.
sum
((
to_plot
[:
-
1
,
1
]
+
to_plot
[
1
:,
1
])
*
(
to_plot
[
1
:,
0
]
-
to_plot
[:
-
1
,
0
]))
/
2
)
"""
...
...
@@ -279,24 +325,43 @@ class Analyze_all_shape_dists(Analyze):
(
'3OAS'
,
'B'
,
0
),
(
'3OFC'
,
'B'
,
0
),
(
'3ORB'
,
'B'
,
0
)],
'ADDRSW_SHP_0002'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0002'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'CIDGMP_SHP_0002'
:[(
'3MXH'
,
'R'
,
-
8
),
(
'3IWN'
,
'A'
,
2
),
(
'3MUV'
,
'R'
,
-
8
),
(
'3MUT'
,
'R'
,
-
8
)],
'GLYCFN_SHP_0002'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0003'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0004'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0005'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0002'
:[(
'3PGM'
,
'A'
,
0
),
#gly+mg
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0003'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0004'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0005'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'TRNAPH_SHP_0002'
:[(
'1EHZ'
,
'A'
,
-
1
)],
'RNAPZ6_1M7_0002'
:[(
'4GAL'
,
'A'
,
-
3
),
#all
(
'4GB1'
,
'A'
,
-
3
),
#B12
(
'4GBI'
,
'A'
,
-
3
),
#B12 + IRI
(
'4GBM'
,
'A'
,
-
3
),
#B12 + MG
(
'4GIM'
,
'A'
,
-
3
),
#IRI + MG
(
'4GIR'
,
'A'
,
-
3
),
#IRI
(
'4GMG'
,
'A'
,
-
3
)],
#MG
}
...
...
@@ -314,7 +379,7 @@ class Analyze_all_shape_dists(Analyze):
class
Analyze_max_shape_dists
(
Analyze
):
def
__init__
(
self
,
*
args
,
**
kwargs
):
"""Init with Arnhack"""
super
(
Analyze_
all
_shape_dists
,
self
).
__init__
(
*
args
,
**
kwargs
)
super
(
Analyze_
max
_shape_dists
,
self
).
__init__
(
*
args
,
**
kwargs
)
self
.
get_shape_dist
=
self
.
max_shape_dist
...
...
@@ -323,24 +388,43 @@ class Analyze_max_shape_dists(Analyze):
(
'3OAS'
,
'B'
,
0
),
(
'3OFC'
,
'B'
,
0
),
(
'3ORB'
,
'B'
,
0
)],
'ADDRSW_SHP_0002'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0002'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'CIDGMP_SHP_0002'
:[(
'3MXH'
,
'R'
,
-
8
),
(
'3IWN'
,
'A'
,
2
),
(
'3MUV'
,
'R'
,
-
8
),
(
'3MUT'
,
'R'
,
-
8
)],
'GLYCFN_SHP_0002'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0003'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0004'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0005'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0002'
:[(
'3PGM'
,
'A'
,
0
),
#gly+mg
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0003'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0004'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0005'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'TRNAPH_SHP_0002'
:[(
'1EHZ'
,
'A'
,
-
1
)],
'RNAPZ6_1M7_0002'
:[(
'4GAL'
,
'A'
,
-
3
),
#all
(
'4GB1'
,
'A'
,
-
3
),
#B12
(
'4GBI'
,
'A'
,
-
3
),
#B12 + IRI
(
'4GBM'
,
'A'
,
-
3
),
#B12 + MG
(
'4GIM'
,
'A'
,
-
3
),
#IRI + MG
(
'4GIR'
,
'A'
,
-
3
),
#IRI
(
'4GMG'
,
'A'
,
-
3
)],
#MG
}
...
...
@@ -352,8 +436,6 @@ class Analyze_max_shape_dists(Analyze):
if
mut_pos
not
in
self
.
shape
:
return
None
l_bnd
=
max
(
0
,
mut_pos
-
delta
)
u_bnd
=
min
(
mut_pos
+
delta
+
1
,
len
(
self
.
wt
))
return
max
(
l2
(
self
.
shape
[
mut_pos
][
max
(
0
,
i
-
delta
):
min
(
i
+
delta
+
1
,
len
(
self
.
wt
))],
self
.
wt_shape
[
max
(
0
,
i
-
delta
):
min
(
i
+
delta
+
1
,
len
(
self
.
wt
))])
for
i
in
range
(
len
(
self
.
wt
)))
...
...
@@ -370,24 +452,43 @@ class Analyze_sse_shape_dists(Analyze):
(
'3OAS'
,
'B'
,
0
),
(
'3OFC'
,
'B'
,
0
),
(
'3ORB'
,
'B'
,
0
)],
'ADDRSW_SHP_0002'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0002'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'CIDGMP_SHP_0002'
:[(
'3MXH'
,
'R'
,
-
8
),
(
'3IWN'
,
'A'
,
2
),
(
'3MUV'
,
'R'
,
-
8
),
(
'3MUT'
,
'R'
,
-
8
)],
'GLYCFN_SHP_0002'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0003'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0004'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0005'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0002'
:[(
'3PGM'
,
'A'
,
0
),
#gly+mg
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0003'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0004'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0005'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'TRNAPH_SHP_0002'
:[(
'1EHZ'
,
'A'
,
-
1
)],
'RNAPZ6_1M7_0002'
:[(
'4GAL'
,
'A'
,
-
3
),
#all
(
'4GB1'
,
'A'
,
-
3
),
#B12
(
'4GBI'
,
'A'
,
-
3
),
#B12 + IRI
(
'4GBM'
,
'A'
,
-
3
),
#B12 + MG
(
'4GIM'
,
'A'
,
-
3
),
#IRI + MG
(
'4GIR'
,
'A'
,
-
3
),
#IRI
(
'4GMG'
,
'A'
,
-
3
)],
#MG
}
...
...
@@ -410,28 +511,29 @@ class Analyze_sse_shape_dists(Analyze):
def
slave_roc
(
args
):
roc
=
[]
shape_delta
,
gamma
,
z_min
,
z_max
=
args
classes
=
{
'analyze_loc'
:
Analyze
,
'analyze_all'
:
Analyze_all_shape_dists
,
'analyze_max'
:
Analyze_max_shape_dists
,
'analyze_sse'
:
Analyze_sse_shape_dists
}
for
i
,
rpath
in
enumerate
(
rdat_path
):
mpath
=
msa_path
[
i
]
ana
=
Analyze
(
rpath
)
ana
.
add_msa
(
msa_path
[
i
])
ana
.
msa_npmi
()
t
=
time
()
roc
.
append
(
ana
.
graph_roc
(
shape_delta
,
gamma
,
z_min
,
z_max
))
tmp_l
=
[]
for
c
in
classes
:
if
c
not
in
RESTRICTIONS
:
continue
ana
=
classes
[
c
](
rpath
)
ana
.
add_msa
(
msa_path
[
i
])
ana
.
msa_npmi
()
t
=
time
()
tmp_l
.
append
(
tuple
([
c
]
+
list
(
ana
.
graph_roc
(
shape_delta
,
gamma
,
z_min
,
z_max
))))
print
c
,
'done'
roc
.
append
(
tuple
(
tmp_l
))
print
'to compute one truth'
,
time
()
-
t
return
tuple
([(
x
,
shape_delta
,
gamma
)
for
x
in
roc
if
roc
])
if
__name__
==
'__main__'
:
for
i
,
rpath
in
enumerate
(
rdat_path
):
print
rpath
mpath
=
msa_path
[
i
]
print
mpath
ana
=
Analyze
(
rpath
)
ana
.
add_msa
(
msa_path
[
i
],
infernal_align
=
True
)
print
ana
.
resi_close
()
sys
.
exit
()
args
=
((
shape_delta
,
gamma
,
0
,
100
)
for
shape_delta
in
range
(
80
,
99
)
for
gamma
in
range
(
1
,
15
))
args
=
((
shape_delta
,
gamma
,
0
,
100
)
for
shape_delta
in
range
(
80
,
100
)
for
gamma
in
range
(
1
,
35
))
pool
=
Pool
(
processes
=
NB_PROCS
)
out
=
[]
for
x
in
pool
.
imap_unordered
(
slave_roc
,
args
):
...
...
Src/remu.py
View file @
59468891
import
__main__
__main__
.
pymol_argv
=
[
'pymol'
,
'-qc'
]
#import matplotlib
#matplotlib.use('PDF')
#matplotlib.rc('text', usetex=True)
#import __main__
#__main__.pymol_argv = ['pymol','-qc']
import
sys
import
os
...
...
@@ -8,10 +11,7 @@ import numpy as np
import
cPickle
from
time
import
time
from
itertools
import
combinations
,
product
import
matplotlib
matplotlib
.
use
(
'PDF'
)
matplotlib
.
rc
(
'text'
,
usetex
=
True
)
from
matplotlib
import
pyplot
as
plt
#from matplotlib import pyplot as plt
from
pprint
import
pprint
from
multiprocessing
import
Pool
from
subprocess
import
check_output
...
...
@@ -19,15 +19,48 @@ from tempfile import NamedTemporaryFile as NTF
#import pymol
import
networkx
as
nx
from
numpy
import
average
from
analyze
import
Analyze
PATH_REMU
=
'/
Users
/vreinh
arz
/Applications/remuRNA/remuRNA'
PATH_REMU
=
'/
home/mcb
/vreinh/Applications/remuRNA/remuRNA'
"""
rdat_path = ['../Data/5SRRNA_SHP_0002.rdat', '../Data/CIDGMP_SHP_0002.rdat']
msa_path = ['../Data/5SRRNA_SHP_0002_RF00001.stockholm.txt','../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt']
OUT_PATH
=
'../Data/analysed_remu.txt'
NB_PROCS
=
4
OUT_PATH = '../Data/analyzed_remushape.txt'
rdat_path = ['../Data/CIDGMP_SHP_0002.rdat',
'../Data/CIDGMP_SHP_0002.only_cidgmp.rdat']
msa_path = ['../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',
'../Data/CIDGMP_SHP_0002_RF01051.stockholm.txt',]
"""
rdat_path
=
[
'../Data/GLYCFN_SHP_0002.rdat'
,
'../Data/GLYCFN_SHP_0003.rdat'
,
'../Data/GLYCFN_SHP_0004.rdat'
,
'../Data/GLYCFN_SHP_0005.rdat'
,
'../Data/ADDRSW_SHP_0002.rdat'
,
'../Data/ADDRSW_SHP_0003.rdat'
,
'../Data/ADDRSW_SHP_0004.rdat'
,
'../Data/TRNAPH_SHP_0002.rdat'
,
'../Data/RNAPZ6_1M7_0002.rdat'
,]
msa_path
=
[
'../Data/GLYCFN_SHP_0002_RF00504.stockholm.txt'
,
'../Data/GLYCFN_SHP_0003_RF00504.stockholm.txt'
,
'../Data/GLYCFN_SHP_0004_RF00504.stockholm.txt'
,
'../Data/GLYCFN_SHP_0005_RF00504.stockholm.txt'
,
'../Data/ADDRSW_SHP_0002_RF00167.stockholm.txt'
,
'../Data/ADDRSW_SHP_0003_RF00167.stockholm.txt'
,
'../Data/ADDRSW_SHP_0004_RF00167.stockholm.txt'
,
'../Data/TRNAPH_SHP_0002_RF00005.stockholm.txt'
,
'../Data/RNAPZ6_1M7_0002_RF00174.stockholm.txt'
]
rdat_path
=
[
'../Data/TRNAPH_SHP_0002.rdat'
]
msa_path
=
[
'../Data/TRNAPH_SHP_0002_RF00005.stockholm.txt'
]
OUT_PATH
=
'../Data/analyzed_trna_remu.txt'
NB_PROCS
=
5
class
Remu
(
Analyze
):
"""Class to plot and analyze data from Arnhack"""
...
...
@@ -42,25 +75,44 @@ class Remu(Analyze):
(
'3OAS'
,
'B'
,
0
),
(
'3OFC'
,
'B'
,
0
),
(
'3ORB'
,
'B'
,
0
)],
'ADDRSW_SHP_0002'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
26
'
,
'X'
,
-
12
),
(
'1Y
27
'
,
'X'
,
-
12
),
(
'
2G9C
'
,
'
A
'
,
-
12
)],
'ADDRSW_SHP_0002'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0003'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'ADDRSW_SHP_0004'
:[(
'1Y
AD
'
,
'X'
,
-
12
),
(
'1Y
MG
'
,
'X'
,
-
12
),
(
'
1YAL
'
,
'
X
'
,
-
12
)],
'CIDGMP_SHP_0002'
:[(
'3MXH'
,
'R'
,
-
8
),
(
'3IWN'
,
'A'
,
2
),
(
'3MUV'
,
'R'
,
-
8
),
(
'3MUT'
,
'R'
,
-
8
)],
'GLYCFN_SHP_0002'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0003'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0004'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0005'
:[(
'3P49'
,
'A'
,
0
)],
'GLYCFN_SHP_0002'
:[(
'3PGM'
,
'A'
,
0
),
#gly+mg
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0003'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0004'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'GLYCFN_SHP_0005'
:[(
'3PGM'
,
'A'
,
0
),
(
'3PGL'
,
'A'
,
0
),
#gly
(
'3PGP'
,
'A'
,
0
),
#gly + prot
(
'3PAL'
,
'A'
,
0
)],
#gly + mg + prot
'TRNAPH_SHP_0002'
:[(
'1EHZ'
,
'A'
,
-
1
)],
}
'RNAPZ6_1M7_0002'
:[(
'4GAL'
,
'A'
,
-
3
),
#all
(
'4GB1'
,
'A'
,
-
3
),
#B12
(
'4GBI'
,
'A'
,
-
3
),
#B12 + IRI
(
'4GBM'
,
'A'
,
-
3
),
#B12 + MG
(
'4GIM'
,
'A'
,
-
3
),
#IRI + MG
(
'4GIR'
,
'A'
,
-
3
),
#IRI
(
'4GMG'
,
'A'
,
-
3
)],
#MG
}
...
...
@@ -79,7 +131,7 @@ class Remu(Analyze):
try
:
out
=
check_output
([
PATH_REMU
,
tmp_file
.
name
])
except
:
print
'
wtf'
print
'
remuRNA problem:'
,
sys
.
exc_info
()[
0
]
os
.
remove
(
tmp_file
.
name
)
return
os
.
remove
(
tmp_file
.
name
)
...
...
@@ -99,9 +151,19 @@ class Remu(Analyze):
data
=
self
.
get_roc
(
shape_delta
,
gamma
,
zetas_min
,
zetas_max
)
roc
=
[]
for
pdb_id
in
data
:
rna
=
os
.
path
.
basename
(
self
.
path
).
rsplit
(
'.'
)[
0
]
for
pdb_id
,
chain
,
offset
in
sorted
(
self
.
d
[
rna
],
key
=
lambda
x
:
x
[
0
]):
if
pdb_id
not
in
data
:
roc
.
append
(
np
.
nan
)
continue
if
data
[
pdb_id
][
-
1
]
!=
(
1
,
1
):
data
[
pdb_id
].
append
(
1
,
1
)
if
data
[
pdb_id
][
0
]
!=
(
0
,
0
):
data
[
pdb_id
]
=
[(
0
,
0
)]
+
data
[
pdb_id
]
to_plot
=
np
.
array
(
data
[
pdb_id
])
roc
.
append
(
np
.
sum
((
to_plot
[:
-
1
,
1
]
+
to_plot
[
1
:,
1
])
*
(
to_plot
[
1
:,
0
]
-
to_plot
[:
-
1
,
0
]))
/
2
)
"""
plt.plot(to_plot[:,0], to_plot[:,1], color=colors[pdb_id], lw=3, label=pdb_id)
plt.plot([0,1], [0,1], '-k')
plt.ylabel('Sensitivity', fontsize=16)
...
...
@@ -111,8 +173,13 @@ class Remu(Analyze):
plt.title("Best AUC %.3f" % max(roc) if roc else 0, fontsize=16)
plt.legend(loc='lower right')
plt.show()
plt
.
savefig
(
'%s_remu_%s_%s'
%
(
os
.
path
.
basename
(
self
.
path
).
split
(
'_'
,
1
)[
0
],
shape_delta
,
gamma
))
return
tuple
(
roc
)
if
roc
else
0
plt.savefig('%s_%s_%s' % (os.path.basename(self.path).split('_',1)[0],shape_delta, gamma))
plt.clf()
"""
#return max(roc) if roc else 0
return
tuple
(
roc
)
if
roc
else
(
0
,)
class
RemuSHAPE
(
Analyze
):
"""Class to plot and analyze data from Arnhack"""
...
...
@@ -187,9 +254,19 @@ class RemuSHAPE(Analyze):
data
=
self
.
get_roc
(
shape_delta
,
gamma
,
zetas_min
,
zetas_max
)
roc
=
[]
for
pdb_id
in
data
:
rna
=
os
.
path
.
basename
(
self
.
path
).
rsplit
(
'.'
)[
0
]
for
pdb_id
,
chain
,
offset
in
sorted
(
self
.
d
[
rna
],
key
=
lambda
x
:
x
[
0
]):
if
pdb_id
not
in
data
: