Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Roman Sarrazin-Gendron
BP6.1
Commits
31c4083e
Commit
31c4083e
authored
Jul 05, 2018
by
Roman Sarrazin-Gendron
Browse files
added window scanning
parent
b36af7b8
Changes
6
Hide whitespace changes
Inline
Side-by-side
models/all_21+_rna3dmotif_106_
_
PDB_names.cPickle
→
models/all_21+_rna3dmotif_106_PDB_names.cPickle
View file @
31c4083e
File moved
models/all_21+_rna3dmotif_106_
_
PDB_positions.cPickle
→
models/all_21+_rna3dmotif_106_PDB_positions.cPickle
View file @
31c4083e
File moved
models/test_pickle.py
View file @
31c4083e
...
...
@@ -3,7 +3,7 @@ import networkx as nx
from
matplotlib
import
pyplot
as
plt
#g = pickle.load(open("all_graphs_pickled/" + "1FFK" + ".nxpickled", "rb"))
PDBid
=
"1
EBR
.A"
PDBid
=
"1
Q7Y
.A"
#print("PDB")
#print(PDBid)
PDB
,
chain
=
PDBid
.
split
(
"."
)
...
...
src/carnaval_to_rmdetect.py
View file @
31c4083e
...
...
@@ -63,6 +63,65 @@ def get_consensus_sequence(seqs):
consensus
.
append
(
best
)
return
consensus
def get_PDB_sequence(PDBid):
    """Rebuild the nucleotide sequence of one chain from a pickled graph.

    ``PDBid`` has the form ``"<PDB>.<chain>"`` (for example ``"1Q7Y.A"``).
    The graph is read from
    ``../models/all_graphs_pickled/<PDB>.nxpickled``.  Each graph node is
    indexed as ``node[0] == (chain, position)`` and carries its nucleotide
    in the ``"nt"`` entry of its data dict.

    Returns a ``(seq, decalage)`` tuple:

    * ``seq`` covers positions 1 up to the highest position found on the
      chain; a position with no node is written as ``"-"``.  Positions
      below 1 are ignored.
    * ``decalage`` is 1 when position 1 had no node (an ``"N"`` placeholder
      is inserted there), otherwise 0.

    When the pickle file does not exist, a message is printed and
    ``("", 0)`` is returned.

    Raises ``ValueError`` if ``PDBid`` does not contain exactly one ``"."``
    or if a node position cannot be converted with ``int``.
    """
    print("PDB")
    print(PDBid)
    PDB, chain = PDBid.split(".")

    graph_path = "../models/all_graphs_pickled/" + PDB + ".nxpickled"
    try:
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe as long as the .nxpickled files are produced locally.
        with open(graph_path, "rb") as handle:
            g = pickle.load(handle)
    except FileNotFoundError:
        print("PDB FILE NOT FOUND")
        return ("", 0)

    # (position, nucleotide) pairs for the requested chain, sorted so that a
    # duplicated position resolves the same way on every run (last pair wins
    # when the dict is built).
    nodes = sorted(
        (int(node[0][1]), node[1]["nt"])
        for node in g.nodes(data=True)
        if node[0][0] == chain
    )
    nuc_by_node = dict(nodes)

    # The sequence always starts at position 1: when the structure has no
    # node there, pad with "N" and report the one-position shift.
    decalage = 0
    if 1 not in nuc_by_node:
        decalage = 1
        nuc_by_node[1] = "N"

    last_position = max(nuc_by_node)
    seq = "".join(
        nuc_by_node.get(position, "-")
        for position in range(1, last_position + 1)
    )
    return (seq, decalage)
with
open
(
"seq.fasta"
,
"a"
)
as
f
:
for
i
in
seqs
:
f
.
write
(
">seq"
+
str
(
i
)
+
"
\n
"
)
...
...
@@ -78,6 +137,14 @@ with open("seq.fasta", "a") as f:
print
(
graphs
[
module
][
2
].
nodes
(
data
=
True
))
ss
=
get_ss_from_graph
(
graphs
[
module
][
0
])
print
(
ss
)
print
(
PDBs
[
module
][
2
])
print
(
PDB_positions
[
module
][
2
])
print
(
PDBs
[
module
][
86
])
print
(
PDB_positions
[
module
][
86
])
#for pos,i in enumerate(PDBs[module][1:]):
# #print(i)
# seq,dec = get_PDB_sequence(".".join((i.split(".")[:2])))
# print(pos, len(seq))
#TEST : 86
src/module_1.stk
View file @
31c4083e
...
...
@@ -876,6 +876,4 @@ seq290 CUUCGG
seq291 CUUCGG
#=GS seq291 AC seq291
#=GS seq291 DE seq291
REF_SEQ CUUCGG
#=GC SS_cons <....>
//
src/parse_sequences.py
View file @
31c4083e
...
...
@@ -69,13 +69,19 @@ def run_fasta(input, modules_to_parse,dataset,ss = "",arguments={}):
input
=
str
(
input
).
replace
(
"T"
,
"U"
)
if
len
(
input
)
<=
300
:
maxs
=
run_BP
(
input
,
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
m
,
n
,
sm
,
mc
,
k
)
print
(
"FINAL RESULTS:"
)
print
(
maxs
)
else
:
maxs
=
run_BP
(
input
[
len
(
seq
)
-
300
:],
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
m
,
n
,
sm
,
mc
,
k
)
print
(
maxs
)
all_maxes
=
[]
index
=
0
while
index
+
100
<
len
(
seq
):
maxs
=
run_BP
(
input
[
index
:
index
+
100
],
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
m
,
n
,
sm
,
mc
,
k
)
#print(maxs)
all_maxes
.
append
(
maxs
)
maxs
=
run_BP
(
input
[
index
:],
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
m
,
n
,
sm
,
mc
,
k
)
all_maxes
.
append
(
maxs
)
print
(
"FINAL RESULTS:"
)
print
(
all_maxes
)
if
__name__
==
"__main__"
:
arguments
=
{}
...
...
@@ -124,7 +130,7 @@ if __name__ == "__main__":
graphs
=
pickle
.
load
(
open
(
"../models/"
+
dataset
+
"_one_of_each_graph.cPickle"
,
"rb"
))
run_fasta
(
seq
,
range
(
len
(
graphs
)),
dataset
,
ss
,
arguments
)
#
run_fasta(seq,range(len(graphs)),dataset,ss,arguments)
#run_fasta(seq,[0,1],dataset,ss,arguments)
run_fasta
(
seq
,
2
,
dataset
,
ss
,
arguments
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment