Commit 31c4083e authored by Roman Sarrazin-Gendron
Browse files

added window scanning

parent b36af7b8
......@@ -3,7 +3,7 @@ import networkx as nx
from matplotlib import pyplot as plt
#g = pickle.load(open("all_graphs_pickled/" + "1FFK" + ".nxpickled", "rb"))
# NOTE(review): this value is overwritten by the assignment on the next line,
# so only "1Q7Y.A" is ever used. Presumably one of the two lines is a leftover
# of the previous revision -- confirm and delete the stale one.
PDBid = "1EBR.A"
PDBid = "1Q7Y.A"
#print("PDB")
#print(PDBid)
# Split the "<PDB>.<chain>" identifier into its two parts.
PDB, chain = PDBid.split(".")
......
......@@ -63,6 +63,65 @@ def get_consensus_sequence(seqs):
consensus.append(best)
return consensus
def get_PDB_sequence(PDBid):
    """Rebuild the 1-based nucleotide sequence of one chain from a pickled graph.

    The graph of the PDB entry is loaded from
    ``../models/all_graphs_pickled/<PDB>.nxpickled``. Every node whose key
    starts with the requested chain contributes the value of its ``"nt"``
    attribute at the position given by the second element of its key.

    Args:
        PDBid: Identifier of the form ``"<PDB>.<chain>"``, e.g. ``"1Q7Y.A"``.

    Returns:
        A ``(seq, decalage)`` tuple.

        * ``seq`` spans positions 1 through the highest node position of the
          chain; positions that have no node are filled with ``"-"``.
        * ``decalage`` is 1 when position 1 had no node (an ``"N"``
          placeholder is inserted there), otherwise 0.

        ``("", 0)`` is returned when the pickle file does not exist.

    Raises:
        ValueError: If ``PDBid`` does not split into exactly two parts on
            ``"."``, or if a node position cannot be converted to ``int``.
    """
    print("PDB")
    print(PDBid)
    PDB, chain = PDBid.split(".")
    path = "../models/all_graphs_pickled/" + PDB + ".nxpickled"
    try:
        # NOTE: unpickling can execute arbitrary code; only load graph files
        # that come from a trusted source.
        # The context manager guarantees the file handle is closed.
        with open(path, "rb") as handle:
            g = pickle.load(handle)
    except FileNotFoundError:
        print("PDB FILE NOT FOUND")
        return ("", 0)

    # (position, nucleotide) pairs of the requested chain, in sorted order.
    chain_nodes = sorted(
        (int(key[1]), data["nt"])
        for key, data in g.nodes(data=True)
        if key[0] == chain
    )
    # When a position occurs more than once, the last pair in sorted order
    # wins, exactly as a sequential fill of the mapping would do.
    nuc_by_node = dict(chain_nodes)

    decalage = 0
    if 1 not in nuc_by_node:
        # Position 1 is absent: pad it with a placeholder and report the shift.
        decalage = 1
        nuc_by_node[1] = "N"

    # Position 1 is always present at this point, so max() never sees an
    # empty mapping. Positions below 1 are ignored by the range.
    last_position = max(nuc_by_node)
    seq = "".join(
        nuc_by_node.get(position, "-")
        for position in range(1, last_position + 1)
    )
    return (seq, decalage)
with open("seq.fasta", "a") as f:
for i in seqs:
f.write(">seq" + str(i) + "\n")
......@@ -78,6 +137,14 @@ with open("seq.fasta", "a") as f:
print(graphs[module][2].nodes(data=True))
ss= get_ss_from_graph(graphs[module][0])
print(ss)
print(PDBs[module][2])
print(PDB_positions[module][2])
print(PDBs[module][86])
print(PDB_positions[module][86])
#for pos,i in enumerate(PDBs[module][1:]):
# #print(i)
# seq,dec = get_PDB_sequence(".".join((i.split(".")[:2])))
# print(pos, len(seq))
#TEST : 86
......@@ -876,6 +876,4 @@ seq290 CUUCGG
seq291 CUUCGG
#=GS seq291 AC seq291
#=GS seq291 DE seq291
REF_SEQ CUUCGG
#=GC SS_cons <....>
//
......@@ -69,13 +69,19 @@ def run_fasta(input, modules_to_parse,dataset,ss = "",arguments={}):
# Convert the input to a string and replace every "T" with "U".
input = str(input).replace("T","U")
# Inputs of at most 300 characters are passed to run_BP in one piece.
if len(input)<=300:
maxs = run_BP(input, ss, modules_to_parse, dataset, "NONE",m,n,sm,mc,k)
print("FINAL RESULTS:")
print(maxs)
else:
# NOTE(review): `seq` is not a parameter in the visible run_fasta signature;
# presumably it resolves to a module-level name -- confirm whether
# `input` was intended here and in the loop condition below.
maxs = run_BP(input[len(seq)-300:], ss, modules_to_parse, dataset, "NONE",m,n,sm,mc,k)
print(maxs)
# Window scanning: collect one run_BP result per 100-character window.
all_maxes=[]
index = 0
# NOTE(review): `index` is never modified in the visible lines, so once this
# loop is entered its condition cannot become false and every iteration
# scans the same window. Presumably an increment of `index` is missing --
# confirm the intended step size.
while index+100<len(seq):
maxs = run_BP(input[index:index+100], ss, modules_to_parse, dataset, "NONE",m,n,sm,mc,k)
#print(maxs)
all_maxes.append(maxs)
# Scan whatever remains from `index` to the end of the input.
# NOTE(review): the same `ss` is passed for every window, unsliced -- verify
# that run_BP expects this.
maxs = run_BP(input[index:], ss, modules_to_parse, dataset, "NONE",m,n,sm,mc,k)
all_maxes.append(maxs)
print("FINAL RESULTS:")
print(all_maxes)
if __name__ == "__main__":
arguments = {}
......@@ -124,7 +130,7 @@ if __name__ == "__main__":
graphs = pickle.load(open("../models/"+dataset+"_one_of_each_graph.cPickle", "rb"))
run_fasta(seq,range(len(graphs)),dataset,ss,arguments)
#run_fasta(seq,range(len(graphs)),dataset,ss,arguments)
#run_fasta(seq,[0,1],dataset,ss,arguments)
run_fasta(seq,2,dataset,ss,arguments)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment