Commit 721efa24 authored by Roman Sarrazin-Gendron
Browse files

run by run debugging

parent 281c9050
...@@ -98,7 +98,8 @@ def parse_sequence(seq,modules,ss,dataset,left_out,sm=0.25, mc=0,k=5000): ...@@ -98,7 +98,8 @@ def parse_sequence(seq,modules,ss,dataset,left_out,sm=0.25, mc=0,k=5000):
return_dict[mod] = [this_score,constraints] return_dict[mod] = [this_score,constraints]
return return_dict return return_dict
def returner(scores,seq,ss="",m=4,n=4): def returner(scores,seq,ss="",m=5,n=5):
print("SEQUENCE SCORES:",scores)
if ss=="": if ss=="":
maxs = [] maxs = []
for i in scores.keys(): for i in scores.keys():
...@@ -109,7 +110,7 @@ def returner(scores,seq,ss="",m=4,n=4): ...@@ -109,7 +110,7 @@ def returner(scores,seq,ss="",m=4,n=4):
returner = [] returner = []
final_top = {} final_top = {}
for z in topitems: for z in topitems:
if topitems[z]>11: if z[1]>-10:
poz = ast.literal_eval(z[0]) poz = ast.literal_eval(z[0])
current_score = z[1] current_score = z[1]
cons = scores[i][1] cons = scores[i][1]
...@@ -122,7 +123,7 @@ def returner(scores,seq,ss="",m=4,n=4): ...@@ -122,7 +123,7 @@ def returner(scores,seq,ss="",m=4,n=4):
cons_seq = cons_seq + (cons_dict[nuc]) cons_seq = cons_seq + (cons_dict[nuc])
else: else:
cons_seq= cons_seq + ('.') cons_seq= cons_seq + ('.')
#print(cons_seq) print(cons_seq)
if "." in cons_seq: if "." in cons_seq:
#start1 = time.time() #start1 = time.time()
nocons_score = testSS.call_rnafold(seq) nocons_score = testSS.call_rnafold(seq)
......
...@@ -370,7 +370,8 @@ def seq_to_struct( ...@@ -370,7 +370,8 @@ def seq_to_struct(
component_distance = [] component_distance = []
for s in range(len(position_subsets)-1): for s in range(len(position_subsets)-1):
component_distance.append([150,-1]) component_distance.append([5,20])
print("OBSERVED COMPONENT DISTANCES")
for gr in graphs: for gr in graphs:
newcols = [] newcols = []
positions = [] positions = []
...@@ -382,11 +383,12 @@ def seq_to_struct( ...@@ -382,11 +383,12 @@ def seq_to_struct(
for junc in range(len(component_addresses)): for junc in range(len(component_addresses)):
distance = cols[component_addresses[junc]]-cols[component_addresses[junc]-1] distance = cols[component_addresses[junc]]-cols[component_addresses[junc]-1]
print(distance)
if distance < component_distance[junc][0]: if distance < component_distance[junc][0]:
component_distance[junc][0] = distance component_distance[junc][0] = distance
if distance > component_distance[junc][1]: if distance > component_distance[junc][1]:
component_distance[junc][1] = distance component_distance[junc][1] = distance
(m, iii, l) = best_struct (m, iii, l) = best_struct
print("III") print("III")
...@@ -403,7 +405,7 @@ def seq_to_struct( ...@@ -403,7 +405,7 @@ def seq_to_struct(
if dis[1]<4: if dis[1]<4:
dis[1]=5 dis[1]=5
if len(re_call) > 0: if len(re_call) > 0:
re_call = re_call + r")([ACGU]{"+str(4)+","+str(dis[1])+"})" re_call = re_call + r")([ACGU]{"+str(dis[0])+","+str(dis[1])+"})"
if len(i) == 1: if len(i) == 1:
cons = [convert[int(k)] for k in list(iii[i[0]])] cons = [convert[int(k)] for k in list(iii[i[0]])]
if len(cons) == 1: if len(cons) == 1:
......
...@@ -26,7 +26,7 @@ def run_fasta(input, modules_to_parse,dataset,ss = "",arguments={}): ...@@ -26,7 +26,7 @@ def run_fasta(input, modules_to_parse,dataset,ss = "",arguments={}):
if "sm" in arguments: if "sm" in arguments:
sm=arguments["sm"] sm=arguments["sm"]
else: else:
sm=0.2 sm=0.9
if "mc" in arguments: if "mc" in arguments:
mc=arguments["mc"] mc=arguments["mc"]
else: else:
...@@ -34,7 +34,7 @@ def run_fasta(input, modules_to_parse,dataset,ss = "",arguments={}): ...@@ -34,7 +34,7 @@ def run_fasta(input, modules_to_parse,dataset,ss = "",arguments={}):
if "k" in arguments: if "k" in arguments:
k=arguments["k"] k=arguments["k"]
else: else:
k=5000 k=50000
seq = "" seq = ""
if "fa" in input: if "fa" in input:
prediction_scores = {} prediction_scores = {}
...@@ -75,19 +75,20 @@ def run_fasta(input, modules_to_parse,dataset,ss = "",arguments={}): ...@@ -75,19 +75,20 @@ def run_fasta(input, modules_to_parse,dataset,ss = "",arguments={}):
else: else:
all_maxes=[] all_maxes=[]
index = 0 index = 0
while index+200<len(input): while index+50<len(input):
print("Running Bayespairing on sequence:",input[index:index+200]) print("Running Bayespairing on sequence parts:", index-50,index+50)
#maxs=[] #maxs=[]
maxs = run_BP(input[index:index+200], ss, modules_to_parse, dataset, "NONE",m,n,sm,mc,k) bf = max(0,index-50)
maxs = run_BP(input[bf:index+50], ss, modules_to_parse, dataset, "NONE",m,n,sm,mc,k)
corrected_maxes = [] corrected_maxes = []
print(maxs) print(maxs)
for ind,cand in enumerate(maxs[0]): for ind,cand in enumerate(maxs[0]):
if ind%2==1: if ind%2==1:
corrected_maxes.append([i+index for i in cand]) corrected_maxes.append([i+bf for i in cand])
else: else:
corrected_maxes.append(cand) corrected_maxes.append(cand)
all_maxes.append(corrected_maxes) all_maxes.append(corrected_maxes)
index=index+200 index=index+50
maxs = run_BP(input[index:], ss, modules_to_parse, dataset, "NONE",m,n,sm,mc,k) maxs = run_BP(input[index:], ss, modules_to_parse, dataset, "NONE",m,n,sm,mc,k)
corrected_maxes = [] corrected_maxes = []
for ind,cand in enumerate(maxs[0]): for ind,cand in enumerate(maxs[0]):
...@@ -149,5 +150,5 @@ if __name__ == "__main__": ...@@ -149,5 +150,5 @@ if __name__ == "__main__":
#run_fasta(seq,range(len(graphs)),dataset,ss,arguments) #run_fasta(seq,range(len(graphs)),dataset,ss,arguments)
#run_fasta(seq,[0,1],dataset,ss,arguments) #run_fasta(seq,[0,1],dataset,ss,arguments)
run_fasta(seq,[2],dataset,ss,arguments) run_fasta(seq,[7],dataset,ss,arguments)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment