Commit b06ef6f4 authored by Roman Sarrazin-Gendron's avatar Roman Sarrazin-Gendron
Browse files

Fixed a bug where learning on a single graph would cause the regex to be too constrained

parent f554cf23
......@@ -111,7 +111,7 @@ def returner(scores, seq, ss="", m=8, n=5, k=1, sscons=2):
cons_score = testSS.call_rnafold(seq, cons_seq)
quotient = (nocons_score / cons_score)
seq_score=current_score
current_score = seq_score + float((float(1) / float(k)) * math.log(quotient))
current_score = seq_score + float(float(k) * math.log(quotient))
final_top[str(poz)] = current_score
#if current_score>20:
# print(position_subsets,current_score,nocons_score,cons_score,quotient, seq_score,math.log(quotient))
......
......@@ -171,21 +171,25 @@ def score_struct(
candidate_list = []
for i in sorted_nodes:
candidate_list.append(candidate[i])
#print(candidate_list)
score = 0.0
test_string = r""
for i in candidate_list:
if i!=4:
test_string = test_string + str(i)
if test_string in scores.keys():
score = float(scores[test_string])
else:
score = -100
size = float(len(nodes))
nullmodel = pow(0.25,size)
#normalizing score for sampling=25000 for interpretable results
score = 25000*score/p
if score!=0:
beforelog= (score-(score*2*penalty/size))
beforelog= beforelog/nullmodel
#beforelog= (score-(score*2*penalty/size))
beforelog= score/nullmodel
if beforelog<=0:
logged= -1.0
else:
......@@ -596,9 +600,12 @@ def seq_to_struct(
halfway_component_distance.append([a,d])
e_distance.append([a,e])
f_distance.append([a,f])
g_distance.append([a,c])
h_distance.append([a,h])
j_distance.append([a,b])
g_distance.append([a,h])
h_distance.append([3,h])
j_distance.append([a,j])
h_distance.append([3,j])
permutations = get_permutations(position_subsets,component_distance,iii)
regex_list = []
for perm in permutations:
......@@ -703,6 +710,7 @@ def seq_to_struct(
else:
final_score = score_struct(motif, final_model, sample,
best_struct, penalty,p) # score the structure we generated and output that score
#print(len(module_pos),module_pos, final_score)
scored_candidates[str(final_model)] = final_score
output.append((final_score, module_pos,flatten_position_subsets(position_subsets)))
......
......@@ -14,7 +14,7 @@ def run_BP(seq, ss, modules_to_parse, dataset, left_out, m=8, n=5, sm=0.8, mc=3,
return_dict= BayesPairing.parse_sequence(seq, modules_to_parse, "", dataset, left_out, sm, mc, p)
#print(return_dict)
#print("now returning")
maxs = BayesPairing.returner(return_dict, seq, ss, m, n, sw, int(sscons))
maxs = BayesPairing.returner(return_dict, seq, ss, m, n, k=sw, sscons=int(sscons))
#print(maxs)
#end = timer.time()
#print("TIME USED:", end - time, "\n")
......@@ -95,8 +95,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
p = int(arguments["p"])
else:
p = 25000
if "k" in arguments:
k = float(arguments["k"])
if "sw" in arguments:
k = float(arguments["sw"])
else:
k = 1
if "t" in arguments:
......@@ -258,6 +258,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
all_maxes = []
for ind, mod in enumerate(modules_to_parse):
all_maxes.append([mod, *maxs[ind]])
#print(all_maxes)
fOUTPUT=fOUTPUT+present_output(all_maxes, t)+"\n"
pickle.dump(all_maxes,open("../output/"+o+".pickle","wb"))
else:
......@@ -299,6 +300,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
return fOUTPUT,sequences
def seq_ranges(all_pos,sequence):
#print(all_pos)
seq=list(sequence)
output_sequence=""
output_string = ""
......@@ -324,8 +326,12 @@ def seq_ranges(all_pos,sequence):
if all_pos[ind - 1] == (all_pos[ind - 2] + 1):
output_string = output_string + "-" + str(pos)
output_sequence = output_sequence+seq[ind]
else:
output_string = output_string + "-" +str(pos)
output_sequence = output_sequence + seq[ind]
else:
output_sequence = output_sequence+seq[ind]
#print(output_string)
return [output_string,output_sequence]
......@@ -381,7 +387,7 @@ if __name__ == "__main__":
parser.add_argument("-t", help="Score threshold for a module to be called. [0 to 35]. Default:25 ")
parser.add_argument("-w", help="Window Length [50 to 300]. Default:200 ")
parser.add_argument("-s", help="Step size between windows [10 to 200]. Default:100 ")
parser.add_argument("-sw", help="weight of the secondary structure analysis [0.8 to 2]. Default:1 ")
parser.add_argument("-sw", help="weight of the secondary structure analysis [0.1 to 2]. increasing this value increases the weight of the structure probability. Default:1 ")
parser.add_argument("-o", help="Name of the output file. Default: output ")
parser.add_argument("-interm", help="output the best intermediate results. (use -interm 1)")
parser.add_argument("-sscons", help="Constraint level of the module-secondary structure match. Integer from 0 to 5, 0 being most constrained")
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment