Commit edf59b78 authored by Roman Sarrazin-Gendron
Browse files

BayesPairing 2.1

parent 4d14167e
......@@ -12,7 +12,7 @@ import time
import make_BN_from_carnaval as makeBN
def parse_sequence(seq, modules, ss, dataset, left_out, sm=0.3, mc=0, p=10000):
def parse_sequence(seq, modules, ss, dataset, left_out, sm=0.3, mc=0, p=25000):
pdbs = pickle.load(open("../models/" + dataset + "_PDB_names.cPickle", "rb"))
graphs = pickle.load(open("../models/" + dataset + "_one_of_each_graph.cPickle", "rb"))
return_dict = {}
......@@ -31,7 +31,7 @@ def parse_sequence(seq, modules, ss, dataset, left_out, sm=0.3, mc=0, p=10000):
pickle.dump((scores, return_struct, max), open("../models/best" + pickle_name , "wb"))
struct_list = pickle.load(open("../models/"+pickle_name, "rb"))
best_struct = pickle.load(open("../models/best" + pickle_name, "rb"))
this_score, time = bs.seq_to_struct(seq, motif, struct_list, best_struct, graphs[mod], ss, mc)
this_score, time = bs.seq_to_struct(seq, motif, struct_list, best_struct, graphs[mod], ss, mc, p)
scores[mod] = this_score
return_dict[mod] = [this_score, this_graph]
else:
......@@ -43,13 +43,13 @@ def parse_sequence(seq, modules, ss, dataset, left_out, sm=0.3, mc=0, p=10000):
pickle.dump((scores, return_struct, max), open("../models/best" + pickle_name, "wb"))
struct_list = pickle.load(open("../models/"+pickle_name, "rb"))
best_struct = pickle.load(open("../models/best" + pickle_name, "rb"))
this_score, time = bs.seq_to_struct(seq, motif, struct_list, best_struct, graphs[mod], ss, mc)
this_score, time = bs.seq_to_struct(seq, motif, struct_list, best_struct, graphs[mod], ss, mc, p)
scores[mod] = this_score
return_dict[mod] = [this_score, this_graph]
else:
struct_list = pickle.load(open("../models/"+pickle_name, "rb"))
best_struct = pickle.load(open("../models/"+"best" + pickle_name, "rb"))
this_score, time = bs.seq_to_struct(seq, motif, struct_list, best_struct, graphs[mod], ss, mc)
this_score, time = bs.seq_to_struct(seq, motif, struct_list, best_struct, graphs[mod], ss, mc, p)
scores[mod] = this_score
return_dict[mod] = [this_score, this_graph]
......@@ -192,7 +192,6 @@ def returner(scores, seq, ss="", m=8, n=5, k=1, sscons=2):
maxs.append(returner)
return maxs
if __name__ == "__main__":
modules_to_parse = [216]
# modules_to_test =list(set(sorted([191, 194, 198, 216])))
......
......@@ -154,7 +154,8 @@ def score_struct(
candidate,
sample,
best_struct,
penalty
penalty,
p
):
(scores, return_struct, maxi) = best_struct
convert = {
......@@ -180,6 +181,9 @@ def score_struct(
score = float(scores[i])
size = float(len(nodes))
nullmodel = pow(0.25,size)
#normalizing score for sampling=25000 for interpretable results
score = 25000*score/p
if score!=0:
beforelog= (score-(score*2*penalty/size))
beforelog= beforelog/nullmodel
......@@ -420,25 +424,6 @@ def adjust_aln(best_struct,positions,better_positions):
new_best_struct = (full_sample,new_nucs_for_each_nodes,best_of_sample)
return new_best_struct
def preserve_rotation(position_subsets):
    """
    Enumerate the orderings of ``position_subsets`` that should be tried.

    With fewer than three subsets every permutation is returned (the original
    debug output labelled this case "not a junction"). With three or more
    subsets only the cyclic rotations are returned, so the circular order of
    the subsets is preserved.

    :param position_subsets: sequence of position subsets (list, tuple, ...);
        it is never modified.
    :return: for fewer than three subsets, a list of tuples as produced by
        ``itertools.permutations``; otherwise a list of new lists, one per
        rotation, starting with the rotation by one element and ending with
        the original order.
    """
    # Work on a private list so tuples and other sequences are accepted and
    # the caller's object is left untouched.
    subsets = list(position_subsets)
    count = len(subsets)
    if count < 3:
        return list(itertools.permutations(subsets))
    # Shifts run 1, 2, ..., count - 1, 0: the same order the previous
    # append/pop(0) loop generated, with the unrotated order emitted last.
    return [
        subsets[shift:] + subsets[:shift]
        for shift in (step % count for step in range(1, count + 1))
    ]
def get_position_permutations(position_subsets,component_distance):
final_position_permutations = []
to_permute = []
......@@ -456,10 +441,7 @@ def get_position_permutations(position_subsets,component_distance):
for position in range(len(component_distance)+1):
if position not in set(permuted):
not_permuted.append(position)
#permutations = list(itertools.permutations(to_permute))
permutations = preserve_rotation(to_permute)
#print("PERMUTATIONS",permutations)
# print(preserve_rotation(to_permute))
permutations = list(itertools.permutations(to_permute))
for perm in permutations:
this_perm = []
perm_counter = 0
......@@ -499,7 +481,8 @@ def seq_to_struct(
best_struct,
graphs,
loops,
mc=0
mc=0,
p=25000
):
"""
This function addresses the common case in which the input is not a Bayesian structure, but a sequence.
......@@ -508,6 +491,8 @@ def seq_to_struct(
:param motif: BayesianModel object
:return: a likelihood score
"""
#candidates that have been scored so far
scored_candidates = {}
(scores, return_struct, max) = best_struct
newcols = []
......@@ -587,7 +572,6 @@ def seq_to_struct(
g_distance=[]
h_distance = []
j_distance = []
k_distance = []
for junc in component_distance:
a=junc[0]
b=junc[1]
......@@ -597,8 +581,6 @@ def seq_to_struct(
f= max(int((a+b)/6),c+0)
h= int(1.5*b)
j = int(1.2*b)
k = max(a,len(seq))
reduced_component_distance.append([a,c])
halfway_component_distance.append([a,d])
......@@ -607,7 +589,6 @@ def seq_to_struct(
g_distance.append([a,c])
h_distance.append([a,h])
j_distance.append([a,j])
k_distance.append([a,k])
permutations = get_permutations(position_subsets,component_distance,iii)
regex_list = []
for perm in permutations:
......@@ -622,7 +603,6 @@ def seq_to_struct(
res6 = build_regex(position_subsets,g_distance, node_dict, convert, positions, mc)
res7 = build_regex(position_subsets,h_distance, node_dict, convert, positions, mc)
res8 = build_regex(position_subsets,j_distance, node_dict, convert, positions, mc)
res9 = build_regex(position_subsets,k_distance, node_dict, convert, positions, mc)
regex_list.append([res,position_subsets,node_dict])
regex_list.append([res2,position_subsets,node_dict])
......@@ -632,7 +612,6 @@ def seq_to_struct(
regex_list.append([res6,position_subsets,node_dict])
regex_list.append([res7,position_subsets,node_dict])
regex_list.append([res8,position_subsets,node_dict])
regex_list.append([res9,position_subsets,node_dict])
output = []
final_model = {}
......@@ -647,7 +626,6 @@ def seq_to_struct(
#used to do 0.4,0.6,0.8,1 for paper run
for subseq in [0.4,0.6,0.8,1]:
this_seq = seq[0:int(len(seq) * subseq)]
#print("CURRENT REGEX:",res[index])
r = regex.compile(res[index],flag=regex.BESTMATCH)
iters = r.finditer(str(this_seq), overlapped=True)
for a in iters:
......@@ -709,11 +687,16 @@ def seq_to_struct(
for position in final_model:
if position in loops:
penalty = penalty + 0
final_score = score_struct(motif, final_model, sample,
best_struct, penalty) # score the structure we generated and output that score
if str(final_model) in scored_candidates:
#print("saved time")
final_score = scored_candidates[str(final_model)]
else:
final_score = score_struct(motif, final_model, sample,
best_struct, penalty,p) # score the structure we generated and output that score
scored_candidates[str(final_model)] = final_score
output.append((final_score, module_pos,flatten_position_subsets(position_subsets)))
return output,start
return output,time
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment