Commit d04f4949 authored by Roman Sarrazin-Gendron's avatar Roman Sarrazin-Gendron
Browse files

fixed multi-seq, fixed windows, fixed stats, deverbosed chefschoice

parent e092a727
......@@ -24,7 +24,7 @@ def energy_of_score(score):
def strands_from_graph(G):
print(G.edges(data=True))
#print(G.edges(data=True))
backbones = [(i, j) for (i, j, data) in G.edges(data=True) if data['label'] == 'B53']
H = G.edge_subgraph(backbones)
strands = [sorted(list(s)) for s in sorted(nx.connected_components(H.to_undirected()), key=min)]
......@@ -32,8 +32,8 @@ def strands_from_graph(G):
class BPCandidate:
def __init__(self, name, score, position, seq,moduleInfo):
print('-------------')
print(name)
#print('-------------')
#print(name)
self.name = name
self.energy = energy_of_score(score)
self.score = score
......@@ -70,8 +70,8 @@ class BPCandidate:
module = MODULES[self.name]
strands = strands_from_graph(module)
pos_map = {}
print(self.get_positions())
print(strands)
#print(self.get_positions())
#print(strands)
for i in range(len(strands)):
start = strands[i][0]
new_start = self.get_positions()[i][0]
......@@ -295,11 +295,11 @@ def main(seq, candidateLst, NAME):
print(seq)
print(min_result[1])
for i in matches:
print(i.name)
#for i in matches:
#print(i.name)
# print(i.score)
# print(i.real_pos)
print(i.moduleInfo)
#print(i.moduleInfo)
return [(i.name,i.moduleInfo) for i in matches], min_result[1]
def parse_candidates_from_dist(dist, min_score=2):
......@@ -315,7 +315,7 @@ def parse_sequences_file(fpath):
seqPtn = re.compile('[ACGUS-]+')
with open(fpath) as f:
res = [seqPtn.findall(line)[-1] for line in f.readlines() if not line[0] == '#']
print([len(w) for w in res])
#print([len(w) for w in res])
return res
......
......@@ -145,7 +145,9 @@ def get_stats(prediction_scores,modules_to_parse,threshold=-5):
output = []
#print("HITS",n_hits)
for m in n_hits:
output.append(["|", m, n_hits[m],round(n_hits[m]/n_sequences,2), "|"])
avgHits = round(n_hits[m]/n_sequences,2)
if avgHits >0 :
output.append(["|", m, n_hits[m],avgHits, "|"])
OUTPUT_STRING=OUTPUT_STRING+("=========================================\n")
headers = ["|", "MODULE", "N HITS", "PERCENTAGE", "|"]
output.insert(0, headers)
......@@ -154,6 +156,16 @@ def get_stats(prediction_scores,modules_to_parse,threshold=-5):
OUTPUT_STRING=OUTPUT_STRING+"=========================================\n"
return OUTPUT_STRING
def aggregate(maxs, all_maxes):
    """Merge one sliding-window result dict into the running accumulator.

    maxs      -- {module_name: [candidate, ...]} results for the current window.
    all_maxes -- accumulator {module_name: [candidate, ...]} across all windows;
                 mutated in place.

    Returns all_maxes (the same dict object, for chained assignment at call
    sites: ``all_maxes = aggregate(maxs, all_maxes)``).
    """
    for mod, candidates in maxs.items():
        if mod not in all_maxes:
            # Copy rather than alias, so later extends of the accumulator
            # cannot silently mutate the window's own result list.
            all_maxes[mod] = list(candidates)
        else:
            all_maxes[mod].extend(candidates)
    return all_maxes
def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
......@@ -256,7 +268,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
sequences.append((seq,ugseq))
#print("SEQUENCES",len(sequences))
if len(sequences[0]) < 3000:
if len(sequences[0]) < 250:
maxs = run_BP(sequences, ss, modules_to_parse, dataset, "NONE", aln= aln, t= t, samplesize=samplesize, pretrained=pretrained, Lambda=Lambda, Theta=Theta, Delta=Delta, fuzzy=fuzzy, verbose=verbose, first_run=first_run)
first_run=False
prediction_scores[id] = maxs
......@@ -264,7 +276,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
print(maxs)
else:
all_maxes = []
all_maxes = {}
index = 0
while index + w < len(seq):
if verbose:
......@@ -282,7 +294,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
index = index + s
if verbose:
print("Running BayesPairing on sequence window:", index, len(seq))
......@@ -297,7 +310,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
prediction_scores[id] = all_maxes
# print("PREDICTION_SCORES",prediction_scores)
for id in prediction_scores:
......@@ -328,7 +342,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
sequences.append((seq,ugseq))
#print("SEQUENCES",len(sequences))
if len(sequences[0]) < 3000:
if len(sequences[0]) < 250:
maxs = run_BP(sequences, ss, modules_to_parse, dataset, "NONE", aln= aln, t= t, samplesize=samplesize, pretrained=pretrained, Lambda=Lambda, Theta=Theta, Delta=Delta, fuzzy=fuzzy, verbose=verbose, first_run=first_run)
first_run=False
prediction_scores[id] = maxs
......@@ -336,7 +350,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
print(maxs)
else:
all_maxes = []
all_maxes = {}
index = 0
while index + w < len(seq):
if verbose:
......@@ -354,7 +368,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
index = index + s
if verbose:
print("Running BayesPairing on sequence window:", index, len(seq))
......@@ -370,7 +385,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
prediction_scores[id] = all_maxes
# print("PREDICTION_SCORES",prediction_scores)
for id in prediction_scores:
......@@ -386,7 +402,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
print("FASTA file detected, scanning", input)
prediction_scores = {}
sequences = []
with open(input, "rU") as f:
with open(input, "r") as f:
for num, record in enumerate(SeqIO.parse(f, "fasta")):
id = record.id
seq = str(record.seq)
......@@ -394,7 +410,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
sequences.append(seq)
if "T" in seq:
seq = str(seq).replace("T", "U")
if len(seq)<300:
if len(seq)<250:
maxs = run_BP(seq, ss, modules_to_parse, dataset, "NONE", aln= aln, t= t, samplesize=samplesize, pretrained=pretrained, Lambda=Lambda, Theta=Theta, Delta=Delta, fuzzy=fuzzy, verbose=verbose, first_run=first_run)
first_run=False
if interm:
......@@ -402,7 +418,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
prediction_scores[id] = maxs
else:
all_maxes = []
all_maxes = {}
index = 0
while index + w < len(seq):
if verbose:
......@@ -415,7 +431,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
index = index + s
if verbose:
print("Running BayesPairing on sequence window:", index, len(seq))
......@@ -425,7 +442,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
prediction_scores[id] = all_maxes
for id in prediction_scores:
......@@ -456,7 +474,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
prediction_scores[id] = maxs
else:
all_maxes = []
all_maxes = {}
index = 0
while index + w < len(seq):
if verbose:
......@@ -469,7 +487,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
index = index + s
if verbose:
print("Running BayesPairing on sequence window:", index, index + w)
......@@ -479,7 +498,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
prediction_scores[id] = all_maxes
for id in prediction_scores:
fOUTPUT=fOUTPUT+"\nRESULTS FOR ID "+id+"\n"
......@@ -504,7 +524,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
prediction_scores = {"input_seq":maxs}
pickle.dump(prediction_scores,open("../output/"+o+".pickle","wb"))
else:
all_maxes = []
all_maxes = {}
index = 0
while index + w < len(input):
if verbose:
......@@ -518,7 +538,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
index = index + s
if verbose:
print("Running Bayespairing on sequence window:", index, len(input))
......@@ -528,7 +549,8 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
for mod in maxs:
for cand in maxs[mod]:
cand[1] = [[k + bf for k in l] for l in cand[1]]
all_maxes.append(maxs)
#all_maxes.append(maxs)
all_maxes = aggregate(maxs,all_maxes)
fOUTPUT=fOUTPUT+present_output(all_maxes, t)+"\n"
prediction_scores = {"input_seq":all_maxes}
......@@ -715,7 +737,7 @@ if __name__ == "__main__":
outName = arguments["o"]
for seqCounter,inputSeqKey in enumerate(list(all_results.keys())):
if seqCounter>0:
finalName = outName+str(counter)
finalName = outName+str(seqCounter)
else:
finalName = outName
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment