Commit 237061c2 authored by SarrazinG
Browse files

fixed bracket issue

parent f7b657ef
Pipeline #2 failed with stages
in 0 seconds
......@@ -60,9 +60,33 @@ def get_rotations(strands):
current_permutations.append(tuple(current_p.copy()))
return current_permutations
def find_significant_columns(aln_sequences):
# by Sebastian Will
def parseRNAStructure(structure, *, opening="([{<", closing=")]}>"):
    """Parse a dot-bracket string into a list of pairing partners.

    Each bracket type (the i-th character of ``opening`` matched with the
    i-th character of ``closing``) is tracked on its own stack, so
    different bracket types may cross each other (pseudoknot notation).

    Args:
        structure: the dot-bracket string to parse. Any character that is
            neither an opening nor a closing bracket is treated as unpaired.
        opening: keyword-only; the opening bracket characters.
        closing: keyword-only; the closing bracket characters, positionally
            aligned with ``opening``.

    Returns:
        A list ``bps`` of ``len(structure)`` integers where ``bps[i]`` is the
        0-based index of the position paired with ``i``, or ``-1`` when
        position ``i`` is unpaired.

    Raises:
        ParseError: if a closing bracket has no matching opening bracket, or
            an opening bracket is never closed.
    """
    # One stack of pending opening positions per bracket type.
    stack = {op: [] for op in opening}
    bps = [-1] * len(structure)

    for i, c in enumerate(structure):
        for op, cl in zip(opening, closing):
            if c == op:
                stack[op].append(i)
            elif c == cl:
                if not stack[op]:
                    raise ParseError("Unbalanced RNA dot-bracket structure reading "+cl+".")
                # Pair this closing bracket with the most recent unmatched
                # opening bracket of the same type.
                j = stack[op].pop()
                bps[i] = j
                bps[j] = i

    # Any position still on a stack is an opening bracket that never closed.
    for op in opening:
        if stack[op]:
            raise ParseError("Unbalanced RNA dot-bracket structure reading "+op+".")

    return bps
def find_significant_columns(aln_sequences,struct):
align = aln_sequences
pairs = parseRNAStructure(struct)
#print("BPs",pairs)
cantBeGood = []
good_pos = []
for pos in range(len(align[0])):
nuc_count = (len([x[pos] for x in align])-[x[pos] for x in align].count("-") )/len([x[pos] for x in align])
......@@ -70,11 +94,19 @@ def find_significant_columns(aln_sequences):
#print(nuc_count)
if nuc_count>0.5:
good_pos.append(pos)
return good_pos
else:
cantBeGood.append(pos)
if pairs[pos]>-1:
cantBeGood.append(pairs[pos])
only_good_pos = [x for x in good_pos if x not in cantBeGood]
#print("good pos", good_pos)
return only_good_pos
def parse_alignment2(sequences, modules, ss, dataset, BNs, t=-3, samplesize=20000, Lambda=0.35, Theta=1, Delta=None, fuzzy=False, verbose=False):
#print("ALIGNMENT SEQUENCES",sequences)
#seqs = [x[0] for x in sequences]
seqs = [x[0] for x in sequences]
fc = Fold(seqs)
ss_mfe, mfe, fee = fc.constraint_folding()
nb = samplesize
......@@ -121,7 +153,7 @@ def parse_alignment2(sequences, modules, ss, dataset, BNs, t=-3, samplesize=2000
modules_predicted = {}
real_pos = find_significant_columns(seqs)
#real_pos = find_significant_columns(seqs)
for ind,subopt_output in enumerate(ss):
......@@ -257,9 +289,17 @@ def parse_alignment(sequences, modules, ss, dataset, BNs, t=-5, samplesize=20000
BOLTZMANN_SUM = 1
real_pos = find_significant_columns(seqs)
real_pos = find_significant_columns(seqs,struct)
real_ss = "".join([struct[pos] for pos in real_pos])
real_seq = "".join([sequences[0][0][pos] for pos in real_pos])
if real_ss == "":
print("problematic struct",struct)
print("accepted columns",real_pos)
print("bps",parseRNAStructure(struct))
exit()
struct = real_ss
tree = SSETree.from_bracket(struct,seq=real_seq)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment