Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Roman Sarrazin-Gendron
RNABayesPairing2
Commits
237061c2
Commit
237061c2
authored
Nov 09, 2020
by
SarrazinG
Browse files
fixed bracket issue
parent
f7b657ef
Pipeline
#2
failed with stages
in 0 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
bayespairing/src/BayesPairing.py
View file @
237061c2
...
...
@@ -60,9 +60,33 @@ def get_rotations(strands):
current_permutations
.
append
(
tuple
(
current_p
.
copy
()))
return
current_permutations
def
find_significant_columns
(
aln_sequences
):
#by sebastian will
def parseRNAStructure(structure, *, opening="([{<", closing=")]}>"):
    """Convert a dot-bracket structure into a list of base-pair partners.

    Each position of *structure* is compared against every
    (opening, closing) symbol pair, taken in lockstep from *opening*
    and *closing*.  An opening symbol records its index on a stack
    dedicated to that bracket type; a closing symbol pops the most
    recent index from the matching stack and links the two positions.
    Separate stacks per bracket type allow different bracket types to
    interleave (e.g. ``"([)]"``).

    Args:
        structure: Sequence of symbols (typically a dot-bracket string).
        opening: Keyword-only; the opening bracket symbols.
        closing: Keyword-only; the closing bracket symbols, aligned
            position-by-position with *opening*.

    Returns:
        A list of ``len(structure)`` integers.  Entry ``i`` holds the
        index paired with position ``i``, or ``-1`` when the symbol at
        ``i`` is neither opened nor closed by any bracket pair.

    Raises:
        ParseError: If a closing symbol has no pending opening partner,
            or if opening symbols remain unmatched at the end.
    """
    # NOTE(review): ParseError is not defined in this function; it is
    # presumably defined or imported elsewhere in the module -- confirm.
    pending = {symbol: [] for symbol in opening}
    partners = [-1] * len(structure)

    for index, symbol in enumerate(structure):
        for open_sym, close_sym in zip(opening, closing):
            if symbol == open_sym:
                pending[open_sym].append(index)
                continue
            if symbol != close_sym:
                continue

            # Closing symbol: it needs a previously opened partner.
            unmatched = pending[open_sym]
            if not unmatched:
                raise ParseError(
                    "Unbalanced RNA dot-bracket structure reading "
                    + close_sym
                    + "."
                )
            mate = unmatched.pop()
            partners[index] = mate
            partners[mate] = index

    # Anything still on a stack was opened but never closed.
    for open_sym in opening:
        if pending[open_sym]:
            raise ParseError(
                "Unbalanced RNA dot-bracket structure reading "
                + open_sym
                + "."
            )

    return partners
def
find_significant_columns
(
aln_sequences
,
struct
):
align
=
aln_sequences
pairs
=
parseRNAStructure
(
struct
)
#print("BPs",pairs)
cantBeGood
=
[]
good_pos
=
[]
for
pos
in
range
(
len
(
align
[
0
])):
nuc_count
=
(
len
([
x
[
pos
]
for
x
in
align
])
-
[
x
[
pos
]
for
x
in
align
].
count
(
"-"
)
)
/
len
([
x
[
pos
]
for
x
in
align
])
...
...
@@ -70,11 +94,19 @@ def find_significant_columns(aln_sequences):
#print(nuc_count)
if
nuc_count
>
0.5
:
good_pos
.
append
(
pos
)
return
good_pos
else
:
cantBeGood
.
append
(
pos
)
if
pairs
[
pos
]
>-
1
:
cantBeGood
.
append
(
pairs
[
pos
])
only_good_pos
=
[
x
for
x
in
good_pos
if
x
not
in
cantBeGood
]
#print("good pos", good_pos)
return
only_good_pos
def
parse_alignment2
(
sequences
,
modules
,
ss
,
dataset
,
BNs
,
t
=-
3
,
samplesize
=
20000
,
Lambda
=
0.35
,
Theta
=
1
,
Delta
=
None
,
fuzzy
=
False
,
verbose
=
False
):
#print("ALIGNMENT SEQUENCES",sequences)
#
seqs = [x[0] for x in sequences]
seqs
=
[
x
[
0
]
for
x
in
sequences
]
fc
=
Fold
(
seqs
)
ss_mfe
,
mfe
,
fee
=
fc
.
constraint_folding
()
nb
=
samplesize
...
...
@@ -121,7 +153,7 @@ def parse_alignment2(sequences, modules, ss, dataset, BNs, t=-3, samplesize=2000
modules_predicted
=
{}
real_pos
=
find_significant_columns
(
seqs
)
#
real_pos = find_significant_columns(seqs)
for
ind
,
subopt_output
in
enumerate
(
ss
):
...
...
@@ -257,9 +289,17 @@ def parse_alignment(sequences, modules, ss, dataset, BNs, t=-5, samplesize=20000
BOLTZMANN_SUM
=
1
real_pos
=
find_significant_columns
(
seqs
)
real_pos
=
find_significant_columns
(
seqs
,
struct
)
real_ss
=
""
.
join
([
struct
[
pos
]
for
pos
in
real_pos
])
real_seq
=
""
.
join
([
sequences
[
0
][
0
][
pos
]
for
pos
in
real_pos
])
if
real_ss
==
""
:
print
(
"problematic struct"
,
struct
)
print
(
"accepted columns"
,
real_pos
)
print
(
"bps"
,
parseRNAStructure
(
struct
))
exit
()
struct
=
real_ss
tree
=
SSETree
.
from_bracket
(
struct
,
seq
=
real_seq
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment