Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Roman Sarrazin-Gendron
RNABayesPairing2
Commits
e6a54362
Commit
e6a54362
authored
Mar 04, 2021
by
Roman Sarrazin-Gendron
Browse files
Fixed multiple seqs, TODO fix windows
parent
a0650d1a
Changes
1
Hide whitespace changes
Inline
Side-by-side
bayespairing/src/parse_sequences.py
View file @
e6a54362
...
@@ -122,21 +122,22 @@ def get_stats(prediction_scores,modules_to_parse,threshold=-5):
...
@@ -122,21 +122,22 @@ def get_stats(prediction_scores,modules_to_parse,threshold=-5):
for
sequence
in
prediction_scores
:
for
sequence
in
prediction_scores
:
hit_dict
=
{}
hit_dict
=
{}
n_sequences
=
n_sequences
+
1
n_sequences
=
n_sequences
+
1
for
window
in
prediction_scores
[
sequence
]:
#for window in prediction_scores[sequence]:
scored_positions
=
{}
window
=
prediction_scores
[
sequence
]
#print(window)
scored_positions
=
{}
for
module
in
window
:
#print(window)
scored_positions
[
module
]
=
[]
for
module
in
window
:
#print("SCANNING",module,window[module])
scored_positions
[
module
]
=
[]
#print("SCANNING",module,window[module])
mod_number
=
module
mod_number
=
module
if
len
(
window
[
module
])
>
0
:
if
len
(
window
[
module
])
>
0
:
max_score
=
sorted
(
window
[
module
],
key
=
itemgetter
(
2
),
reverse
=
True
)[
0
][
2
]
max_score
=
sorted
(
window
[
module
],
key
=
itemgetter
(
2
),
reverse
=
True
)[
0
][
2
]
if
max_score
>
threshold
and
window
[
module
][
0
][
1
]
not
in
scored_positions
[
module
]:
if
max_score
>
threshold
and
window
[
module
][
0
][
1
]
not
in
scored_positions
[
module
]:
#print("admissible score")
#print("admissible score")
scored_positions
[
module
].
append
(
window
[
module
][
0
][
1
])
scored_positions
[
module
].
append
(
window
[
module
][
0
][
1
])
hit_dict
[
module
]
=
True
hit_dict
[
module
]
=
True
for
mod
in
hit_dict
:
for
mod
in
hit_dict
:
if
hit_dict
[
mod
]:
if
hit_dict
[
mod
]:
n_hits
[
mod
]
+=
1
n_hits
[
mod
]
+=
1
...
@@ -258,7 +259,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
...
@@ -258,7 +259,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
if
len
(
sequences
[
0
])
<
3000
:
if
len
(
sequences
[
0
])
<
3000
:
maxs
=
run_BP
(
sequences
,
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
aln
=
aln
,
t
=
t
,
samplesize
=
samplesize
,
pretrained
=
pretrained
,
Lambda
=
Lambda
,
Theta
=
Theta
,
Delta
=
Delta
,
fuzzy
=
fuzzy
,
verbose
=
verbose
,
first_run
=
first_run
)
maxs
=
run_BP
(
sequences
,
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
aln
=
aln
,
t
=
t
,
samplesize
=
samplesize
,
pretrained
=
pretrained
,
Lambda
=
Lambda
,
Theta
=
Theta
,
Delta
=
Delta
,
fuzzy
=
fuzzy
,
verbose
=
verbose
,
first_run
=
first_run
)
first_run
=
False
first_run
=
False
prediction_scores
[
id
]
=
[
maxs
]
prediction_scores
[
id
]
=
maxs
if
interm
:
if
interm
:
print
(
maxs
)
print
(
maxs
)
...
@@ -330,7 +331,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
...
@@ -330,7 +331,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
if
len
(
sequences
[
0
])
<
3000
:
if
len
(
sequences
[
0
])
<
3000
:
maxs
=
run_BP
(
sequences
,
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
aln
=
aln
,
t
=
t
,
samplesize
=
samplesize
,
pretrained
=
pretrained
,
Lambda
=
Lambda
,
Theta
=
Theta
,
Delta
=
Delta
,
fuzzy
=
fuzzy
,
verbose
=
verbose
,
first_run
=
first_run
)
maxs
=
run_BP
(
sequences
,
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
aln
=
aln
,
t
=
t
,
samplesize
=
samplesize
,
pretrained
=
pretrained
,
Lambda
=
Lambda
,
Theta
=
Theta
,
Delta
=
Delta
,
fuzzy
=
fuzzy
,
verbose
=
verbose
,
first_run
=
first_run
)
first_run
=
False
first_run
=
False
prediction_scores
[
id
]
=
[
maxs
]
prediction_scores
[
id
]
=
maxs
if
interm
:
if
interm
:
print
(
maxs
)
print
(
maxs
)
...
@@ -398,7 +399,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
...
@@ -398,7 +399,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
first_run
=
False
first_run
=
False
if
interm
:
if
interm
:
print
(
maxs
)
print
(
maxs
)
prediction_scores
[
id
]
=
[
maxs
]
prediction_scores
[
id
]
=
maxs
else
:
else
:
all_maxes
=
[]
all_maxes
=
[]
...
@@ -452,7 +453,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
...
@@ -452,7 +453,7 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
if
len
(
seq
)
<
300
:
if
len
(
seq
)
<
300
:
maxs
=
run_BP
(
seq
,
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
aln
=
aln
,
t
=
t
,
samplesize
=
samplesize
,
pretrained
=
pretrained
,
Lambda
=
Lambda
,
Theta
=
Theta
,
Delta
=
Delta
,
fuzzy
=
fuzzy
,
verbose
=
verbose
,
first_run
=
first_run
)
maxs
=
run_BP
(
seq
,
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
aln
=
aln
,
t
=
t
,
samplesize
=
samplesize
,
pretrained
=
pretrained
,
Lambda
=
Lambda
,
Theta
=
Theta
,
Delta
=
Delta
,
fuzzy
=
fuzzy
,
verbose
=
verbose
,
first_run
=
first_run
)
first_run
=
False
first_run
=
False
prediction_scores
[
id
]
=
[
maxs
]
prediction_scores
[
id
]
=
maxs
else
:
else
:
all_maxes
=
[]
all_maxes
=
[]
...
@@ -573,15 +574,16 @@ def check_already_there(results, new_range, new_mod):
...
@@ -573,15 +574,16 @@ def check_already_there(results, new_range, new_mod):
def
present_output
(
all_maxes
,
threshold
,
offset
=
0
):
def
present_output
(
all_maxes
,
threshold
,
offset
=
0
):
OUTPUT_STRING
=
""
OUTPUT_STRING
=
""
output
=
[]
output
=
[]
for
m
in
all_maxes
:
#
for m in all_maxes:
#print("all_maxes",m)
#print("all_maxes",m)
for
current_module
in
sorted
(
m
.
keys
()):
m
=
all_maxes
if
len
(
m
[
current_module
])
<
1
:
for
current_module
in
sorted
(
m
.
keys
()):
continue
if
len
(
m
[
current_module
])
<
1
:
for
sub_max
in
m
[
current_module
]:
continue
this_max
=
[
round
(
sub_max
[
2
],
3
),
seq_ranges
(
sub_max
[
1
]),
sub_max
[
0
]]
for
sub_max
in
m
[
current_module
]:
if
this_max
[
0
]
>
threshold
and
not
check_already_there
(
output
,
this_max
,
current_module
):
this_max
=
[
round
(
sub_max
[
2
],
3
),
seq_ranges
(
sub_max
[
1
]),
sub_max
[
0
]]
output
.
append
([
"|"
,
current_module
,
*
this_max
,
"|"
])
if
this_max
[
0
]
>
threshold
and
not
check_already_there
(
output
,
this_max
,
current_module
):
output
.
append
([
"|"
,
current_module
,
*
this_max
,
"|"
])
output
=
sorted
(
output
)
output
=
sorted
(
output
)
#print(output)
#print(output)
OUTPUT_STRING
=
OUTPUT_STRING
+
(
"=========================================================================================
\n
"
)
OUTPUT_STRING
=
OUTPUT_STRING
+
(
"=========================================================================================
\n
"
)
...
@@ -708,8 +710,16 @@ if __name__ == "__main__":
...
@@ -708,8 +710,16 @@ if __name__ == "__main__":
print
(
toPrint
)
print
(
toPrint
)
#generate SVG
#generate SVG
outName
=
arguments
[
"o"
]
for
seqCounter
,
inputSeqKey
in
enumerate
(
list
(
all_results
.
keys
())):
for
seqCounter
,
inputSeqKey
in
enumerate
(
list
(
all_results
.
keys
())):
modules_in_svg
,
chef_ss
=
bp_chefs_choice
(
all_results
[
inputSeqKey
],
seqInfo
[
seqCounter
],
arguments
[
"t"
],
arguments
[
"o"
])
if
seqCounter
>
0
:
finalName
=
outName
+
str
(
counter
)
else
:
finalName
=
outName
#print("THE HITS")
#print(all_results[inputSeqKey])
modules_in_svg
,
chef_ss
=
bp_chefs_choice
(
all_results
[
inputSeqKey
],
seqInfo
[
seqCounter
],
arguments
[
"t"
],
finalName
)
#now we need to fill svg hits
#now we need to fill svg hits
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment