Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Roman Sarrazin-Gendron
RNABayesPairing
Commits
22cd100f
Commit
22cd100f
authored
Aug 30, 2019
by
Roman Sarrazin-Gendron
Browse files
improved accuracy on new sequences, alleviated overfitting to sequence distance
parent
c9d41e20
Changes
2
Hide whitespace changes
Inline
Side-by-side
bayespairing/src/bayes_to_seqpy.py
View file @
22cd100f
...
...
@@ -586,6 +586,10 @@ def seq_to_struct(
g_distance
=
[]
h_distance
=
[]
j_distance
=
[]
i_distance
=
[]
k_distance
=
[]
l_distance
=
[]
m_distance
=
[]
for
junc
in
component_distance
:
a
=
junc
[
0
]
b
=
junc
[
1
]
...
...
@@ -595,6 +599,7 @@ def seq_to_struct(
f
=
max
(
int
((
a
+
b
)
/
6
),
c
+
0
)
h
=
int
(
1.5
*
b
)
j
=
int
(
1.2
*
b
)
k
=
max
(
a
,
len
(
seq
))
reduced_component_distance
.
append
([
a
,
b
])
halfway_component_distance
.
append
([
a
,
d
])
...
...
@@ -603,8 +608,10 @@ def seq_to_struct(
g_distance
.
append
([
a
,
h
])
h_distance
.
append
([
3
,
h
])
j_distance
.
append
([
a
,
j
])
h_distance
.
append
([
3
,
j
])
i_distance
.
append
([
3
,
j
])
l_distance
.
append
([
a
,
c
])
k_distance
.
append
([
a
,
b
])
m_distance
.
append
([
a
,
k
])
permutations
=
get_permutations
(
position_subsets
,
component_distance
,
iii
)
regex_list
=
[]
...
...
@@ -620,6 +627,10 @@ def seq_to_struct(
res6
=
build_regex
(
position_subsets
,
g_distance
,
node_dict
,
convert
,
positions
,
mc
)
res7
=
build_regex
(
position_subsets
,
h_distance
,
node_dict
,
convert
,
positions
,
mc
)
res8
=
build_regex
(
position_subsets
,
j_distance
,
node_dict
,
convert
,
positions
,
mc
)
res9
=
build_regex
(
position_subsets
,
i_distance
,
node_dict
,
convert
,
positions
,
mc
)
res10
=
build_regex
(
position_subsets
,
l_distance
,
node_dict
,
convert
,
positions
,
mc
)
res11
=
build_regex
(
position_subsets
,
k_distance
,
node_dict
,
convert
,
positions
,
mc
)
res12
=
build_regex
(
position_subsets
,
m_distance
,
node_dict
,
convert
,
positions
,
mc
)
regex_list
.
append
([
res
,
position_subsets
,
node_dict
])
regex_list
.
append
([
res2
,
position_subsets
,
node_dict
])
...
...
@@ -629,6 +640,10 @@ def seq_to_struct(
regex_list
.
append
([
res6
,
position_subsets
,
node_dict
])
regex_list
.
append
([
res7
,
position_subsets
,
node_dict
])
regex_list
.
append
([
res8
,
position_subsets
,
node_dict
])
regex_list
.
append
([
res9
,
position_subsets
,
node_dict
])
regex_list
.
append
([
res10
,
position_subsets
,
node_dict
])
regex_list
.
append
([
res11
,
position_subsets
,
node_dict
])
regex_list
.
append
([
res12
,
position_subsets
,
node_dict
])
#print("REGEX:",regex_list)
output
=
[]
final_model
=
{}
...
...
bayespairing/src/parse_sequences.py
View file @
22cd100f
...
...
@@ -135,10 +135,11 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
seq
=
str
(
record
.
seq
)
seq
=
seq
.
upper
()
sequences
.
append
(
seq
)
print
(
"LENGTH OF SEQUENCE:"
,
len
(
seq
))
#print("PARSING SEQUENCE ", id, "\n")
if
"T"
in
seq
:
seq
=
str
(
seq
).
replace
(
"T"
,
"U"
)
if
len
(
seq
)
<=
2
00
:
if
len
(
seq
)
<=
3
00
:
#print("Running BayesPairing on full sequence")
maxs
=
run_BP
(
seq
,
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
m
,
n
,
sm
,
mc
,
p
,
k
)
if
interm
:
...
...
@@ -249,11 +250,12 @@ def run_fasta(input, modules_to_parse, dataset, ss="", arguments={}):
fOUTPUT
=
fOUTPUT
+
"
\n
"
+
stats
pickle
.
dump
(
prediction_scores
,
open
(
"../output/"
+
o
+
".pickle"
,
"wb"
))
else
:
print
(
"LENGTH OF SEQUENCE:"
,
len
(
input
))
if
"T"
in
input
:
input
=
input
.
upper
()
input
=
str
(
input
).
replace
(
"T"
,
"U"
)
sequences
=
[
input
]
if
len
(
input
)
<=
2
00
:
if
len
(
input
)
<=
3
00
:
maxs
=
run_BP
(
input
,
ss
,
modules_to_parse
,
dataset
,
"NONE"
,
m
,
n
,
sm
,
mc
,
p
,
k
,
sscons
)
all_maxes
=
[]
for
ind
,
mod
in
enumerate
(
modules_to_parse
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment