Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Anton
CaRNAval2
Commits
adb5dcac
Commit
adb5dcac
authored
Apr 18, 2020
by
Anton
Browse files
first commit
parents
Changes
1
Hide whitespace changes
Inline
Side-by-side
SRC/RIN.py
0 → 100644
View file @
adb5dcac
#external libraries
import
json
#to hash data of edges (dictionaries)
class RIN:
    """Recurrent Interaction Network: a graph motif together with the set of
    its occurrences across RNA structure graphs.

    A RIN is keyed by a primary key (zero-padded order/size string, so keys
    sort by (order, size)) and a secondary key (deterministic JSON dump of
    its edge-category dictionary).
    """

    # Class-level defaults; every field is re-assigned per instance in
    # __init__, so the mutable {} defaults are never actually shared.
    graph = None               # representative motif graph (networkx-like: order()/size())
    order = 0                  # number of nodes of graph
    size = 0                   # number of edges of graph
    primary_key = ""           # 'oNNNNNNsNNNNNN'; zfill keeps lexicographic == numeric order
    canonical = False          # True once make_canonical() is called
    ID = -1
    d_edges_by_category = {}
    secondary_key = ""         # json.dumps(d_edges_by_category, sort_keys=True)
    nb_occurrences = 0
    d_occurrences = {}
    # d_occurrences is a double dictionary:
    #   lvl1: key -> d_occ   (key is the (PDB_ID, chain, mode) triple)
    #   lvl2: key_occ -> map (key_occ is the sorted list of node_IDs; map is a
    #         bijection from node_IDs in the canonical graph to node_IDs in the occurrence)
    representing_occurrence = None  # (g_name, g_occurrence_key)
    SSEs_distrib = None        # lazily computed by get_SSEs_distrib()

    def __init__(self, graph, d_edges_by_category, g_name, h_name,
                 d_generic_nodes_to_nodes_in_g, d_generic_nodes_to_nodes_in_h):
        """Build a RIN from a representative graph and its two founding occurrences.

        graph: motif graph exposing order() and size().
        d_edges_by_category: dict describing edges per category; also hashed
            (via a sorted JSON dump) into the secondary key.
        g_name, h_name: (PDB_ID, chain, mode) identifiers of the two graphs.
        d_generic_nodes_to_nodes_in_g / _h: bijections from canonical node IDs
            to node IDs inside each occurrence.

        Note: g is assumed to be the representing occurrence.
        """
        self.graph = graph
        self.order = self.graph.order()
        self.size = self.graph.size()
        # zfill so primary keys are ordered lexicographically by (order, size)
        self.primary_key = ('o' + str(self.order).zfill(6)
                            + 's' + str(self.size).zfill(6))
        self.canonical = False
        self.ID = -1
        self.d_edges_by_category = d_edges_by_category
        # sort_keys=True makes the dump deterministic, so it is usable as a key
        self.secondary_key = json.dumps(d_edges_by_category, sort_keys=True)
        self.d_occurrences = {}
        g_occurrence_key = json.dumps(sorted(d_generic_nodes_to_nodes_in_g.values()))
        self.representing_occurrence = (g_name, g_occurrence_key)
        # setdefault-merge instead of plain assignment: if g_name == h_name
        # (both occurrences in the same structure), a plain assignment for h
        # would silently discard the g occurrence.
        self.d_occurrences.setdefault(g_name, {})[g_occurrence_key] = \
            d_generic_nodes_to_nodes_in_g
        h_occurrence_key = json.dumps(sorted(d_generic_nodes_to_nodes_in_h.values()))
        self.d_occurrences.setdefault(h_name, {})[h_occurrence_key] = \
            d_generic_nodes_to_nodes_in_h
        self.nb_occurrences = 2

    def make_canonical(self, ID):
        """Mark this RIN as the canonical representative and record its ID."""
        self.canonical = True
        self.ID = ID

    def get_keys(self):
        """Return the (primary_key, secondary_key) pair used for indexing."""
        return (self.primary_key, self.secondary_key)

    def create_occurrence_graphs(self, data):
        """Materialize one induced subgraph per recorded occurrence.

        data: dict mapping (PDB_ID, chain) -> full RNA graph.
        Returns a list of ((PDB_ID, chain), suffix, key_occ, subgraph) tuples.
        """
        # As this operation is to be performed once and since copy is only
        # used in it, we prefer to exceptionally place the import inside the
        # function.
        import copy
        l_occurrence_graphs = []
        for key, d_occ in self.d_occurrences.items():
            PDB_ID, chain, suffix = key
            for key_occ, node_map in d_occ.items():
                g = copy.deepcopy(data[(PDB_ID, chain)])
                # set for O(1) membership tests while filtering nodes
                nodes_to_keep = set(node_map.values())
                nodes_to_remove = [n for n in g.nodes() if n not in nodes_to_keep]
                g.remove_nodes_from(nodes_to_remove)
                l_occurrence_graphs.append(((PDB_ID, chain), suffix, key_occ, g))
        return l_occurrence_graphs

    def get_SSEs_distrib(self, data=None):
        """Return the cached SSE-count distribution, computing it on first
        call when *data* is provided (stays None if never given data)."""
        if self.SSEs_distrib is None and data is not None:
            self.SSEs_distrib = self.get_SSEs(data)
        return self.SSEs_distrib

    def get_SSEs(self, data):
        """Histogram of how many secondary-structure elements (SSEs) each
        occurrence spans: {sse_count: number_of_occurrences}.

        data: dict mapping (PDB_ID, chain) -> RNA graph whose nodes carry a
            "part_id" attribute (list of SSE identifiers the node belongs to).

        It is a total overkill to use create_occurrence_graphs just to get
        the distributions (because of its deepcopy), so the graphs in *data*
        are read directly instead.
        """
        distrib = {}
        for key, d_occ in self.d_occurrences.items():
            PDB_ID, chain, suffix = key
            RNA_graph = data[(PDB_ID, chain)]
            for key_occ, node_map in d_occ.items():
                d_sse_in_occ = {}
                ambiguous_nodes = []  # nodes belonging to 2+ SSEs
                d_pid = {}            # node -> list of SSE ids ("part_id")
                for node in node_map.values():
                    d_pid[node] = RNA_graph.nodes[node]["part_id"]
                    if len(d_pid[node]) < 2:
                        # unambiguous node: its single SSE is necessarily used
                        d_sse_in_occ[d_pid[node][0]] = True
                    else:
                        ambiguous_nodes.append(node)
                # keep only ambiguous nodes not already covered by a forced SSE
                n_to_cover = [node for node in ambiguous_nodes
                              if not any(pid in d_sse_in_occ
                                         for pid in d_pid[node])]
                # greedy set cover: repeatedly add the SSE covering the most
                # remaining nodes until every node is covered
                while n_to_cover:
                    d_sse = {}
                    for node in n_to_cover:
                        for sse in d_pid[node]:
                            d_sse.setdefault(sse, []).append(node)
                    candidate_sse = ''
                    candidate_count = 0
                    for sse, l_nodes in d_sse.items():
                        if len(l_nodes) > candidate_count:
                            candidate_sse = sse
                            candidate_count = len(l_nodes)
                    d_sse_in_occ[candidate_sse] = True
                    for node in d_sse[candidate_sse]:
                        n_to_cover.remove(node)
                distrib[len(d_sse_in_occ)] = distrib.get(len(d_sse_in_occ), 0) + 1
        return distrib
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment