Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Roman Sarrazin-Gendron
RNABayesPairing2
Commits
fd70e074
Commit
fd70e074
authored
Nov 26, 2020
by
Roman Sarrazin-Gendron
Browse files
Merge branch 'treedecomp' into 'master'
Use treedecomp package See merge request
!1
parents
c0043541
870513fb
Changes
3
Hide whitespace changes
Inline
Side-by-side
bayespairing/src/module_td.py
View file @
fd70e074
...
...
@@ -23,15 +23,15 @@ def get_dependencies(module, map_index, out=None):
G
=
nx
.
Graph
()
G
.
add_nodes_from
(
range
(
len
(
module
.
nodes
)))
G
.
add_edges_from
([(
map_index
[
i
],
map_index
[
j
])
for
i
,
j
in
module
.
edges
()])
td
=
treedecomp
.
makeTD_nx
(
G
)
td
=
treedecomp
.
TreeDecompositionFactory
().
create
(
len
(
G
.
nodes
),
G
.
edges
)
if
out
:
td
.
writeTD
(
out
)
res
=
{}
for
idx
in
td
.
toposorted_bag_indices
():
children
=
td
.
adj
[
idx
]
x
=
td
.
bags
[
idx
]
x
=
td
.
get_
bags
()
[
idx
]
for
idy
in
children
:
y
=
td
.
bags
[
idy
]
y
=
td
.
get_
bags
()
[
idy
]
key
=
td
.
diff_set
(
x
,
y
)[
0
]
value
=
td
.
sep_set
(
x
,
y
)
res
[
inv_map
[
key
]]
=
[
inv_map
[
x
]
for
x
in
value
]
...
...
bayespairing/src/treedecomp.py
deleted
100644 → 0
View file @
c0043541
#!/usr/bin/env python3
#
# InfraRed --- A generic engine for Boltzmann sampling over constraint networks
# (C) Sebastian Will, 2018
#
# This file is part of the InfraRed source code.
#
# InfraRed provides a generic framework for tree decomposition-based
# Boltzmann sampling over constraint networks
#
## @file
##
## Storing and manipulating tree decompositions
##
## Supports computation by delegate
##
## * generate tree decompositions usng libhtd (via python wrapper) or calling TDlib
##
## * plot dependency graphs and tree decompositions using dot
##
## NOTE: TDlib writes files to disk, currently it is *not* safe to use
## in distributed computations. In contrast, libhtd (default) does not
## write to disk at all
##
## For using TDlib, it has to be in the Java class path (environment
## variable CLASSPATH). The default tree decomposition by libhtd needs
## to find the shared library libhtd.so; this could require to set
## LD_LIBRARY_PATH manually.
import
os
import
subprocess
import
re
from
math
import
sqrt
,
ceil
from
networkx.algorithms.approximation.treewidth
import
treewidth_min_degree
## @brief Class to hold a tree decomposition
##
## contains
##
## * bags a list of 0-based indexed lists of vertices of the decomposed graph (or "variables")
##
## * edges a list of edges; an edge is a list of bags (bag1, bag2)
##
## * adj adjacency lists (as defined by edges)
##
## edges are directed, an edge from bag x to y is represented by (x,y)
class
TreeDecomp
:
## @brief construct from bags and edges
##
## @param bags lists of indices of the variables (vertices) in the
## bag
##
## @param edges list of edges between the bags; edges are
## specified as pairs (x,y), where x is the index of the parent
## bag and y, of the child bag (0-based indices according to
## bags).
##
## Edges must be directed from root(s) to leaves. Typically the
## bags and edges result from calling a tree decomposition
## algorithm. The supported algorithms return correctly oriented
## edges.
def
__init__
(
self
,
bags
,
edges
):
# copy bags
self
.
bags
=
list
(
bags
)
# ensure that all edges are tuples or lists of length 2
assert
(
all
(
len
(
x
)
==
2
for
x
in
edges
))
# copy edges and convert all edges to pairs (even if they have been lists)
self
.
edges
=
list
(
map
(
lambda
x
:
(
x
[
0
],
x
[
1
]),
edges
))
self
.
update_adjacency_lists
()
## @brief Comute adjacency list representation
##
## @param n number of nodes
## @param edges list of edges
@
staticmethod
def
adjacency_lists
(
n
,
edges
):
adj
=
{
i
:[]
for
i
in
range
(
n
)}
for
(
i
,
j
)
in
edges
:
adj
[
i
].
append
(
j
)
return
adj
## @brief Update the adjacency representation
##
## Updates the adjacency representation in adj according to the
## number of bags and edges
def
update_adjacency_lists
(
self
):
self
.
adj
=
self
.
adjacency_lists
(
len
(
self
.
bags
),
self
.
edges
)
## @brief Toppological sort of bags
##
## @returns sorted list of bag indices
def
toposorted_bag_indices
(
self
):
n
=
len
(
self
.
bags
)
visited
=
set
()
sorted
=
list
()
def
toposort_component
(
i
):
visited
.
add
(
i
)
for
j
in
self
.
adj
[
i
]:
if
not
j
in
visited
:
toposort_component
(
j
)
sorted
.
append
(
i
)
for
i
in
range
(
n
):
if
not
i
in
visited
:
toposort_component
(
i
)
return
sorted
[::
-
1
]
## @brief Difference set
##
## @param xs first list
## @param ys second list
##
## @return ys setminus xs
##
## For bags xs and ys, this computes the introduced variable
## indices when going from parent xs to child ys.
@
staticmethod
def
diff_set
(
xs
,
ys
):
return
[
y
for
y
in
ys
if
y
not
in
xs
]
## @brief Separator set
##
## @param xs first list
## @param ys second list
##
## @return overlap of xs and ys
##
## For bags xs and ys, this computes the 'separator' of xs and ys,
## i.e. the common variables.
@
staticmethod
def
sep_set
(
xs
,
ys
):
return
[
y
for
y
in
ys
if
y
in
xs
]
## @brief Get tree width
def
treewidth
(
self
):
return
max
([
len
(
bag
)
for
bag
in
self
.
bags
])
-
1
## @brief Write tree decomposition in dot format
## @param out output file handle
def
writeTD
(
self
,
out
):
def
baglabel
(
bag
):
if
len
(
bag
)
==
0
:
return
""
lwidth
=
ceil
(
sqrt
(
len
(
bag
))
)
lnum
=
ceil
(
len
(
bag
)
/
lwidth
)
xs
=
[
str
(
i
)
for
i
in
sorted
(
bag
)]
lines
=
list
()
for
i
in
range
(
0
,
lnum
):
lines
.
append
(
" "
.
join
(
xs
[
i
*
lwidth
:(
i
+
1
)
*
lwidth
]))
return
"
\\
n"
.
join
(
lines
)
out
.
write
(
"digraph G {
\n\n
"
)
for
bagid
,
bag
in
enumerate
(
self
.
bags
):
label
=
baglabel
(
bag
)
out
.
write
(
"
\t
bag{} [label=
\"
{}
\"
]
\n
"
.
format
(
bagid
+
1
,
label
)
)
out
.
write
(
"
\n\n
"
)
for
(
x
,
y
)
in
self
.
edges
:
edgelabel
=
" "
.
join
(
[
str
(
x
)
for
x
in
self
.
diff_set
(
self
.
bags
[
x
],
self
.
bags
[
y
]
)]
)
out
.
write
(
"
\t
bag{} -> bag{} [label=
\"
{}
\"
]
\n
"
.
format
(
x
+
1
,
y
+
1
,
edgelabel
)
)
out
.
write
(
"
\n
}
\n
"
)
## @brief Guarantee a certain maximal diff set size
##
## @param maxdiffsize maximum size of diff sets after transformation
##
## @see diff_set()
##
## @pre the tree is connected
##
## Expands the tree by inserting a minimum number of in-between
## bags whereever the diff set size exceeds maxdiffsize. This can
## limit the complexity of certain algorithm based on the tree (in
## particular sampling in Infrared).
##
def
expand_treedecomposition
(
self
,
maxdiffsize
=
1
):
def
chunks
(
l
,
n
):
"""Yield successive n-sized chunks from l."""
for
i
in
range
(
0
,
len
(
l
),
n
):
yield
l
[
i
:
i
+
n
]
n
=
len
(
self
.
bags
)
root
=
self
.
toposorted_bag_indices
()[
0
]
next_bag_idx
=
n
## perform depth first traversal
visited
=
set
()
stack
=
[(
None
,
root
)]
# parent idx, bag index
while
stack
:
#pop
(
u
,
v
)
=
stack
[
-
1
]
stack
=
stack
[:
-
1
]
if
v
in
visited
:
continue
visited
.
add
(
v
)
# push children on stack
for
w
in
self
.
adj
[
v
]:
stack
.
append
((
v
,
w
))
if
u
is
not
None
:
# determine diff set
diff
=
self
.
diff_set
(
self
.
bags
[
u
],
self
.
bags
[
v
])
if
len
(
diff
)
>
maxdiffsize
:
sep
=
self
.
sep_set
(
self
.
bags
[
u
],
self
.
bags
[
v
])
if
(
u
,
v
)
in
self
.
edges
:
self
.
edges
.
remove
((
u
,
v
))
if
(
v
,
u
)
in
self
.
edges
:
self
.
edges
.
remove
((
v
,
u
))
last_bag_idx
=
u
newbag
=
sep
for
ext
in
chunks
(
diff
[:
-
1
],
maxdiffsize
):
newbag
.
extend
(
ext
)
self
.
bags
.
append
(
newbag
[:])
self
.
edges
.
append
([
last_bag_idx
,
next_bag_idx
])
last_bag_idx
=
next_bag_idx
next_bag_idx
+=
1
self
.
edges
.
append
([
last_bag_idx
,
v
])
self
.
update_adjacency_lists
()
# ##########################################################
# writing graphs to files in different formats
#
## @brief Plot graph
##
## @param graphfile file of graph in dot format
##
## The graph is plotted and written to a pdf file by calling
## graphviz's dot tool on th dot file.
def
dotfile_to_pdf
(
graphfile
):
outfile
=
re
.
sub
(
r
".dot$"
,
".pdf"
,
graphfile
)
subprocess
.
check_output
([
"dot"
,
"-Tpdf"
,
"-o"
,
outfile
,
graphfile
])
## @brief Write graph in dgf format
##
##@param out output file handle
##@param num_nodes number of nodes
##@param edges the edges of the graph
def
write_dgf
(
out
,
num_nodes
,
edges
):
edge_num
=
len
(
edges
)
out
.
write
(
"p tw {} {}
\n
"
.
format
(
num_nodes
,
edge_num
))
for
(
u
,
v
)
in
sorted
(
edges
):
out
.
write
(
"e {} {}
\n
"
.
format
(
u
+
1
,
v
+
1
))
## @brief Write graph in dot format
##
## @param out output file handle
## @param num_nodes number of nodes
## @param edges the edges of the graph
def
write_dot
(
out
,
num_nodes
,
edges
):
edge_num
=
len
(
edges
)
out
.
write
(
"graph G{
\n\n
"
)
for
v
in
range
(
num_nodes
):
out
.
write
(
"
\t
node{idx} [label=
\"
{idx}
\"
]
\n
"
.
format
(
idx
=
v
))
for
(
u
,
v
)
in
edges
:
out
.
write
(
"
\t
node{} -- node{}
\n
"
.
format
(
u
,
v
))
out
.
write
(
"
\n
}
\n
"
)
# ##########################################################
# Interface TDlib
#
# specific functions to call TDlib's tree decomposition tool
# and parse the result
#
## @brief Parse tree decomposition as written by TDlib
##
## @param tdfh file handle to tree decomposition in dot format
## @param num_nodes number of nodes
## @returns tree decomposition
##
## Assume file is in td format as written by the tool TDlib.
def
parseTD_TDlib
(
tdfh
,
num_nodes
):
bags
=
list
()
edges
=
list
()
for
line
in
tdfh
:
# catch bags "New_vertex"
if
re
.
search
(
"New_Vertex"
,
line
):
bags
.
append
([])
continue
m
=
re
.
search
(
r
"bag(\d+).+label=\"(.+)\""
,
line
)
if
m
:
bagid
=
int
(
m
.
group
(
1
))
label
=
m
.
group
(
2
)
labels
=
re
.
findall
(
'\d+'
,
label
)
labels
=
[
int
(
label
)
for
label
in
labels
]
if
bagid
!=
len
(
bags
)
+
1
:
raise
IOError
(
"Bag indices in td file must be consecutive (at bag {})!"
.
format
(
bagid
))
bags
.
append
(
labels
)
else
:
m
=
re
.
search
(
r
"bag(\d+) -- bag(\d+)"
,
line
)
if
m
:
edges
.
append
((
int
(
m
.
group
(
1
)),
int
(
m
.
group
(
2
))))
# decrease bag labels
def
dec
(
xs
):
return
[
[
x
-
1
for
x
in
ys
]
for
ys
in
xs
]
bags
=
dec
(
bags
)
edges
=
dec
(
edges
)
# add missing bags
present
=
set
()
for
b
in
bags
:
for
x
in
b
:
present
.
add
(
x
)
for
i
in
range
(
num_nodes
):
if
not
i
in
present
:
bags
.
append
([
i
])
return
TreeDecomp
(
bags
,
edges
)
## @brief Obtain tree deomposition using networkx
def
makeTD_nx
(
G
,
maxdiffsize
=
1
):
i
,
tree
=
treewidth_min_degree
(
G
)
bags
=
list
(
map
(
list
,
tree
.
nodes
))
edges
=
[(
bags
.
index
(
list
(
i
)),
bags
.
index
(
list
(
j
)))
for
i
,
j
in
tree
.
edges
]
td
=
TreeDecomp
(
bags
,
edges
)
td
.
expand_treedecomposition
(
maxdiffsize
)
return
td
# End of Interface tree demposition libraries
# ----------------------------------------------------------
if
__name__
==
"__main__"
:
pass
setup.py
View file @
fd70e074
...
...
@@ -9,6 +9,6 @@ setup(name='bayespairing',
license
=
'McGill'
,
packages
=
[
"bayespairing"
,
"bayespairing.src"
],
install_requires
=
[
"networkx"
,
"matplotlib"
,
"seaborn"
,
"weblogo"
,
"ghostscript"
,
"biopython"
,
"wrapt"
,
"anytree"
"networkx"
,
"matplotlib"
,
"seaborn"
,
"weblogo"
,
"ghostscript"
,
"biopython"
,
"wrapt"
,
"anytree"
,
"treedecomp"
],
zip_safe
=
False
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment