Commit 5b22353a authored by Carlos GO's avatar Carlos GO
Browse files

readme

parent 2779ff55
# `RNAmigos` RNA-small molecule ligand prediction toolkit
This package includes tools for:
* Extracting binding pockets from PDB structures
* Building graph representations of RNA 3D structures
* RNA 3D graph plotting
* RNA 3D graph edit distance computation
* Vector embedding for RNA 3D graphs
* Ligand fingerprint prediction
Data used for training, models, and binding pocket visualizations are available [here](http://csb.cs.mcgill.ca/RNAmigos/).
## Requirements
* Python 3.6+
* Networkx 2.1+
* BioPython
......@@ -3,21 +3,6 @@
A* GED for RNA graphs.
Given two 2.5D RNA graphs, compute minimum graph edit distance.
Every time a node is added it must be connected to the backbone.
Nodes are also ordered..
Max degree is 3 (maybe 4 for base triples..).
Maybe decompose into planar graphs first and then solve exact for non-nested
keeping secondary structure fixed.
1. Remove pseudoknots.
2. Align SSEs. 3. Get GED of corresponding loop regions.
Just implement brute force A* first and then see where to improve.
"""
import os,sys
import logging
......@@ -89,9 +74,9 @@ def label_path(op_node, g1, g2):
else:
return f"{short(e1)} {label(e1,g1)} - {short(e2)} {label(e2,g2)} --> {label_path(op_node.parent, g1, g2)}"
def edge_edit_cost(u,v, g1, g2, node):
def edge_edit_cost(u,v, g1, g2, node, indel_cost=4, sub_cost=1, ss_break_cost=2):
"""
Recursively search for edge substitution.
Recursively search for edge substitution.
"""
if node == None:
return 0
......@@ -106,14 +91,16 @@ def edge_edit_cost(u,v, g1, g2, node):
l1 = g1[e1[0]][e1[1]]['label']
l2 = g2[e2[0]][e2[1]]['label']
if l1 != l2:
cost = 1
cost = sub_cost
if 'B53' in (l1, l2):
cost = sys.maxsize
if l1 == 'CWW' != l2 'CWW':
cost = ss_break_cost
elif e1 not in g1.edges and e2 in g2.edges:
#edge insertion
cost = 2
cost = indel_cost
elif e1 in g1.edges and e2 not in g2.edges:
cost = 2
cost = indel_cost
else:
cost = 0
return cost + edge_edit_cost(u,v,g1,g2,node.parent)
......@@ -218,10 +205,48 @@ def heuristic(op, parent, g1, g2):
return h_cost
def local_heuristic(op, parent, g1, g2):
    """
    Estimate a cost for one node operation from its unmapped neighbours.

    Only the direct neighbours of the two nodes in `op` are inspected, and
    neighbours that already appear as keys in the parent's maps are ignored.

    Args:
        op: pair `(u, v)` where `u` is looked up in `g1` and `v` in `g2`.
            Either side may be the placeholder string `'NILL'`.
        parent: object exposing `source_map` and `target_map` mappings whose
            keys are the nodes already handled, or `None` if there are none.
        g1: source graph; `g1[u]` must be iterable over the neighbours of `u`.
        g2: target graph; `g2[v]` must be iterable over the neighbours of `v`.

    Returns:
        int: if `u` is `'NILL'`, twice the number of unmapped neighbours of
        `v`; if `v` is `'NILL'`, twice the number of unmapped neighbours of
        `u`; otherwise twice the absolute difference between the two
        unmapped-neighbour counts.

    Raises:
        KeyError: in the `'NILL'` branches, if the non-`'NILL'` node is
            missing from its graph.
    """
    u, v = op

    if parent is not None:
        source_done = set(parent.source_map.keys())
        target_done = set(parent.target_map.keys())
    else:
        source_done = set()
        target_done = set()

    # 'NILL' is a placeholder, not a real node, so it never counts as mapped.
    source_done.discard('NILL')
    target_done.discard('NILL')

    if u == 'NILL':
        return 2 * len(set(g2[v]).difference(target_done))
    if v == 'NILL':
        return 2 * len(set(g1[u]).difference(source_done))

    # A node missing from its graph is treated as having no neighbours.
    # Previously a bare `except: pass` left the variable unbound, so the
    # failure resurfaced below as an UnboundLocalError.
    try:
        unmapped_nei_u = set(g1[u]).difference(source_done)
    except KeyError:
        unmapped_nei_u = set()
    try:
        unmapped_nei_v = set(g2[v]).difference(target_done)
    except KeyError:
        unmapped_nei_v = set()

    return 2 * abs(len(unmapped_nei_v) - len(unmapped_nei_u))
def ged(graphs, with_heuristic=False, timeout=30, halt=None, source_only=False):
"""
Compute GED of two graphs. """
Compute GED of two graphs.
"""
g1,g2 = graphs
biggest = np.argmax((len(g1), len(g2)))
g2 = graphs[biggest]
......@@ -230,14 +255,9 @@ def ged(graphs, with_heuristic=False, timeout=30, halt=None, source_only=False):
k = 0
g1_nodes = list(g1.nodes())
g2_nodes = list(g2.nodes())
# g1_nodes = list(g1.edges)
# g2_nodes = list(g2.edges)
root = OpNode()
# G = PG.AGraph(directed=True, strict=True)
# G.node_attr['style']='filled'
best = None
count = 0
......@@ -309,9 +329,6 @@ def ged(graphs, with_heuristic=False, timeout=30, halt=None, source_only=False):
count += 1
return best
def edge_set(g):
pass
def _graph_draw(g, color='blue', save=None):
pos = nx.spring_layout(g)
# pos = nx.spring_layout(g)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment