Commit f658890f authored by Roman Sarrazin-Gendron's avatar Roman Sarrazin-Gendron
Browse files

Merge branch 'master' of jwgitlab.cs.mcgill.ca:sarrazin/bp6-1

parents c1cf28fd 8f347d9f
......@@ -3,8 +3,9 @@ This software requires python 3.5, networkx 1.9, numpy and pickle, as well a pgm
All scripts can be found in src/pgmpy, updated from github https://github.com/pgmpy/pgmpy
RNAfold is also required to be in your PATH when you run Bayespairing.
Two python scripts are required to run BayesPairing.
Three python scripts are required to run BayesPairing.
PARSE_SEQUENCES : run BayesPairing
parse_sequences.py is used to run BayesPairing on a set of sequences, to parse them for some set of motifs.
The following arguments can be used to call it. only -seq is required.
......@@ -21,10 +22,34 @@ The following arguments can be used to call it. only -seq is required.
-k K Number of times the Bayes Net should be sampled
To test parse_sequences, you can try the following command, which will run the default 40 modules on a test cDNA sequence :
python parse_sequences -d all_carnaval -seq ATGGCTCAGGAGACTAACCAGACCCCGGGGCCCATGCTGTGTAGCACAGGATGTGGCTTTTATGGAAATCCTAGGACAAATGGAATGTGTTCAGTTTGCTACAAAGAACATCTTCAGAGGCAGCAGAATAGTGGCAGAATGAGCCCAATGGGGACAGCTAGTGGTTCCAACAGTCCTACCTCAGATTCTGCATCTGTACAGAGAGCAGACACTAGCTTAAACAACTGTGAAGGTGCTGCTGGCAGCACATCTGAAAAATCAAGAAATGTGCCTGCGGCTGCCTTGCCTGTAACTCAGCAAATGACAGAAATGAGCATTTCAACAGAGGACAAAATAACTACCCCGAAAACAGAGGTGTGAGAGCCAGTTGTCACTCAGCCCAGCCCATCAGTTTTTCAGCCCAGTACTTCTCAGAGTAAAGAAAAAGCTCCTGAATTGCCCAAACCAAAGAAAAACAGATGTTTCATGTGCAGAAAGAAAGTTGGTCTTACAGGTTTGACTGCCGATGTGGAAATTTGTTTTGTGGACTTCACCGTTAACTCTGACAAGCACAACTGTCCGTATGATTACAAAGCAGAAGCTGCAGCAAAAATCAGAAAAGAGAATCCAGTTGTTGTGGCTGAAAAAATTCAGAGAATA
python parse_sequences.py -d all_carnaval -seq ATGGCTCAGGAGACTAACCAGACCCCGGGGCCCATGCTGTGTAGCACAGGATGTGGCTTTTATGGAAATCCTAGGACAAATGGAATGTGTTCAGTTTGCTACAAAGAACATCTTCAGAGGCAGCAGAATAGTGGCAGAATGAGCCCAATGGGGACAGCTAGTGGTTCCAACAGTCCTACCTCAGATTCTGCATCTGTACAGAGAGCAGACACTAGCTTAAACAACTGTGAAGGTGCTGCTGGCAGCACATCTGAAAAATCAAGAAATGTGCCTGCGGCTGCCTTGCCTGTAACTCAGCAAATGACAGAAATGAGCATTTCAACAGAGGACAAAATAACTACCCCGAAAACAGAGGTGTGAGAGCCAGTTGTCACTCAGCCCAGCCCATCAGTTTTTCAGCCCAGTACTTCTCAGAGTAAAGAAAAAGCTCCTGAATTGCCCAAACCAAAGAAAAACAGATGTTTCATGTGCAGAAAGAAAGTTGGTCTTACAGGTTTGACTGCCGATGTGGAAATTTGTTTTGTGGACTTCACCGTTAACTCTGACAAGCACAACTGTCCGTATGATTACAAAGCAGAAGCTGCAGCAAAAATCAGAAAAGAGAATCCAGTTGTTGTGGCTGAAAAAATTCAGAGAATA
parse_sequences will return the score and the position of insertion of the N best candidates.
MODULE_FROM_DESC : create your own module and build your own dataset
module_from_desc.py is used to build a dataset to use as the -d option for parse_sequences.py
you can call it with a .DESC file, which describes a graph, and a .fasta file of sequences matching this structure.
The arguments are the following, they are all required except the pdb file. :
-h, --help show this help message and exit
-g G graph of the module, DESC file
-seq SEQ sequences, FASTA format
-n N Dataset name (create new or add to existing)
-pdb [PDB [PDB ...]] PDBs in which input is found
You can try the following command to test it; it will create a dataset called "test", composed of a single module,
generated from three examples of the .desc file :
python module_from_desc.py -g ../models/1A1T.B.1.desc -seq ../models/input.fasta -n test
DISPLAY_MODULES : Show the modules of a dataset
display_modules.py is a tool that helps you visualize what you just mined the sequence with.
It generates a sequence logo and a structural graph of every module in a dataset corresponding to the input name
usage: display_modules.py [-h] -n N
You can test this script by showing the module you just created with module_from_desc.py as follows :
python display_modules.py -n test
\ No newline at end of file
id: 1
Bases: 208_C 209_G 210_G 211_A 212_G 213_G
( 211_A )--- C/C - --- ( 212_G )
( 210_G )--- C/C - --- ( 211_A )
( 212_G )--- C/C - --- ( 213_G )
( 208_C )--- +/+ c --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- 5/5 s --- ( 213_G )
( 209_G )--- C/C - --- ( 210_G )
( 208_C )--- C/C - --- ( 209_G )
> seqA
CCGAGC
> seqB
CGCAAG
> seqC
CGGAGA
\ No newline at end of file
import pickle
import make_logo
import argparse
import ncDraw
import networkx as nx
import os
PATH_GRAPHS = '../Graphs'
def logo_module(aln,name):
for i,gs in enumerate(aln):
with(open("seq"+str(i)+".txt", "w+")) as f:
for g in gs:
n = sorted(g.nodes(data=True))
pos = [z[0] for z in n]
seq ="".join([z[1]['nt'] for z in n])
print(pos)
print(seq)
f.write(seq)
f.write('\n')
make_logo.make_logo("seq"+str(i)+".txt", "../Graphs/"+name+"_logo"+str(i))
os.remove("seq" + str(i) + ".txt")
def plot_module(data,name):
data = [x[0] for x in data]
print("LEN DATA")
print(len(data))
data = ncDraw.remove_already_there(data, PATH_GRAPHS)
lb, ub = 0, len(data)
tot = sum(1 for x in os.listdir('../Graphs') if (not x.startswith('.') and x.endswith('nxpickle')))
# lb,ub = 1,5
for i in range(lb, ub):
g = data[i]
i += tot
# print "NCM",(i+1)
nx.write_gpickle(g, os.path.join(PATH_GRAPHS, name+'_module-%s.nxpickle' % (i)))
print('preparing for strands')
strands = ncDraw.buildStrands(g)
print('ordering strands')
orderStrands = ncDraw.placeStrands(g, strands)
print('preparing coords')
coords = ncDraw.shiftHeight(g, strands, orderStrands)
print('creating file')
ncDraw.createTikz(i, g, coords,name)
# collate(lb, ub)
if __name__ == "__main__":
arguments = {}
parser = argparse.ArgumentParser()
parser.add_argument("-n", help="Dataset name (create new or add to existing)", required=True)
args = parser.parse_args()
model_name = args.n
aln = pickle.load(open("../models/"+model_name+"_one_of_each_graph.cPickle","rb"))
print(aln)
logo_module(aln,model_name)
plot_module(aln,model_name)
\usepackage{geometry}%
\usepackage{relsize}
\usepackage{xifthen}
\usepackage{tikz,tkz-graph,tikz-cd}
\usetikzlibrary{calc}
\usetikzlibrary{arrows,arrows.meta,positioning}
\usetikzlibrary{shapes,shapes.geometric,decorations,decorations.markings}
\usetikzlibrary{plotmarks}
\newcommand{\ScaleFactor}{1.2}
\newcommand{\XScale}[1]{1.1*#1}
\newcommand{\YScale}[1]{0.8*#1}
\tikzstyle{base}=[thick]
\tikzstyle{bp}=[blue!80!black,shorten >=2pt,shorten <=2pt]
\tikzstyle{lr}=[red!80!black,shorten >=2pt,shorten <=2pt]
\tikzstyle{B53}=[->,thick, black]
\tikzstyle{nt}=[circle, draw=gray, fill=gray!10,minimum width=10pt]
\tikzstyle{lbl}=[inner sep=0, yshift=10pt,font={\relsize{-2}}]
\tikzset{cbps/.style={postaction=decorate,decoration={
markings,
mark=at position 0.5*\pgfdecoratedpathlength+5pt
with {%
\ifthenelse{\equal{#1}{W}}{\arrow{Circle[scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{S}}{\arrow{Triangle[scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{H}}{\arrow{Square[scale=\ScaleFactor]}}{}%
}
}}}
\tikzset{rcbps/.style={postaction=decorate,decoration={
markings,
mark=at position 0.75*\pgfdecoratedpathlength+5pt
with {%
\ifthenelse{\equal{#1}{W}}{\arrow{Circle[scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{S}}{\arrow{Triangle[scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{H}}{\arrow{Square[scale=\ScaleFactor]}}{}%
}
}}}
\tikzset{tbps/.style={postaction=decorate,decoration={
markings,
mark=at position 0.5*\pgfdecoratedpathlength+2.7pt
with {%
\ifthenelse{\equal{#1}{W}}{\arrow{Circle[fill=white,scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{S}}{\arrow{Triangle[fill=white,scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{H}}{\arrow{Square[fill=white,scale=\ScaleFactor]}}{}%
}
}}}
\tikzset{rtbps/.style={postaction=decorate,decoration={
markings,
mark=at position 0.75*\pgfdecoratedpathlength+2.7pt
with {%
\ifthenelse{\equal{#1}{W}}{\arrow{Circle[fill=white,scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{S}}{\arrow{Triangle[fill=white,scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{H}}{\arrow{Square[fill=white,scale=\ScaleFactor]}}{}%
}
}}}
\newcommand{\Se}{{Triangle[scale=\ScaleFactor]}}
\newcommand{\We}{{Circle[scale=\ScaleFactor]}}
\newcommand{\He}{{Square[scale=\ScaleFactor]}}
\newcommand{\STe}{{Triangle[fill=white,scale=\ScaleFactor]}}
\newcommand{\WTe}{{Circle[fill=white,scale=\ScaleFactor]}}
\newcommand{\HTe}{{Square[fill=white,scale=\ScaleFactor]}}
%\newcommand{\SW}{{Triangle[fill=white,scale=\ScaleFactor]Circle[fill=white,scale=\ScaleFactor]}}
\tikzset{ttbps/.style={postaction=decorate,decoration={
markings,
mark=at position 0.5*\pgfdecoratedpathlength-2pt
with {%
\ifthenelse{\equal{#1}{SW}}{\arrow{Triangle[fill=white,scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{WH}}{\arrow{Circle[fill=white,scale=\ScaleFactor]}}{}%
},
mark=at position 0.5*\pgfdecoratedpathlength+4.7pt
with {%
\ifthenelse{\equal{#1}{SW}}{\arrow{Circle[fill=white,scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{WH}}{\arrow{Square[fill=white,scale=\ScaleFactor]}}{}%
}
}}}
\tikzset{ctbps/.style={postaction=decorate,decoration={
markings,
mark=at position 0.5*\pgfdecoratedpathlength-2pt
with {%
\ifthenelse{\equal{#1}{SW}}{\arrow{Triangle[scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{WH}}{\arrow{Circle[scale=\ScaleFactor]}}{}%
},
mark=at position 0.5*\pgfdecoratedpathlength+4.7pt
with {%
\ifthenelse{\equal{#1}{SW}}{\arrow{Circle[scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{WH}}{\arrow{Square[scale=\ScaleFactor]}}{}%
}
}}}
\tikzset{rttbps/.style={postaction=decorate,decoration={
markings,
mark=at position 0.75*\pgfdecoratedpathlength-2pt
with {%
\ifthenelse{\equal{#1}{SW}}{\arrow{Triangle[fill=white,scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{WH}}{\arrow{Circle[fill=white,scale=\ScaleFactor]}}{}%
},
mark=at position 0.75*\pgfdecoratedpathlength+4.7pt
with {%
\ifthenelse{\equal{#1}{SW}}{\arrow{Circle[fill=white,scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{WH}}{\arrow{Square[fill=white,scale=\ScaleFactor]}}{}%
}
}}}
\tikzset{chsbps/.style={postaction=decorate,decoration={
markings,
mark=at position 0.5*\pgfdecoratedpathlength-2pt
with {%
\ifthenelse{\equal{#1}{HS}}{\arrow{Square[scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{SH}}{\arrow{Triangle[scale=\ScaleFactor]}}{}%
},
mark=at position 0.5*\pgfdecoratedpathlength+4.7pt
with {%
\ifthenelse{\equal{#1}{HS}}{\arrow{Triangle[scale=\ScaleFactor]}}{}%
\ifthenelse{\equal{#1}{SH}}{\arrow{Square[scale=\ScaleFactor]}}{}%
}
}}}
\tikzstyle{CWW}=[base,double]
\tikzstyle{CSS}=[base,cbps={S}]
\tikzstyle{rCSS}=[base,rcbps={S}]
\tikzstyle{CHH}=[base,cbps={H}]
\tikzstyle{CSH}=[base,\Se-\He]
\tikzstyle{CSW}=[base,\Se-\We]
\tikzstyle{CWS}=[base,\We-\Se]
\tikzstyle{CHS}=[base,\He-\Se]
\tikzstyle{CHW}=[base,\He-\We]
\tikzstyle{CWH}=[base,\We-\He]
\tikzstyle{TWW}=[base,tbps={W}]
\tikzstyle{rTWW}=[base,rtbps={W}]
\tikzstyle{TSS}=[base,tbps={S}]
\tikzstyle{rTSS}=[base,rtbps={S}]
\tikzstyle{THH}=[base,tbps={H}]
\tikzstyle{TSH}=[base,\STe-\HTe]
\tikzstyle{TSW}=[base,\STe-\WTe]
\tikzstyle{TtSW}=[base,rttbps={SW}]
\tikzstyle{TWS}=[base,\WTe-\STe]
\tikzstyle{THS}=[base,\HTe-\STe]
\tikzstyle{THW}=[base,\HTe-\WTe]
\tikzstyle{TWH}=[base,\WTe-\HTe]
\tikzstyle{TtWH}=[base,ttbps={WH}]
\tikzstyle{CtWH}=[base,ctbps={WH}]
\tikzstyle{CtHS}=[base,chsbps={HS}]
\tikzstyle{CtSW}=[base,ctbps={SW}]
\newcommand{\rTSS}[2]{\draw[rTSS, lr] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,-0.0)$);}
\newcommand{\rlTSS}[2]{\draw[rTSS, bp] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,-0.0)$);}
\newcommand{\ruTSS}[2]{\draw[rTSS, lr] ($(n-#1.north east) + (0.01,0)$) |- ($(n-#2.north west) + (-0.01,0.2)$) -- ($(n-#2.north west) + (-0.01,0.0)$);}
\newcommand{\cTSS}[2]{\draw[TSS, lr] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,-0.0)$);}
\newcommand{\cCSS}[2]{\draw[CSS, lr] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,-0.0)$);}
\newcommand{\rTWW}[2]{\draw[TWW, lr] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,-0.0)$);}
\newcommand{\rCSS}[2]{\draw[rCSS, lr] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,-0.0)$);}
\newcommand{\rlCSS}[2]{\draw[rCSS, bp] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,-0.0)$);}
\newcommand{\ruCSS}[2]{\draw[rCSS, lr] ($(n-#1.north east) + (0.01,0)$) |- ($(n-#2.north west) + (-0.01,0.2)$) -- ($(n-#2.north west) + (-0.01,0.0)$);}
\newcommand{\rlCHS}[2]{\draw[CtHS, bp] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,-0.0)$);}
\newcommand{\rulTSW}[2]{\draw[TtSW, bp] ($(n-#1.north east) + (0.01,0)$) |- ($(n-#2.north west) + (-0.01,0.2)$) -- ($(n-#2.north west) + (-0.01,0.0)$);}
\newcommand{\rCSW}[2]{\draw[CtSW, lr] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,-0.0)$);}
\newcommand{\rTSW}[2]{\draw[TtSW, lr] ($(n-#1.north east) + (0.01,0)$) |- ($(n-#2.north west) + (-0.01,0.2)$) -- ($(n-#2.north west) + (-0.01,0.0)$);}
\newcommand{\rTWH}[2]{\draw[TtWH, lr] ($(n-#1.north east) + (0.01,0)$) |- ($(n-#2.north west) + (-0.01,0.2)$) -- ($(n-#2.north west) + (-0.01,0.0)$);}
\newcommand{\ruCWH}[2]{\draw[CtWH, lr] ($(n-#1.north east) + (0.01,0)$) |- ($(n-#2.north west) + (-0.01,0.2)$) -- ($(n-#2.north west) + (-0.01,0.0)$);}
\newcommand{\rCWH}[2]{\draw[CtWH, lr] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,0.0)$);}
\newcommand{\rdTWH}[2]{\draw[TtWH, lr] ($(n-#1.south east) + (0.01,0)$) |- ($(n-#2.south west) + (-0.01,-0.2)$) -- ($(n-#2.south west) + (-0.01,0.0)$);}
\newcommand{\ruCWW}[2]{\draw[CWW, bp] ($(n-#1.north east) + (0.01,0)$) |- ($(n-#2.north west) + (-0.01,0.2)$) -- ($(n-#2.north west) + (-0.01,0.0)$);}
#!/var/www/html/cgi-bin/carnaval/Src/VEnv/bin/python
import sys
from weblogolib import *
def help():
return """Required args:
-i <input file> list of sequences, one per line
-o <output file name> name of output, will add .eps
Optionals:
-t <string> title
-h this help displayed
"""
def make_logo(in_file, out_file):
fin = open(in_file)
seqs = read_seq_data(fin)
data = LogoData.from_seqs(seqs)
options = LogoOptions()
options = LogoOptions()
options.color_scheme = colorscheme.nucleotide
options.unit_name = 'probability'
options.fineprint = ''
options.creator_text = ''
format = LogoFormat(data, options)
out = pdf_formatter(data, format)
with open('%s.pdf' % out_file, 'wb') as f:
f.write(out)
if __name__ == '__main__':
in_file = None
out_file = None
title = None
opts = sys.argv
for i, o in enumerate(opts):
if o == '-i':
in_file = opts[i + 1]
elif o == '-o':
out_file = opts[i + 1]
elif o in ('-h', '-help'):
print(help())
if not in_file or not out_file:
# print "need args -i and -o"
sys.exit(1)
make_logo(in_file, out_file)
......@@ -4,12 +4,9 @@ import matplotlib
from matplotlib import pyplot as plt
import networkx.algorithms.isomorphism as iso
import operator
import os.path
import pickle
MODULES = []
try:
MODULES = pickle.load(open("../models/MODULES.cPickle","rb"))
except FileNotFoundError:
print("No modules registered yet. Create you first now, or use a suggested dataset")
import argparse
......@@ -66,7 +63,15 @@ def make_graph(desc_file):
if n[0].isdigit():
node_n = n[:-2]
nuc = n[-1]
g.add_node(int(node_n), {'nt': nuc})
p_id = 0
if len(g.nodes())==0:
g.add_node(int(node_n), {'nt': nuc, 'part_id':0})
else:
if int(node_n) - int(g.nodes()[-1])<3:
p_id = g.nodes(data=True)[-1][1]['part_id']
else:
p_id = int(g.nodes(data=True)[-1][1]['part_id'])+1
g.add_node(int(node_n), {'nt': nuc, 'part_id': p_id})
for line in range(2, len(lines)):
bp = lines[line].split("---")
p1 = bp[0][4:8].replace(" ", "")
......@@ -77,12 +82,12 @@ def make_graph(desc_file):
if orientation == 's':
continue
elif interaction == "C/C":
g.add_edge(int(p1), int(p2), {'long_range': False, 'label': 'b53'})
g.add_edge(int(p1), int(p2), {'long_range': False, 'label': 'B53'})
elif interaction == "+/+" or interaction == "-/-":
bond = orientation + "WW"
bond = orientation.upper() + "WW"
g.add_edge(int(p1), int(p2), {'long_range': False, 'label': bond})
else:
bond = orientation + interaction[0] + interaction[2]
bond = orientation.upper() + interaction[0] + interaction[2]
g.add_edge(int(p1), int(p2), {'long_range': False, 'label': bond})
return g
......@@ -104,12 +109,39 @@ def make_new_graph_examples(g,fasta):
print(h.nodes(data=True))
return graphs
def create_module(desc,fasta):
def create_module(desc,fasta,model_name, PDBs=[]):
graphs_name = "../models/" +model_name+"_one_of_each_graph.cPickle"
aln_name = "../models/"+model_name+"aligned_modulegraphs.Pickle"
PDB_name = "../models/"+model_name+"PDB.Pickle"
g = make_graph(desc)
graph_list = make_new_graph_examples(g, fasta)
MODULES.append(graph_list)
pickle.dump(MODULES, open("../models/MODULES.cPickle", 'wb'))
return graph_list
if os.path.isfile(graphs_name):
graphs = pickle.load(open(graphs_name,"rb"))
graphs.append(graph_list)
aln = pickle.load(open(aln_name,"rb"))
for graph in range(len(graph_list)):
for position in aln:
aln[position].append(position)
PDB_list = pickle.load(open(PDB_name,"rb"))
if PDB_list is None:
PDB_list = []
PDB_list.append(PDBs)
else:
graphs=[graph_list]
aln = {}
for graph in range(len(graph_list)):
for position in graph_list[0].nodes():
if position not in aln:
aln[position] = [position]
else:
aln[position].append(position)
PDB_list = [PDBs]
pickle.dump(graphs, open(graphs_name, 'wb'))
pickle.dump(aln, open(aln_name, 'wb'))
pickle.dump(PDB_list, open(PDB_name, 'wb'))
return graphs,aln
def fuse_existing_databases(new_dataset,name1,name2,l1,l2):
......@@ -165,4 +197,15 @@ if __name__ == "__main__":
#desc_file = "../models/1A1T.B.1.desc"
#fasta_file = "../models/input.fasta"
#graphs = create_module(desc_file,fasta_file)
fuse_existing_databases("all_carnaval",'carnaval','jeffrey',[0, 5, 8, 26, 6, 18, 19, 21, 24, 25, 39, 53, 54, 58, 119, 122, 135, 146, 162, 168, 191, 194, 198, 216],[1,4,7,10,11,24,33,40,55,66,69,101,103,113,149,156,167])
#fuse_existing_databases("all_carnaval",'carnaval','jeffrey',[0, 5, 8, 26, 6, 18, 19, 21, 24, 25, 39, 53, 54, 58, 119, 122, 135, 146, 162, 168, 191, 194, 198, 216],[1,4,7,10,11,24,33,40,55,66,69,101,103,113,149,156,167])
arguments = {}
parser = argparse.ArgumentParser()
#parser.add_argument("--verbose", help="increase output verbosity", action="store_true")
parser.add_argument("-g", help="graph of the module, DESC file", required = True)
parser.add_argument("-seq", help="sequences, FASTA format",required=True)
parser.add_argument("-n", help="Dataset name (create new or add to existing)", required=True)
parser.add_argument('-pdb', nargs='*', help='PDBs in which input is found')
args = parser.parse_args()
mod = create_module(args.g,args.seq, args.n, args.pdb)
for i in mod:
print(i)
import pickle
import networkx as nx
import matplotlib.pyplot as plt
import itertools
import subprocess
import os
PATH_GRAPHS = '../Graphs'
def buildStrands(g):
"""
Builds the strands, i.e. the sequences of positions connected by
backbone connections, and regroups them within a two level structure.
First level: Left/right of long range interaction
Second level: list of strands
TODO (for the brave only):
1) Distinguish between strands and hairpin loops
2) Special treatment for isolated nucleotides
"""
children = {n: [] for n in g.nodes()}
nonRoots = {}
strands = {0: [], 1: []}
if "part_id" in g.nodes(data=True)[0]:
s = sorted(set(n[1]['part_id'] for n in g.nodes(data=True)))
sides = {n[0]: s.index(n[1]['part_id']) for n in g.nodes(data=True)}
else:
s = sorted(set(0 for n in g.nodes(data=True)))
sides = {n[0]: s.index(0) for n in g.nodes(data=True)}
for n in g.nodes(data=True):
n1 = n[0]
data = n[1]
for e in g.edges(data=True):
n1, n2 = e[0], e[1]
attr = e[2]
if attr["label"] == "B53":
children[n1].append(n2)
nonRoots[n2] = 0
for n in g.nodes(data=True):
n1 = n[0]
if n1 not in nonRoots:
s = sides[n1]
nlist = [n1]
l = children[n1][:]
nprev = n1
while len(l) > 0:
nnext = l.pop()
nlist.append(nnext)
l = l + children[nnext]
nprev = nnext
strands[s].append(nlist)
return strands
def doesCross(n1, n2, m1, m2, node2strand):
"""
Check whether two base-pairs cross in the current strand ordering/orientations
"""
if node2strand[n1] == node2strand[m2]:
m1, m2 = m2, m1
if node2strand[n1] == node2strand[m1] and node2strand[n2] == node2strand[m2]:
return (-1 * (n1 - m1) * (n2 - m2) * node2strand[n1] * node2strand[n2] > 0)
return False
def neighbors(s1, s2):
"""
Check whether two strands, identified by their strand orders or by their x-ordinates, are adjacent.
"""
return abs(abs(s1) - abs(s2)) == 1