Commit ba064f26 authored by Vladimir Reinharz's avatar Vladimir Reinharz
Browse files

v1.0

parents
This diff is collapsed.
AM942759.1: Proteus mirabilis strain HI4320, complete genome
ABVP01000019.1: Proteus penneri ATCC 35198, P_penneri-1.1.1_Cont6.1, whole genome shotg or ABVP01000023.1: Proteus penneri ATCC 35198 P_penneri-1.1.1_Cont336, whole genome shotg
ABJD02000102.1: Providencia stuartii ATCC 25827, P_stuartii-2.0.1_Cont1789, whole genom or ABJD02000101.1: Providencia stuartii ATCC 25827 P_stuartii-2.0.1_Cont1786, whole genom
ABXV02000023.1: Providencia rustigianii DSM 4541, P_rustigianii-1.0.1_Cont1.9, whole ge or ABXV02000027.1: Providencia rustigianii DSM 4541 P_rustigianii-1.0.1_Cont2.12, whole g
CP002154.1: Edwardsiella tarda FL6-60, complete genome
CP001135.1: Edwardsiella tarda EIB202, complete genome
CP002775.1: Serratia sp. AS13, complete genome
CP002773.1: Serratia plymuthica AS9, complete genome
CP000720.1: Yersinia pseudotuberculosis IP 31758, complete genome
AM286415.1: Yersinia enterocolitica subsp. enterocolitica 8081, complete genome
CP001608.1: Yersinia pestis biovar Medievalis str. Harbin 35, complete genome
AL590842.1: Yersinia pestis CO92, complete genome
CP000308.1: Yersinia pestis Antiqua, complete genome
ACNR01000027.1: Yersinia pestis biovar Orientalis str. India 195, Chromosome_Sequence27
AAOS02000021.1: Yersinia pestis biovar Orientalis str. IP275, gcontig_1106166534832, wh
CP002038.1: Dickeya dadantii 3937, complete genome
CP001790.1: Pectobacterium wasabiae WPP163, complete genome
CP002124.1: Erwinia sp. Ejp617, complete genome
CP003085.1: Pantoea ananatis PA13, complete genome
FN543093.2: Cronobacter turicensis z3032, complete genome
CP000783.1: Cronobacter sakazakii ATCC BAA-894, complete genome
AP006725.1: Klebsiella pneumoniae subsp. pneumoniae NTUH-K2044, DNA complete genom
CP000647.1: Klebsiella pneumoniae subsp. pneumoniae MGH 78578, complete genome
CP000964.1: Klebsiella pneumoniae 342, complete genome
CP001891.1: Klebsiella variicola At-22, complete genome
CP001918.1: Enterobacter cloacae subsp. cloacae ATCC 13047, complete genome
CP003026.1: Enterobacter asburiae LF7a, complete genome
CP000822.1: Citrobacter koseri ATCC BAA-895, complete genome
CACD01000298.1: Citrobacter freundii str. ballerup 7851/39, whole genome shotgun seque
AETP01000030.1: Salmonella enterica subsp. enterica serovar Montevideo str. 2009085258,
ABEJ01000018.1: Salmonella enterica subsp. enterica serovar Schwarzengrund str. SL480,
CAAZ01000080.1: Salmonella enterica subsp. enterica serovar Typhi str. E98-3139, conti
FM200053.1: Salmonella enterica subsp. enterica serovar Paratyphi A str. AKU_12601,
CP001113.1: Salmonella enterica subsp. enterica serovar Newport str. SL254, comple
AE017220.1: Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67,
ABEL01000007.1: Salmonella enterica subsp. enterica serovar Heidelberg str. SL486, gcon or ABEL01000016.1: Salmonella enterica subsp. enterica serovar Heidelberg str. SL486 gcon
CP002614.1: Salmonella enterica subsp. enterica serovar Typhimurium str. UK-1, com
CP001144.1: Salmonella enterica subsp. enterica serovar Dublin str. CT_02021853, c
AM933172.1: Salmonella enterica subsp. enterica serovar Enteritidis str. P125109, c
CU928145.2: Escherichia coli 55989, chromosome, complete genome
FN649414.1: Escherichia coli ETEC H10407, complete genome
CU651637.1: Escherichia coli LF82, chromosome, complete sequence
CP000247.1: Escherichia coli 536, complete genome
AE014075.1: Escherichia coli CFT073, complete genome
CP002729.1: Escherichia coli UMNK88, complete genome
AP009378.1: Escherichia coli SE15, DNA complete genome
CP002167.1: Escherichia coli UM146, complete genome
CP003034.1: Escherichia coli O7:K1 str. CE10, complete genome
CP000243.1: Escherichia coli UTI89, complete genome
BA000007.2: Escherichia coli O157:H7 str. Sakai, DNA complete genome
AM946981.2: Escherichia coli BL21(DE3), complete genome
CP001368.1: Escherichia coli O157:H7 str. TW14359, complete genome
CP001671.1: Escherichia coli ABU 83972, complete genome
CP002212.1: Escherichia coli str. 'clone D i14', complete genome
AP009048.1: Escherichia coli str. K12 substr. W3110, DNA, complete genome
CP001855.1: Escherichia coli O83:H1 str. NRG 857C, complete genome
AE005174.2: Escherichia coli O157:H7 EDL933, complete genome
AP010960.1: Escherichia coli O111:H- str. 11128, DNA, complete genome
CP001969.1: Escherichia coli IHE3034, complete genome
CP001396.1: Escherichia coli BW2952, complete genome
CP001164.1: Escherichia coli O157:H7 str. EC4115, complete genome
CP002797.2: Escherichia coli NA114, complete genome
CP000800.1: Escherichia coli E24377A, complete genome
CP000802.1: Escherichia coli HS, complete genome
CU928163.2: Escherichia coli UMN026, chromosome, complete genome
CU928164.2: Escherichia coli IAI39, chromosome, complete genome
CP002185.1: Escherichia coli W, complete genome
CP001509.3: Escherichia coli BL21(DE3), complete genome
CP000819.1: Escherichia coli B str. REL606, complete genome
FN554766.1: Escherichia coli 042, complete genome
CP000970.1: Escherichia coli SMS-3-5, complete genome
AP009240.1: Escherichia coli SE11, DNA complete genome
CP001846.1: Escherichia coli O55:H7 str. CB9615, complete genome
CP002516.1: Escherichia coli KO11, complete genome
CP000468.1: Escherichia coli APEC O1, complete genome
CP000946.1: Escherichia coli ATCC 8739, complete genome
U00096.2: Escherichia coli str. K-12 substr. MG1655, complete genome
CP002211.1: Escherichia coli str. 'clone D i2', complete genome
CU928161.2: Escherichia coli S88, chromosome, complete genome
CP000948.1: Escherichia coli str. K12 substr. DH10B, complete genome
AP010953.1: Escherichia coli O26:H11 str. 11368, DNA, complete genome
AP010958.1: Escherichia coli O103:H2 str. 12009, DNA, complete genome
CU928162.2: Escherichia coli ED1a, chromosome, complete genome
CU928160.2: Escherichia coli IAI1, chromosome, complete genome
CP001637.1: Escherichia coli DH1, complete genome
AE005674.2: Shigella flexneri 2a str. 301, complete genome
CP000266.1: Shigella flexneri 5 str. 8401, complete genome
CP001383.1: Shigella flexneri 2002017, complete genome
CP000036.1: Shigella boydii Sb227, complete genome
CP001063.1: Shigella boydii CDC 3083-94, complete genome
CP000038.1: Shigella sonnei Ss046, complete genome
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
import sys
from Bio import Phylo
from io import StringIO
if __name__ == '__main__':
    # Prune a Newick tree down to the species named in a "keep" list, then
    # write the pruned tree followed by a species-name:accession mapping to
    # "theTree.out".
    if len(sys.argv) < 3:
        print("USAGE : python getNewickTree newickTree listOfSpeciesToKeep")
        sys.exit(1)  # a usage error must not report success

    # The tree is expected on a single line.  As before the last line wins,
    # but blank lines are ignored so a trailing empty line cannot replace it.
    theTree = None
    with open(sys.argv[1], 'r') as newickFile:
        for line in newickFile:
            if line.strip():
                theTree = line
    if theTree is None:
        print("No Newick tree found in " + sys.argv[1])
        sys.exit(1)
    phyloTree = Phylo.read(StringIO(theTree), "newick")

    # Each useful line looks like "<accession>: <species name>, <description>".
    listToKeep = []
    dictNameToID = {}
    with open(sys.argv[2], 'r') as listToKeepFile:
        for line in listToKeepFile:
            colonSplit = line.split(": ")
            if len(colonSplit) < 2:
                # Blank line or no "<accession>: " prefix: nothing to keep.
                continue
            # strip() returns a new string; the result has to be kept,
            # otherwise a name without a following comma retains its newline.
            name = colonSplit[1].split(",")[0].strip()
            name = name.replace(" ", "_")
            print("The name: " + name)
            dictNameToID[name] = colonSplit[0]
            listToKeep.append(name)
    print("List to keep size = " + str(len(listToKeep)))

    # Collect the names first and prune afterwards, so the tree is not
    # modified while it is being traversed.
    keepNames = set(listToKeep)
    listNodesToDel = [element.name
                      for element in phyloTree.find_elements()
                      if element.name is not None
                      and element.name not in keepNames]
    for node in listNodesToDel:
        try:
            phyloTree.prune(target=node)
        except Exception:
            # Best effort: report the node and carry on with the others.
            print("Could not delete node: " + node)

    print("Tree after pruning:")
    print(str(phyloTree))
    with open("theTree.out", 'w') as outputNewick:
        Phylo.write(phyloTree, outputNewick, format="newick")
        outputNewick.write("\n\nMapping:\n")
        for key, value in dictNameToID.items():
            outputNewick.write(key + ":" + value + "\n")
import sys
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def name(id):
    """Return the title of the NCBI nuccore page for a nucleotide accession.

    Any version suffix is dropped before the lookup ("CP000036.1" is queried
    as "CP000036").  The page is read line by line until a line containing
    ``<title>`` is found; the text after that tag is returned, cut at
    ``</title>`` and without the trailing " - Nucleotide - NCBI" label when
    those are present.

    Prints a diagnostic and exits the program with status 1 if the page
    cannot be downloaded.  Returns None if no line contains ``<title>``.
    """
    accession = id.split('.', 1)[0]
    url = 'http://www.ncbi.nlm.nih.gov/nuccore/%s' % accession
    try:
        f = urlopen(url)
    except HTTPError as e:
        print("HTTP Error:", e.code)
        print("NCBI url %s: download failed" % url)
        sys.exit(1)
    except URLError as e:
        print("URL Error:", e.reason)
        print("NCBI url %s: download failed" % url)
        sys.exit(1)

    open_tag = '<title>'
    site_label = ' - Nucleotide - NCBI'
    with f:  # release the HTTP response, including on the early return
        for raw in f:
            # A stray undecodable byte should not abort the whole lookup.
            line = raw.decode(errors='replace').strip()
            start = line.find(open_tag)
            if start == -1:
                continue
            # Slice relative to the <title> tag itself rather than the first
            # '>' on the line, and only remove what is really there.
            title = line[start + len(open_tag):]
            end = title.find('</title>')
            if end != -1:
                title = title[:end]
            if title.endswith(site_label):
                title = title[:-len(site_label)]
            return title
    return None
if __name__ == '__main__':
    # Manual smoke test: look up one fixed accession and print the result.
    accession = 'AFGV00000000'
    print(name(accession))
This diff is collapsed.
import sys
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def name(id):
    """Return the title of the NCBI nuccore page for a nucleotide accession.

    Any version suffix is dropped before the lookup ("CP000036.1" is queried
    as "CP000036").  The page is read line by line until a line containing
    ``<title>`` is found; the text after that tag is returned, cut at
    ``</title>`` and without the trailing " - Nucleotide - NCBI" label when
    those are present.

    Prints a diagnostic and exits the program with status 1 if the page
    cannot be downloaded.  Returns None if no line contains ``<title>``.
    """
    accession = id.split('.', 1)[0]
    url = 'http://www.ncbi.nlm.nih.gov/nuccore/%s' % accession
    try:
        f = urlopen(url)
    except HTTPError as e:
        print("HTTP Error:", e.code)
        print("NCBI url %s: download failed" % url)
        sys.exit(1)
    except URLError as e:
        print("URL Error:", e.reason)
        print("NCBI url %s: download failed" % url)
        sys.exit(1)

    open_tag = '<title>'
    site_label = ' - Nucleotide - NCBI'
    with f:  # release the HTTP response, including on the early return
        for raw in f:
            # A stray undecodable byte should not abort the whole lookup.
            line = raw.decode(errors='replace').strip()
            start = line.find(open_tag)
            if start == -1:
                continue
            # Slice relative to the <title> tag itself rather than the first
            # '>' on the line, and only remove what is really there.
            title = line[start + len(open_tag):]
            end = title.find('</title>')
            if end != -1:
                title = title[:end]
            if title.endswith(site_label):
                title = title[:-len(site_label)]
            return title
    return None
if __name__ == '__main__':
    # Manual smoke test: look up one fixed accession and print the result.
    accession = 'AFGV00000000'
    print(name(accession))
import sys
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def name(id):
    """Return the title of the NCBI nuccore page for a nucleotide accession.

    Any version suffix is dropped before the lookup ("CP000036.1" is queried
    as "CP000036").  The page is read line by line until a line containing
    ``<title>`` is found; the text after that tag is returned, cut at
    ``</title>`` and without the trailing " - Nucleotide - NCBI" label when
    those are present.

    Download failures do not raise: a string starting with "HTTP Error:" or
    "URL Error:" that names the accession is returned instead.  Returns None
    if no line of the page contains ``<title>``.
    """
    accession = id.split('.', 1)[0]
    url = 'http://www.ncbi.nlm.nih.gov/nuccore/%s' % accession
    try:
        f = urlopen(url)
    except HTTPError as e:
        return ("HTTP Error:" + str(e.code)
                + "--> failed to find name for id: " + accession)
    except URLError as e:
        # e.reason is often an exception object rather than a string, so it
        # must be converted before concatenation.
        return ("URL Error:" + str(e.reason)
                + "--> failed to find name for id: " + accession)

    open_tag = '<title>'
    site_label = ' - Nucleotide - NCBI'
    with f:  # release the HTTP response, including on the early return
        for raw in f:
            # A stray undecodable byte should not abort the whole lookup.
            line = raw.decode(errors='replace').strip()
            start = line.find(open_tag)
            if start == -1:
                continue
            # Slice relative to the <title> tag itself rather than the first
            # '>' on the line, and only remove what is really there.
            title = line[start + len(open_tag):]
            end = title.find('</title>')
            if end != -1:
                title = title[:end]
            if title.endswith(site_label):
                title = title[:-len(site_label)]
            return title
    return None
def readAlignment(filename):
    """Read an RFAM alignment file into a dictionary.

    Every line containing a tab is split on tabs.  The first field, cut at
    its first "/" (which removes the positions), becomes the key; the second
    field becomes the value, stored exactly as it appears in the file (so it
    keeps the trailing newline when it is the last field of the line).  If a
    key occurs several times the last occurrence wins.

    Lines without a tab (blank lines, markup) are skipped.
    """
    nameToAlignDict = {}
    with open(filename, 'r') as rfamFile:
        for line in rfamFile:
            tabsplit = line.split("\t")
            if len(tabsplit) < 2:
                continue
            speciesID = tabsplit[0].split("/")[0]  # eliminating the positions
            nameToAlignDict[speciesID] = tabsplit[1]
    return nameToAlignDict
def intersectDictKeys(dict1, dict2):
    """Return a list of the keys of dict1 that are also keys of dict2.

    The keys appear in the order in which dict1 yields them.
    """
    return [key for key in dict1 if key in dict2]
if __name__ == '__main__':
    # Print how many sequence IDs two RFAM alignment files share, then one
    # "<id>: <NCBI page title>" line per shared ID.
    if len(sys.argv) < 3:
        print("USAGE: python %s rfamAlignFile1 rfamAlignFile2" % sys.argv[0])
        sys.exit(1)  # a usage error must not report success
    dictFam1 = readAlignment(sys.argv[1])
    dictFam2 = readAlignment(sys.argv[2])
    intersectionOfNames = intersectDictKeys(dictFam1, dictFam2)
    print(str(len(intersectionOfNames)))
    for speciesID in intersectionOfNames:
        title = name(speciesID)
        if title is None:
            # name() found no <title> line; concatenating None would raise
            # TypeError and abort the remaining lookups.
            title = "unknown (no title found)"
        print(speciesID + ": " + title)
import sys
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def name(id):
    """Return the title of the NCBI nuccore page for a nucleotide accession.

    Any version suffix is dropped before the lookup ("CP000036.1" is queried
    as "CP000036").  The page is read line by line until a line containing
    ``<title>`` is found; the text after that tag is returned, cut at
    ``</title>`` and without the trailing " - Nucleotide - NCBI" label when
    those are present.

    Prints a diagnostic and exits the program with status 1 if the page
    cannot be downloaded.  Returns None if no line contains ``<title>``.
    """
    accession = id.split('.', 1)[0]
    url = 'http://www.ncbi.nlm.nih.gov/nuccore/%s' % accession
    try:
        f = urlopen(url)
    except HTTPError as e:
        print("HTTP Error:", e.code)
        print("NCBI url %s: download failed" % url)
        sys.exit(1)
    except URLError as e:
        print("URL Error:", e.reason)
        print("NCBI url %s: download failed" % url)
        sys.exit(1)

    open_tag = '<title>'
    site_label = ' - Nucleotide - NCBI'
    with f:  # release the HTTP response, including on the early return
        for raw in f:
            # A stray undecodable byte should not abort the whole lookup.
            line = raw.decode(errors='replace').strip()
            start = line.find(open_tag)
            if start == -1:
                continue
            # Slice relative to the <title> tag itself rather than the first
            # '>' on the line, and only remove what is really there.
            title = line[start + len(open_tag):]
            end = title.find('</title>')
            if end != -1:
                title = title[:end]
            if title.endswith(site_label):
                title = title[:-len(site_label)]
            return title
    return None
if __name__ == '__main__':
    # Manual smoke test: look up one fixed accession and print the result.
    accession = 'AFGV00000000'
    print(name(accession))
.((((((((......(((((...().)))))....)))))).)).((((((..((((.....((....))...))))...((.....)).)))).))...
.((((((((......((((((..()))))))....)))))).)).((((((.(((((....(((....)))..))))).(((.....))))))).))...
#
(((((((((UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUAGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00006,(UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00008,GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00012)91.00:0.00003)98.00:0.00089,UGCUCAUUUCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUUCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00130)98.00:0.00050,GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUAGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00119)96.00:0.00016,UGCUCAUUCUAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCUAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00140)70.00:0.00002,(UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00102,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00116)12.00:0.00066)97.00:0.00077,(((GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.0007
4,GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00028)19.00:0.00085,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00129)97.00:0.00319,((((GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00002,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00005)63.00:0.00091,(UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00097,(GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00099,GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00113)27.00:0.00004)27.00:0.00013)23.00:0.00002,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUAGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00131)96.00:0.00062,((UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00141,GG
CUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00189)94.00:0.00155,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCCAC-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00234)91.00:0.00043)97.00:0.00059)98.00:0.00004)96.00:0.00104,(UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00177,GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00166)95.00:0.00093)97.00:0.00000,((((GGCUCAUUCGCCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00206,((GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00071,GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00004)93.00:0.00190,GGCUCAUUCGCCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-GGCUCAUUCGCCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00172)20.00:0.00000)44.00:0.00005,(((UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCA
CCGGACACAACGUUGAGUGAAGCACCCUU:0.00030,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00036)90.00:0.00006,(UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00001,(GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00022,GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00012)76.00:0.00007)95.00:0.00014)97.00:0.00129,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00226)60.00:0.00009)64.00:0.00062,(UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00086,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCACUUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00105)51.00:0.00171)63.00:0.00212,(GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCUAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCCUU:0.00221,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGACGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACACAACGUUGAGUGAAGCACCACU-GGCUCAUUCACCGACUUAUGUCAGCACAUAAAAUUGAAUGACGCAGGGCCUGCGUCCACUUUGAU
CUAUCAUCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00210)100.00:0.00075)100.00:0.00039)97.00:0.00381,(((UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACAGAGAUCCAUCAAUGGACAUAACGUUGAGUGAAGCACCUUU-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU:0.02365,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACAGAGAUCCAUCAAUGGACAUAACGUUGAGUGAAGCACCUUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACAGAGAUCCAUCACCGGACAUAACGUUGAGUGAAGCACCUUU:0.03183)85.00:0.00668,(((((GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00290,GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00108)95.00:0.00047,(GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAACUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUUUGAACUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00099,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACUGGACACAACGUUGAGUGAAGCACCUUU-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUUUGAACUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00124)1.00:0.00000)0.00:0.00020,(GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00251,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACUGGACACAACGUUGAGUGAAGCACCUUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCUUU:0.00279)15.00:0.00027)45.00:0.00044,(UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACUGGACACAACGUUGAGUGAAGCACCUUA-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAA
UGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCUUU:0.00582,GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCUUU:0.00238)96.00:0.00040)97.00:0.00025,(GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUUUGAAUUAUCACCGGGCGAAACGUCGAGUUAGGCACCCUU:0.00341,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACUGGACACAACGUUGAGUGAAGCACCUUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACACAACGUUGAGUGAAGCACCUUU:0.00125)62.00:0.00072)100.00:0.03213)85.00:0.00208,((((GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAUCUUUCACCGGGCGACACGUUGAGUAAGGCACCCUG-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUGUGAAUUUUCACCGGGCGACACGUUGAGUAAGGCACCCUG:0.00874,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACCAGAAUGACGCCGGGCUUACGUCCACAGAGAUCCAUCAAUGGACAUAACGUUGAGUGAAGCACCAUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACCAGAAUGACGCCGGGCUUACGUCCACAGAGAUCCAUCACCGGACAUAACGUUGAGUGAAGCACCAUU:0.01083)57.00:0.09021,(((UGCUCAUUCAACUGGUUAUGAUUGCUCAUAACAUUGAAUGAUGCGGGGCCUACGUCCACAGCGAACUAUCAUCGGGCGACACGUUGAGUGAGGCACCCUA-UGCUCAUUCAACUGGUUAUGAUGCGUCAUAACAUUGAAUGAUGCGGGGCCUACGUCCCCAGCGAACUAUCAUCGGGCGACACGUUGAGUGAGGCACCCUA:0.09613,UGCUCAUUCAACUGGUUAUGAUGGCUCAUAACGCCGAAUGAUGCAGGGCCUACGUCCAUUUUGAACUAUCAUCGGGCGACACGUUGAGUGAGGCACCCUA-UGCUCAUUCAACUGGUUAUGAUGUGUCAUAACGCCGAAUGAUGCAGGGCCUACGUCCGCUUUGAACUAUCAUCGGGCGACACGUUGAGUGAGGCACCCUA:0.11281)96.00:0.05374,((((UGCUCAUUCCACUCAUUAUGAUAGCUCAUAAACCAGAAUGAUGCCGGGCCUAUGUCCAAUAAGAAACAUCAAUGGACGAAACGUUGAGUGAGGCACCUCC-UGCUCAUUCCACUCAUUAUGAUAGCUCAUAAACCAGAAUGAUGCCGGGCCUAUGUCCAUAUAGAAACAUCAAUGGACGAAACGUUGAGUGAGGCACCCCG:0.08231,UGCUCAUCCCACUUAUUAUGACAGCUCAUAAACCAGGAUGACGCCGGGCCUAUGUCCAAUUAGAAUCAUCAACGGGCGAAACGUUGAGUGAGGCACCUCC-UGCUCAUCCCACUUAUUAUGACAGCUCAUAAACCAGG
AUGACGCCGGGCCUAUGUCCAUUUUGAAACAUCAACGGGCGAAACGUUGAGUGAGGCACCCCG:0.18368)2.00:0.04626,UGCUCAUUCCACUCCUUAUGACAGCUCAUAAACCAGAAUGAUGCCGGGCCUACGUCCAGUUCGAAACAUCACAGGACGAAACGUUGAGUGAGGCACCUCC-UGCUCAUUCCACUCCUUAUGACAGCUCAUAAACCAGAAUGAUGCCGGGCCUACGUCCCGUUCGAAACAUCACAGGACGAAACGUUGAGUGAGGCACCCCG:0.14659)87.00:0.16591,(UGCUCAUUCGCCCCCUUAUGUUUGCUCAUAAAUCGGAAUGAUGCUGGGCCUACGUCCGAAUUGCUCCAGCAACGGACACGUCGUUGAGUGAGGCACCUCA-UGCUCAUUCGCCCCCUUAUGUUUGCUCAUAAAUCGGAAUGAUGCUGGGCCUACGUCCAAUCUGCUCCAGCAACGGACACGUCGUUGAGUGAGGCACCUCA:0.01271,UGCUCAUUCGCCCCCUUAUGUUUGCUCAUAAAUCGGAAUGAUGCUGGGCCUACGUCCGAAUUGCUCCAGCAACGGACACGUCGUUGAGUGAGGCACCUCA-UGCUCAUUCGCCCCCUUAUGUUUGCUCAUAAAUCGGAAUGAUGCUGGGCCUACGUCCAAUCUGCUCCAGCAACGGACACGUCGUUGAGUGAGGCACCUCA:0.01633)26.00:0.19555)45.00:0.00000,(((UGCUCAUUCACCUUUUUAUGAUAGCUCAUAAGCCCGAAUGAUGCUGGGCCUACGUCCAUAACGAUUUAUCAUCGGACGACACGUUGAGUGAGGCACCCUA-UGCUCAUCCCACUUAUUAUGACAGCUCAUAAACCAGGAUGACGCGAGGCCUAUGUCCUGUACGAUUCAUCACCGGGCAUAACGUUGAGUGAGGCACUUAU:0.00000,((UGCUCAUCCCACUUAUUAUGACAGCUCAUAAACCAGGAUGACGCGAGGCCUAUGUCCAGUACGAUUCAUCACCGGGCAUAACGUUGAGUGAGGCACUUAU-UGCUCAUCCCACUUAUUAUGACAGCUCAUAAACCAGGAUGACGCGAGGCCUAUGUCCUGUACGAUUCAUCACCGGGCAUAACGUUGAGUGAGGCACUUAU:0.02902,UGCUCAUCCCACUUAUUAUGACAGCUCAUAAACCAGGAUGACGCGAGGCCUAUGUCCAGUACGAUUCAUCACCGGGCAUAACGUUGAGUGAGGCACUUAU-UGCUCAUCCCACUUAUUAUGACAGCUCAUAAACCAGGAUGACGCGAGGCCUAUGUCCUGUACGAUUCAUCACCGGGCAUAACGUUGAGUGAGGCACUUAU:0.01020)47.00:0.00000,((UGCUCAUCCCACUUAUUAUGACAGCUCAUAAACCAGGAUGACGCGAGGCCUAUGUCCAGUACGAUUCAUCACCGGGCAUAACGUUGAGUGAGGCACUUAU-UGCUCAUCCCACUUAUUAUGACAGCUCAUAAACCAGGAUGACGCGAGGCCUAUGUCCUGUACGAUUCAUCACCGGGCAUAACGUUGAGUGAGGCACUUAU:0.00011,UGCUCAUUCACCUUUUUAUGAUAGCUCAUAAGCCCGAAUGAUGCUGGGCCUACGUCCAUAACGAUUUAUCAUCGGACGACACGUUGAGUGAGGCACCCUA-UGCUCAUUCACCUUUUUAUGAUAGCUCAUAAGCCCGAAUGAUGCUGGGCCUACGUCCCCUACGAUUUAUCAUCGGACGACACGUUGAGUGAGGCACCCUA:0.00000)30.00:0.00000,UGCUCAUUCACCUUUUUAUGAUAGCUCAUAAGCCCGAAUGAUGCUGGGCCUACGUCCAUAACGAUUUAUCAUCGGACGACACGUUGAGUGAGGCACCCUA-UGCUCAUUCACCUUUUUAUGAUAGCUCAUAAGCCCGAAUGAUGCUGGGCCUACGUC
CCCUACGAUUUAUCAUCGGACGACACGUUGAGUGAGGCACCCUA:0.00618)51.00:0.00482)80.00:0.00252)96.00:0.02880,UGCUCAUUCACCUCUUUAUGAUAGCUCAUAAGCCCGAAUGAUGCUGGGCCUACGUCCACAACGAUUUAUCAUCGGACGACACGUUGAGUGAGGCGCCCUA-UGCUCAUCCCACUUAUUAUGAUAGCUCAUAAACCAGGAUGACGCGGGGCCUAUGUCCUGUACGAUUCAUCACCGGGCAUAACGUUGAGUGAGGCACCCAU:0.03414)96.00:0.07287,(UGCUCAUUCCACCUCUUAUGUUCGCUCAUAAACCUGAAUGAUGCGGGGCCUAUGUCCACAGCGAACCAUCACCGGGCAUAACGUUGAGUGAGGCACCCCC-UGCUCAUUCAACUUUUUAUGAUGGCUCAUAAGCCCGAAUGAUGCUGGGCCUACGUCCACCACGAACUAUCAUCGGGCGACACGUGGAGUGAGGCACCCUG:0.01254,UGCUCAUUCCACCUCUUAUGUUCGCUCAUAAACCUGAAUGAUGCGGGGCCUAUGUCCACAGCGAACCAUCACCGGGCAUAACGUUGAGUGAGGCACCCCC-UGCUCAUUCCACCUCUUAUGUUCGCUCAUAAACCUGAAUGAUGCGGGGCCUAUGUCCCCAGCGAACCAUCACCGGGCAUAACGUUGAGUGAGGCACCCCC:0.03652)29.00:0.07913)52.00:0.02192)94.00:0.01375)97.00:0.04726,(GGCUCAUUCACCCGGUUAUGACAGCCCAUAACAUCGAAUGACGCCGGGCCUGCGUCCACCCCGAAUUAUCAUCGGACGCCACGUUGAGUCAGGUACCCUA-GGCUCAUUCACCCGGUUAUGACAGCCCAUAACAUUGAAUGACGCCGGGCCUGCGUCCCCCCCGAAUUAUCAUCGGACGCCACGUUGAGUCAGGUACCCUA:0.12892,UGCUCAUUCAUCUGUUUAUGAUUGCUCAUAAAAUCGAAUGACGCCGGGCCUGCGUCCACCACGAAUUAUCAUCGGACGCAACGUUGAGUUAGGCUCCCCA-UGCUCAUUCAUCUGUUUAUGAUUGCUCAUAAAAUCGAAUGACGCCGGGCCUGCGUCCCCCACGAAUUAUCAUCGGACGCAACGUUGAGUUAGGCUCCCCA:0.11159)99.00:0.04734)98.00:0.06592)97.00:0.02944,((GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAACUAUCAUCGGGCGAAACGUCGAGUAAGGCACCCUC-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACAUUACGUUGAGUGAAGCACCAUU:0.00648,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACAUUACGUUGAGUGAAGCACCAUU-UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCACCGGACAUUACGUUGAGUGAAGCACCAUU:0.00165)71.00:0.00487,(GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAACUAUCAUCGGGCGAAACGUCGAGUAAGGCACCCUC-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUCUGAACUAUCAUCGGGCGAAACGUCGAGUAAGGCACCCUC:0.00156,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUCGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGACAUUACGUUGAGUGAAGC
ACCAUU-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUCUGAACUAUCAUCGGGCGAAACGUCGAGUAAGGCACCCUC:0.00086)98.00:0.00522)99.00:0.05774)98.00:0.00205,(GGCUCAUUCACCCUCUUAUGUCAGCACAUAAACUCGAAUGACGCAGGGCCUGCGUCCAACUUGAAAUAUCACAGGGCGUAACGUCGAGUUAGGCACCCUC-GGCUCAUUCACCCUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUAUGAAAUAUCACAGGGCGUAACGUCGAGUUAGGCACCCUC:0.01964,UGCUCAUUCCAUCUCUUAUGUUCGCUCAUAAACUAGAAUGAUGCCGGGCUUACGUCCACUGAGAUCCAUCAAUGGGCAUAACGUUGAGUGAAGCACCAUA-GGCUCAUUCACCUUCUUAUGUCAGCACAUAAACUUGAAUGACGCAGGGCCUGCGUCCACUAUGAAUUAUCACAGGGCGAAACGUCGAGUUAGGCACCCUC:0.01620)98.00:0.03634)98.00:0.02469)84.00:0.03348)100.00:0.00000;
\ No newline at end of file
This diff is collapsed.
The algorithms are implemented in:
Src/fitchAndCompany.py
Given a Newick tree, simply run: python fitchAndCompany.py <alg> tree.newick
where <alg> can be: 1=Fitch 2=Sankoff 3=one structure 4=two structures
Different tools in Src/
frnakenstein_generator.py -Generates bistable sequences for pairs of structures using the Frnakenstein tool
incubator.py -Given a seed, target structures, a tree, a mutation rate and a transition matrix,
populates the tree with sequences evolving from the seed
data_generator.py -Executes incubator.py
run_fitchAndCompany.py -Executes fitchAndCompany.py
make_rfam_tree.py -Generates the tree to input to fitchAndCompany given BioData/
analysis.py -Generates the graphs given the output of fitchAndCompany.py
File added
import os
import sys
import re
import cPickle
from math import log, fsum
from numpy import std, average
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
# Mutation-rate labels, as strings; graph_by_alg looks rows up by these labels.
MUT = ['0.01', '0.05', '0.1']
# Relative path of the results directory.
# NOTE(review): presumably holds the fitchAndCompany.py output files - confirm.
DATA = os.path.join('..', 'Data_sank_results')
# Patterns for the report lines; parser() and root_parser() apply them with re.match.
# Only re_tot is pre-compiled, the others are plain pattern strings.
# NOTE(review): except in re_tot, the '.' inside '\d+.\d+' (and in '100.0', 'aver.',
# 'seq.') is unescaped and therefore matches any character, not just a literal dot.
re_tot = re.compile(r'Total: (\d+) errors / (\d+) nucleotides \((\d+\.\d+) % error\)') #For the best
re_tot_uns = r'Total for unstructured positions: (\d+) errors / (\d+) nucleotides \((\d+.\d+) % error\)'
re_tot_str = r'Total for structured positions: (\d+) errors / (\d+) nucleotides \((\d+.\d+) % error\)'
re_avg = r'Average \(over (\d+) optimal sequences\): (\d+.\d+) aver. errors per optimal sequence of length = (\d+.\d+) nucleotides \((\d+.\d+) % error\)'
re_Uns = r'Average for unstructured positions: (\d+.\d+) aver. errors per optimal sequence, considering (\d+.\d+) nucleotides per seq. \((\d+.\d+) % error\)'
re_Struct = r'Average for structured positions: (\d+.\d+) aver. errors per optimal sequence, considering (\d+.\d+) nucleotides per seq. \((\d+.\d+) % error\)'
# re_tot_err is a prefix of re_tot, so every line matching re_tot matches it too.
re_tot_err = r'Total: (\d+) errors' #Best root 4 next lines
# Same as re_avg but with the sequence length fixed to the text "100.0".
re_opt = r'Average \(over (\d+) optimal sequences\): (\d+.\d+) aver. errors per optimal sequence of length = 100.0 nucleotides \((\d+.\d+) % error\)' #All optimal sequences
# re_opt_uns / re_opt_str are character-for-character the same patterns as re_Uns / re_Struct.
re_opt_uns = r'Average for unstructured positions: (\d+.\d+) aver. errors per optimal sequence, considering (\d+.\d+) nucleotides per seq. \((\d+.\d+) % error\)'
re_opt_str = r'Average for structured positions: (\d+.\d+) aver. errors per optimal sequence, considering (\d+.\d+) nucleotides per seq. \((\d+.\d+) % error\)'
#Average over pairs of structures
#Error rate on Y / mutation rate on X
#% Total, Uns, Struct
def parser(lines):
    """Collect the captured fields of every recognised report line.

    Scans `lines` in order and stops at the third line containing
    "-For the root only:" (that line and everything after it are ignored).
    Every pattern is tried on every line with re.match; a line matched by
    several patterns is recorded under each of them (re_Uns/re_opt_uns and
    re_Struct/re_opt_str are identical patterns, and re_tot_err is a prefix
    of re_tot).

    Returns a dict mapping each key ('tot', 'tot_uns', 'tot_str', 'avg',
    'Uns', 'Struct', 'tot_err', 'opt', 'opt_uns', 'opt_str') to the list of
    group tuples (strings) captured for it, in input order.
    """
    # A real tuple rather than a bare zip(): on Python 3 zip() is a one-shot
    # iterator that would be exhausted after the first line, leaving every
    # later line unmatched.  A tuple behaves the same on Python 2 and 3.
    algos = (
        ('tot', re_tot),
        ('tot_uns', re_tot_uns),
        ('tot_str', re_tot_str),
        ('avg', re_avg),
        ('Uns', re_Uns),
        ('Struct', re_Struct),
        ('tot_err', re_tot_err),
        ('opt', re_opt),
        ('opt_uns', re_opt_uns),
        ('opt_str', re_opt_str),
    )
    data = {name: [] for name, _ in algos}
    z = 0  # number of "-For the root only:" markers seen so far
    for x in lines:
        if "-For the root only:" in x:
            z += 1
        if z == 3:
            break
        for name, alg in algos:
            g = re.match(alg, x)
            if g:
                data[name].append(g.groups())
    return data
def root_parser(lines):
    """Return the optimal-sequence count reported for the third root section.

    Lines before the third line containing "-For the root only:" are ignored.
    From that marker line onwards, the first line matching re_opt decides the
    result: its first captured group (the digits after "over") is returned as
    a string.  Returns None when no such line exists.
    """
    markers_seen = 0
    for line in lines:
        if "-For the root only:" in line:
            markers_seen += 1
        if markers_seen < 3:
            continue
        match = re.match(re_opt, line)
        if match:
            return match.group(1)
    return None
def graph_by_alg(data):
    """Plot percentage error against mutation rate and save it as 'test.pdf'.

    `data` maps result names to per-run results.  Each name must split on
    '_' into exactly seven fields; the 2nd and 3rd (structure ids), the 5th
    (mutation label) and the 6th (algorithm) are used.  Each value must
    provide 'tot', 'Uns' and 'Struct' sequences whose last entry's last item
    converts to float (presumably the output of parser() - confirm).

    Runs sharing (s1, s2, alg, mut) are summarised by their average and
    standard deviation, then drawn on a grid with one column per structure
    pair ('01', '02', '12') and one row per algorithm ('1'..'4').
    """
    def draw_facet(data, color):
        # Invoked through FacetGrid.map_dataframe with one facet's rows as
        # `data`; `color` is accepted but not used.
        by_mut = data.set_index('mut')
        tot, tot_err = [], []
        struct, struct_err = [], []
        uns, uns_err = [], []
        columns = (('tot', tot), ('tot_err', tot_err),
                   ('str', struct), ('str_err', struct_err),
                   ('uns', uns), ('uns_err', uns_err))
        for mut in MUT:
            try:
                for column, values in columns:
                    values.append(by_mut.loc[mut, column])
            except KeyError:
                # This facet has no row for the mutation label: report it.
                print(mut)
        palette = {'total':'k', 'structured':'red', 'unstructured':'blue'}
        curves = (('total', tot, tot_err),
                  ('structured', struct, struct_err),
                  ('unstructured', uns, uns_err))
        for label, values, errors in curves:
            # Only complete series (one value per mutation rate) are drawn.
            if len(values) == 3:
                plt.errorbar([0.01, 0.05, 0.1], values, yerr=errors,
                             label=label, color=palette[label])
                plt.xlabel([0.01, 0.05, 0.1])
        plt.legend()

    # Group the final percentage of every run by (s1, s2, alg, mut).
    grouped = {}
    for run_name in data:
        _, s1, s2, rep, mut, alg, _ = run_name.split('_')
        bucket = grouped.setdefault((s1, s2, alg, mut), {})
        for field in ('tot', 'Uns', 'Struct'):
            bucket.setdefault(field, []).append(
                float(data[run_name][field][-1][-1]))

    # One table row per group: mean and standard deviation of each measure.
    table = {'ss':[], 'alg':[], 'tot':[],
             'tot_err':[], 'uns':[], 'uns_err':[],
             'str':[], 'str_err':[],
             'mut':[]}
    for (s1, s2, alg, mut), samples in grouped.items():
        table['mut'].append(mut)
        table['ss'].append(s1+s2)
        table['alg'].append(alg)
        for column, field in (('tot', 'tot'), ('str', 'Struct'), ('uns', 'Uns')):
            table[column].append(average(samples[field]))
            table[column + '_err'].append(std(samples[field]))

    frame = pd.DataFrame(table)
    sns.set(font_scale=1.5)
    grid = sns.FacetGrid(frame, col="ss", row='alg',
                         row_order=['1', '2', '3', '4'],
                         col_order=['01', '02', '12'])
    grid = grid.map_dataframe(draw_facet)
    grid = grid.set_axis_labels("Mutation rate", "Percentage Error")
    grid = grid.add_legend()
    grid.set(xticks=[0.01, 0.05, 0.1])
    plt.tight_layout()
    plt.savefig('test.pdf')
    plt.close()
def _graph_by_Region(data):
    """Plot error percentages per region and save the grid to 'error.pdf'.

    ``data`` maps a run name to a dict of parsed captures.  A name is split on
    '_' into exactly seven fields; fields 2 and 3 (``s1``, ``s2``), 5 (``mut``)
    and 6 (``alg``) identify a group, field 4 (the replicate) is ignored.  For
    every run the last element of the last 'tot', 'Uns' and 'Struct' entry is
    read as a float; each group is reduced with ``average`` and ``std`` and
    stored as one row per region.  The FacetGrid has one column per
    ``s1 + s2`` ('01', '02', '12') and one row per region ('tot', 'Struct',
    'Uns'); each facet shows one error-bar curve per algorithm '1' to '4'.

    :param data: dict of run name -> dict with 'tot', 'Uns' and 'Struct' lists
        (presumably the output of ``parser`` -- confirm against the caller).
    :return: None; the figure is written to 'error.pdf' and closed.
    """
    def plot_honor(data, color):
        # Called by FacetGrid.map_dataframe with the facet's rows as ``data``
        # and a ``color`` keyword, so both parameter names must stay as is.
        alg_labels = [str(n) for n in range(1, 5)]
        d_algs_val = {alg: [] for alg in alg_labels}
        d_algs_err = {alg: [] for alg in alg_labels}
        d_muts = {alg: [] for alg in alg_labels}
        ind_data = data.set_index('alg')
        for alg in alg_labels:
            # NOTE(review): when a facet holds a single row for ``alg``, .loc
            # returns scalars instead of Series and the point is not picked
            # up below -- confirm whether that can happen with real data.
            muts = ind_data.loc[alg, 'mut']
            position = {m: i for i, m in enumerate(muts)}
            vals = ind_data.loc[alg, 'vals']
            errs = ind_data.loc[alg, 'errs']
            for mut in MUT:
                if mut in position:
                    # .iloc: these Series are labelled by 'alg', so an integer
                    # inside plain [] would lean on pandas' deprecated
                    # positional fallback.
                    d_algs_val[alg].append(vals.iloc[position[mut]])
                    d_algs_err[alg].append(errs.iloc[position[mut]])
                    d_muts[alg].append(mut)
        cols = dict(zip('1234', 'kbgr'))
        for alg in alg_labels:
            print(d_algs_val[alg])
            eb = plt.errorbar(d_muts[alg], d_algs_val[alg],
                              yerr=d_algs_err[alg], label=alg,
                              color=cols[alg],
                              alpha=0.5 if alg != '1' else 1, linewidth=3.0)
            # First artist of the last element returned by errorbar
            # (presumably the error-bar segments): thin and dashed.
            eb[-1][0].set_linestyle('--')
            eb[-1][0].set_linewidth(1)
        #plt.xlabel([0.01, 0.05, 0.1])
        plt.legend()

    # (s1, s2, alg, mut) -> {'tot': [...], 'Uns': [...], 'Struct': [...]}
    grouped = {}
    for name in data:
        _, s1, s2, _rep, mut, alg, _ = name.split('_')
        samples = grouped.setdefault((s1, s2, alg, mut), {})
        for region in ('tot', 'Uns', 'Struct'):
            samples.setdefault(region, []).append(
                float(data[name][region][-1][-1]))

    d = {'ss': [], 'alg': [], 'errs': [],
         'mut': [], 'Region': [], 'vals': []}
    for (s1, s2, alg, mut), samples in grouped.items():
        for region in ('tot', 'Struct', 'Uns'):
            d['mut'].append(mut)
            d['ss'].append(s1 + s2)
            d['alg'].append(alg)
            d['Region'].append(region)
            d['vals'].append(average(samples[region]))
            d['errs'].append(std(samples[region]))

    frame = pd.DataFrame(d)
    sns.set(font_scale=1.5)
    grid = sns.FacetGrid(frame, col="ss", row='Region',
                         row_order=['tot', 'Struct', 'Uns'],
                         col_order=['01', '02', '12'])
    (grid.map_dataframe(plot_honor)
         .set_axis_labels("Mutation rate", "Percentage Error")
         .add_legend()
         .set(xticks=[0.01, 0.05, 0.1], yticks=[0, 2, 4, 6, 8]))
    plt.tight_layout()
    plt.savefig('error.pdf')
    plt.close()
    #plt.show()
def graph_by_Region(data):
def plot_honor(data, color):
d_algs_val = {str(x):[] for x in range(1,5)}
d_algs_err = {str(x):[] for x in range(1,5)}
d_muts = {str(x):[] for x in range(1,5)}
ind_data = data.set_index('alg')
for x in range(1, 5):
x = str(x)
m = ind_data.loc[x, 'mut']
m = {z:i for i,z in enumerate(m)}
vals = ind_data.loc[x, 'vals']
errs = ind_data.loc[x, 'errs']
for mut in MUT:
if mut in m:
d_algs_val[x].append(vals[m[mut]])
d_algs_err[x].append(errs[m[mut]])
d_muts[x].append(mut)
cols = dict(zip('1234', 'kbgr'))
for x in range(1, 5):
x = str(x)
print d_algs_val[x]
eb = plt.errorbar(d_muts[x], d_algs_val[x], yerr=d_algs_err[x], label=x, color=cols[x], alpha=0.5 if x != '1' else 1, linewidth=3.0)
eb[-1][0].set_linestyle('--')
eb[-1][0].set_linewidth(1)
#plt.xlabel([0.01, 0.05, 0.1])
plt.legend()
g = {}
for name in data:
_, s1, s2, rep, mut, alg, _ = name.split('_')
g.setdefault((s1, s2, alg, mut), {})
g[s1,s2, alg, mut].setdefault('tot', []).append(
float(data[name]['tot'][-1][-1]))
g[s1,s2, alg, mut].setdefault('Uns', []).append(
float(data[name]['Uns'][-1][-1]))
g[s1,s2, alg, mut].setdefault('Struct', []).append(
float(data[name]['Struct'][-1][-1]))
d = {'ss':[], 'alg':[], 'errs':[],
'mut':[], 'Region':[], 'vals':[]}
for s1,s2,alg, mut in g: