Commit 21d6fa68 authored by Roman Sarrazin-Gendron
Browse files

first summer commit

parent 03dfe6ff
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# STOCKHOLM 1.0
#=GF AC RF00168
#=GF ID Lysine
#=GF DE Lysine riboswitch
#=GF AU Wickiser JK, Barrick JE, Breaker RR
#=GF SE Wickiser JK, Barrick JE
#=GF SS Published; PMID:12787499
#=GF GA 30.00
#=GF TC 30.10
#=GF NC 29.70
#=GF TP Cis-reg; riboswitch;
#=GF BM cmbuild -F CM SEED
#=GF CB cmcalibrate --mpi CM
#=GF SM cmsearch --cpu 4 --verbose --nohmmonly -E 1000 -Z 549862.597050 CM SEQDB
#=GF DR SO; 0000035; riboswitch;
#=GF RN [1]
#=GF RM 12787499
#=GF RT Riboswitches Control Fundamental Biochemical Pathways in Bacillus subtilis
#=GF RT and Other Bacteria.
#=GF RA Mandal M, Boese B, Barrick JE, Winkler WC, Breaker RR
#=GF RL Cell 2003;113:577-586.
#=GF RN [2]
#=GF RM 14523230
#=GF RT The L box regulon: Lysine sensing by leader RNAs of bacterial lysine
#=GF RT biosynthesis genes.
#=GF RA Grundy FJ, Lehman SC, Henkin TM
#=GF RL Proc Natl Acad Sci U S A 2003;100:12057-12062.
#=GF RN [3]
#=GF RM 14597663
#=GF RT An mRNA structure in bacteria that controls gene expression by binding
#=GF RT lysine.
#=GF RA Sudarsan N, Wickiser JK, Nakamura S, Ebert MS, Breaker RR
#=GF RL Genes Dev 2003;17:2688-2697.
#=GF RN [4]
#=GF RM 14627808
#=GF RT Regulation of lysine biosynthesis and transport genes in bacteria: yet
#=GF RT another RNA riboswitch?.
#=GF RA Rodionov DA, Vitreschak AG, Mironov AA, Gelfand MS
#=GF RL Nucleic Acids Res 2003;31:6748-6757.
#=GF CC Riboswitches are metabolite binding domains within certain messenger RNAs
#=GF CC that serve as precision sensors for their corresponding targets.
#=GF CC Allosteric rearrangement of mRNA structure is mediated by ligand binding,
#=GF CC and this results in modulation of gene expression. This family includes
#=GF CC riboswitches that sense lysine [1] in a number of genes involved in lysine
#=GF CC metabolism, including lysC [3].
#=GF WK Lysine_riboswitch
#=GF SQ 47
U00006.1/98763-98567 CAGGCCAGAAGAGGC-GCGUUG-CCC-A--A--GUAA-CGGUGUUGGA---------GGAGCCAGUCCUGUG---AUAACAC-CU-GAGGGG-GUG-C-AUCGCCGAGGUGAUUGAACGGCUGGCC-A----CGUUCA-U-CAUCGGCUACA-GGGGCUGAAU-CCCCUG-GGUUGUCACCAGA--AGCGUUCGCAGUCGGGCGUUUCGC---------------------------------------AAGUGGUGG-AGCACUUCUGGGUGA
J03294.1/2297-2476 GGUGAAGAUAGAGGU-GCGAAC-UUC-AAGA--GUA--UGCCUUUGGAGA-------AAGAUGGAUUCUGUG---AAAAAGG-CU-GAAAGG-GGA-GCGUCGCCGAAGCAAAUAAAACCCCAUCG------GUAUUAUU-UGCUGGCCGUG-CAUU--GAAUAAAUGUAAGGCUGUCAAGAAA--UCA------------------------------------------------------------UUUUCUUGG-AGGGCUAUCUCGUUG
AF269536.1/680-500 AAUAGAGUUAGAGGUUGCAUUA-UUA-AUGA--CUA--ACUUAUCAGAAGUCGU---AUGGGACAUGUGUUG---A--AUAA-GU-GAAAGG-UAA-U-AAUGCCGAAAUGAUGUUAUUUC-CAUA-A----AUUAGCAU-UGUGAAGUUGG-UUGAA-CAAUAAAAACAUCACAACCACGAAU--GCU--------------------------------------------------------------UUCUUCA-AUAUUUAUUUGAAUU
AF270308.1/2156-2331 AAUAGAGUUAGAGGUUGCAUUA-UUA-AUGA--CUA--ACUUAUCAGAAGUCGU---AUGGGACAUGUGUUG---A--AUAA-GU-GAAAGG-UAA-U-AAUGCCGAAAUGAUGUUAUUUC-CAUA-A----AUUAGCAU-UGUUGGGACAA-CUUUC-GAAUAGAAGUUGUACUGUCACU-----UUA----------------------------------------------------------------UGUGA-UGUGCUACCUUAUAU
M93419.1/332-511 AGUGAAGAUAGAGGU-GCGAAC-UUC-AUCA--GUA--AAAGCUUGGAGAA------GAAUGAGCUUCAAUG---AAAAGCU-UU-GAAAGG-GAACG-UUCGCCGAAGUGAAGAAAAA---CUCAUU----UUUUUCUU-UGCUGGUCCUG-CAUU--UAAGAGAUGCCGGAUUGUCAAGGCGG-UGC-------------------------------------------------------------CGCCUUGG-AGAGCUAUCUCACUG
AF306669.1/1019-1194 AUAUUUUGAUGAGGC-GCAUCA-AUC-AUGA--GUA--AAGUUUAGAUUAC------UGUCUGCUAACAG------CUAAAU-UU-GAAAGG-GUG-C-GAUGCCGAAGC-AAUUAUAAU--AGCA-G----UUAUAAUU-UGUUGGACUUUUUGGU--UAAGAGCUGAGAGUUUGUCAUUAUU--UAA------------------------------------------------------------AAAUAAUGG-AGUGCAUCACUUGUA
X00008.1/296-492 CAGGCCAGAAGAGGC-GCGUUG-CCC-A--A--GUAA-CGGUGUUGGA---------GGAGCCAGUCCUGUG---AUAACAC-CU-GAGGGG-GUG-C-AUCGCCGAGGUGAUUGAACGGCUGGCC-A----CGUUCA-U-CAUCGGCUAAG-GGGGCUGAAU-CCC-CUGGGUUGUCACCAGA--AGCGUUCGCAGUCGGGCGUUUCGC---------------------------------------AAGUGGUGG-AGCACUUCUGGGUGA
X15196.1/270-75 GCAGCCAGAAGAGGC-GCGUUG-CCA-A-----GUAA-CGGUGUUGGA---------GGAGCCAGUCCUGUG---AUAACAC-CU-GAGGGG-GUG-C-AUCGCCGAGGUGAUUGAACGGCUGGCC-A----CGUUCA-U-CAUCGGCUACA-GGGGCUGAAU-CCCCUG-GGUUGUCACCAGA--AGCGUUCGCAGUCGGGCGUUUCGC---------------------------------------AAGUGGUGG-AGCACUUCUGGGUGA
AL591976.1/186683-186486 UGGUGAGGUAGAGGUUGCGAGA-UGC-ACUA--GUA--AUUUUUUCGAGGCGAA---ACAAAGACGCCGACG---ACAAAGA-AU-GAACAG-GUU-G-AUCGCCGAAGUGACUAUUUUCU-CUUU-GUUUAGAAAUAGU-UGUUGGGACAG-UUUCC-UAAA-GGGGCUGGACUGCUAUAAGAA-UUUGUCGAAAU----------------------------------------------------UUCUUAUAGGUGUGCUAUCUGACAA
AL596166.1/112469-112272 UGGUGAGGUAGAGGUUGCGAGA-UGC-ACUA--GUA--AUUUUUUCGAGGCGAA---ACAAAGACGCCAAUG---ACAAAAA-AC-GAACAG-GUU-A-AUCGCCGAAGUGACUAUUUUUU-CUUU-GUAUCGAAAUAGU-UGUUGGGACAG-UUUCC-UAAA-GGAGCUGGACUGCUAUAAGAA-UUUGUCGAAAU----------------------------------------------------UUCUUAUAGGUGUGCUAUCUGACAA
AAMR01000029.1/5147-5324 AUCGCACGUAGAGGC-GCAAUU-AUA-AAAA--GUA--GUUUUCGUU----------GGGGUGAUGCCAAUG---AGCGGGA-AU-GAAAGG-UCU-A-AUUGCCGAAGUAAAUUGUAU---AUCU------AAGCAAUU-UGCUGGGGUUG-UGCU--CAAUAGGUACAACACUGCCAUAGUC--UUAAUUUU-------------------------------------------------------AAACUAUGG-AGCGCUACUGUAGGG
CP000002.3/2878377-2878198 GGUGAAGAUAGAGGU-GCGAAC-UUC-AAGA--GUA--GGCUUGAUGAGGA------AGAUGGAUUCCGAUG---AAGAAAG-CC-GAAAGG-GGA-GCGUCGCCGAAGCGGGGAAAAAUC-CACU-C----GUUUUUCC-UGCUGGCUUUA-CAUU--GAAUAAAUGUGAGGCUGUCAAGAAA--UCA-------------------------------------------------------------UUUCUUGG-AGAGCUAUCUCGUUG
BX950851.1/4451556-4451748 CAAGCCAGAAGAGGC-GCGUCG-CCC-AG----GUA--AGAUAUCGGAGG-------AACCGUAAUCCGCUG---AUGAUAU-CC-GAGGGG-GAG-C-GACGCCGAGAUGCGGUGAAAUU-CGGC------UUUCACCC-UAUCGACUACA-GAGGCUGAAU-CCUCUG-GGUUGUCACCGGA--UUCGUCCUGAUGGACGUCCA-------------------------------------------GCAAGGUGG-AGCGCUUCUGGGUGU
EF419299.1/175-1 AAAAGAGUUAGAGGUUGCGUCU-UUA-AUUA--GUA--ACACUUCAGAAGUUAUU--AAGGAACGUGUGUUG---A--AAGU-GU-GGAAGG-UAA-A-GAUGCCGAAAUGAAUGAUACC--CUUA-A----UUAUCAUU-UGUUGGGACAG-UUAUC-GAAUAGAAACUGUACUGUCACA-----AUU----------------------------------------------------------------UGUGA-UGUGCUACC-GACGG
CP000046.1/1759610-1759435 AAUUGAGUUAGAGGUUGCAUGU-UUA-AUUA--GUA--ACUUGUCAGAAGUAUUU--AUGGUACAUAAGUUG---A--ACAA-GU-GAAAGG-UAA-A-GAUGCCGAAAUAGAUAUAAACC-AUAA------AUUAUAUC-UAUUGGGACAG-UUUUC-GAAUAGGAACUGUACUGUCACA-----GAA----------------------------------------------------------------UGUGA-UGUGCUACCUUAUAU
BA000037.2/1373591-1373770 UAUCGACGUAGAGGC-GCAAUG-GUA-AAGA--GUA--ACUAUUAUU----------GGGGUGAUGCCAAUG---AAUAAUA-GU-GAAAGG-UAU-CCAUUGCCGAAGUGAAUUGCAU---AUCA------AAGCAGUU-UGCUGGGGUUG-CAUCC-GAAA-GGAACAACACUGCCAUAGUA--UUUAAUGUA------------------------------------------------------UAACUAUGG-AGCGCUACUGUAGGU
CP000647.1/4829910-4829675 AUUGCCAGAAGAGGU-GCGUUG-CCC-AG----GUAA-CCGUAUUGGA---------GGAACCAGUCCGGGG---AAGAUAC-GU-GAGGGG-GAG-C-AACGCCGAGGUGAAAGAGCGUCUGGUC-A----CGCUCC-U-CGCCGGCCACA-GGGGCUGAAU-CCCCUG-GGUUGUCACCAGA--AACGUUCGCAGUCGGGCGUUUCAUACCCGAACACCUCAGCGCCCAAAGCGGUGAGUCGGGUCUACAAGGUGG-AGCACUUCUGGGUGA
BA000004.3/1819757-1819575 AGUGAGGAUAGAGGU-GCAAAA-ACC-AAGA--GUA--CACAAUUGGAGGA------GAAUGAGAUCCGUUG---AGAAUUG-UG-GAAAGG-GGA-A-UUUGCCGAAGC---UGGAAGAAUCUCA-U----GUUCUGAA-GGCUGGUUCUG-UAUU--AAAUAAAUACAGAACUGUCAUAUAGCGGAUGUU---------------------------------------------------------GCUAUAUGG-AGGGCUAUCUCACGC
ABCF01000065.1/4547-4725 AGUGGUGAUAGAGGU-GCGGAC-UUUCAUCA--GUA--GAUGCUUCGAGGA------GUAUGGGCUCUUAUG---AAGGGUA-UC-GAAAGG-GAA-A-UUCGCCGAAGUGGAAGCAGGCU-CAUA------UCUGUUUG-CGCUGGUUCUG-UGUU--UAAGAGAUGCAGGAUUGUCAAGAUUA-ACA--------------------------------------------------------------GUCUUGG-AGAGCUAUCUCACGU
AE017143.1/992934-993104 ACAAAUUGUAGAGGU-GCAAAU-CCG-AUAA--GUA--UUUCUUCU-----------GAGUGGAAAGCGAUG---AAGGGGA-AG-GAAAGG-CGU-A-UUUGCCGAAAUCAAUUAAGC---GUCA------UCUUAGUU-GGUUGGGGUCG-UUGCC-GAAA-GGGACGACACUGUCGUAAUU--CAA-------------------------------------------------------------UAUUACGG-AGUGCUACUAUUAGG
CP000002.3/3366091-3365915 UAGUGAGGUAGAGGUUGCGCGG-AUG-AUGA--GUC--GCAUGUGUGAGG-------CUGAUGGGGCCGAUG---AUCAUAU-GC-AAAAGG-CAU-C-AGCGCCGAAGCAUAAGGAAGCC-AUUC------AUUCUUUA-UGCUGGGUCUG-CAUU--GAAUAAGUGCAGGACUGCCGCGGGU--AUU--------------------------------------------------------------CCCGCGG-AGGGCUAUCCGGAGA
CP000029.1/981298-981477 AGAUUUUGAUGAGGC-GCAUCA-AUC-AUGA--GUA--AACUUUAGAUAAU------UUGUCUGCUAACAAU---UAUAGAG-UU-AAAAGG-GUG-A-GAUGCCGAAAUGAUUCAUAAU--AGCA-G----UUAUGAAU-CGUUGGACUUAAUGGU--UAAGAGCUAUAAGUUUGUCAUUAUU--AUU------------------------------------------------------------AAAUAAUGG-AGUGCAUCACUUGUA
AAWP01000069.1/5541-5721 UAUUGCCGUAGAGGU-GCAGUC-UCG-AAGA--GUA--GCUAUUAUU----------GGGGUGAUGCCAAUG---AAUAAUA-GU-GGAAGG-CGA-GGAUUGCCGAAGUAGGUGGCCU---AUCG------AAGCCACU-UGCUGGGGUUG-UCUCU-GAAA-GGAACAACACUGCCAUAGUA--UAUUUACAUU-----------------------------------------------------AAACUAUGG-AGCGCUACUGUAGGG
AAKK02000085.1/12844-12662 UGUUGCCGUAGAGGC-GCGGUC-UCG-AAGA--GUA--GCUAUUAUU----------GGGGUGAUGCCAAUG---AAUAAUA-GU-GGAAGG-CGAAG-AUUGCCGAAGUAAGUAACUC---AUCA------AAGUUACU-UGCUGGGGUUG-UAUCU-GAAA-GGAACAACACUGCCAUAGUA--UAUAUUCACAUU---------------------------------------------------AAACUAUGG-AGCGCUACUGUAGGG
CP000416.1/1669940-1669758 UCGAAUAGAAGAGGC-GCGACC-AAC-AAGA--GUC--GCUUUCCGGAGA-------UGGGUCACAUUGCUG---AUGGAAG-GU-UAAAGG-GGC-G-GUCGCCGAAAUUUACGCAUUUU-GACC-G----GAUGCGUG-GGUUGGGUCCU-GGUU--GAACAAGUCAGGGACUGUCGCAGUCA-AAUAAU---------------------------------------------------------CGGCUGCGG-GGCGCUUUCAACGAU
AM263198.1/783761-783564 UGGUGAGGUAGAGGUUGCAAGA-UUC-ACAA--GUA--AUUUUUUAGAAGCGAA---ACAAAGACGCUGAUG---ACAAAGA-AU-GAACAG-GAU-G-AUUGCCGAAGUGACUAUUUUCU-CUUU-GUUUAGAAAUAGU-UGUUGGGACAG-UUUCC-UAAA-GGAGCUGGACUGCUAUAAGAA-UUUGUCGAAAU----------------------------------------------------UUCUUAUAGGUGUGCUAUCUGACAA
AP006716.1/1558409-1558233 AGAAAUUGAUGAGGC-GCAUCA-AUC-AUCA--GUA--UAUAUUAGAU---------AAACUGUCUGCAACA---GCUAAUA-UA-GAAAGG-GUG-C-GAUGCCGAAAUGAAUCAUAAU--CGCA-G----CUAUGAUU-UGUUGGACUUUGUGGU--UAAGAGCUGAAAGUUUGUCAUUAUU--AUU------------------------------------------------------------UAAUAAUGG-AGUGCAUCACUUGUA
ABDQ01000005.1/49327-49153 AACUAAGAUAGAGGU-GCGAGA-UUU-AAGA--GUA--GUAUUAUG-----------GAGUUAAGUGCUAAG---AAGUAAU-AA-GAAAGG-AAA-U-UUCGCCGAAGCUUAUAGAUAAUACUUU-A----AUGCUAUU-UGCUGGGAUUA-CAUA--AAAUAUAUGUAAGACUGUCACAAAU--AAA-------------------------------------------------------------GUUUGUGG-AGAGCUAUUAUUUUA
AE016827.1/1325416-1325239 UACACAUGUAGAGGU-GCGAAU-AUU-AUAA--GUA--UUUUUCCA-----------GAGUGGAUAACAAUG---AAGGAAA-UU-GAAAGG-AAU-A-UUUGCCGAAAUCAGUUAAGC---GUCA------UCUUAACU-GGUUGGUAACG-UCACC-GAAA-GGAACGUUACUGCCAUAGUC--AUUUUUGAU------------------------------------------------------UAACUAUGG-AGCGCUACUCGUGGG
CP000569.1/516240-516411 ACAAAUCGUAGAGGU-GCAAAU-CCG-AUAA--GUA--CUUUUUCU-----------GAGUGGAGAACGAGG---AGGAAAA-AG-GAAAGG-CGU-A-UUUGCCGAAAUCAGUUAAGC---GUCA------UCUUAAUU-GGUUGGGGUCG-UUACC-GAAA-GGGACGACACUGUCGUAAUC--CUU------------------------------------------------------------GUAUUACGG-AGUGCUACUGCUAGG
BA000031.2/1190723-1190903 UGUUGCCGUAGAGGC-GCAGUC-UCG-AAGA--GUA--GCUAUUAUU----------GGGGUGAUGCCAAUG---AAUAAUA-GU-GGAAGG-CGAAG-AUUGCCGAAGUAAGUCGUAU---AUCA------AAACGCCU-UGCUGGGGUUG-UAUCU-GAAA-GGAACAACACUGCCAUAGUA--UAUUUACAUU-----------------------------------------------------AAACUAUGG-AGCGCUACUGUAGGG
CP000425.1/2337129-2337310 CACAUCGAUAGAGGUCGCAACU-GAU-AUGA--AUC----UACGCCGAGU-------UGGAGCACAACAAAG---ACGCGUA-UUAGAGGGGGAGA-A-GUUGCCGAAAGAAUUUUGAC---GCUC-A----G-CAAAGU-UCUUGGGCUAG-UGAG--GAAAACUCACUAGACUGUCGCAAAUGGUUAAGA---------------------------------------------------------ACCAUGCGG-AGGGCUAUUCGUUCA
AASA01000017.1/10332-10501 ACAAAUUGUAGAGGU-GCGAAU-UCA-AUAA--GUA--UUUCUUCU-----------GAGUGGAAAACGAUG---AAGGGGA-AG-GAAAGG-UGA-A-UUUGCCGAAAUCAAUUAAGC---GUCA------UCUUAAUU-GGUUGGGGUCG-UUGUC-GAAA-GAAACGACACUGUCGUAGUA--AAU--------------------------------------------------------------GCUACGG-AGUGCUACUGUUAGG
AP008934.1/1400599-1400421 AGGUUUUGAUGAGGC-GCAUCA-AUC-AUUA--GUA--AAGAUUAGAAGA-------AUCUGACUGCUAGCA---GCUAAUU-UU-GAAAGG-GUG-A-GAUGCCGAAACGGUUAUAAU---AGCA-G----CUUAUAACAUGUUGGACUUUAUGGU--UAAGAGCUAAGAGUCUGUCAUUAUU--UUA------------------------------------------------------------AGAUAAUGG-AGUGCAUCACUUGUA
AAOX01000015.1/22460-22639 AGUGAAGGUAGAGGU-GCAAAC-UUC-AUCA--GUA--AAAGCUUGGAGAA------AGAUGAGUUUCCGUG---AAAAGCU-UU-GAAAGG-GAAUG-UUUGCCGAAGAAAAGGAAGUCU-CAUU------UCUUUCUU-UUCUGGUCCUG-UAUU--GAAUAAAUACUGGAUUGUCAAGACA--GCG------------------------------------------------------------CCGUCUUGG-AGAGCUAUCUCACUG
AL009126.3/3421348-3421169 CAGUGAGGUAGAGGUUGCGCGG-AUG-AUGA--GUC--ACACAUGCUA---------GGCUGACAGGGGCUGUUAAACAUGU-GU-AAAAGG-CAU-C-AGCGCCGAAGUGUGGAGAAAGCCGAUC------CUUCUCUA-UGCUGGGACUG-UAUCU-GAAUAAGUGCAGGACUGCCGCGUGC--UUU--------------------------------------------------------------UUCGCGG-AGGGCUAUCCGGAGA
AE008691.1/719320-719498 CGCAUAAAUAGAGGA-GCUGCC-AAGCAU----GUA--UUUGGCGAGGUGUUAAGGAGAAGAACCUCCAAUA---CUCGCUG-AA-GAA-GG-UUU-G-GCUGCCGAAAGGGUGAGCUUG--UUCU-U----GAGCUCAU-CCUUGGUGGUA-AAC-A-CAAA--GUUUACCACUGUCAUGGGA--CCU--------------------------------------------------------------CCCAUGA-AGCGCUAUUUAUGCA
AE004439.1/1026545-1026371 UACUUGUGUAGAGGA-GCGAUC-ACU-AUAA--GUA--UUUUUUCU-----------GAGUGGAUAACGAAG---AGGAAAA-AG-GAAAGG-AGU-G-ACCGCCGAAAUCAAUUGAAA---GUCA------UUUUGAUU-GGUUGGUGGCG-UAUUC-GAAA-GGAACGUCAUUGUCAUAGUC--UUUUUU---------------------------------------------------------AAACUAUGG-AGCGCUACUGGUUGG
CP000436.1/843556-843727 UACAUAUGUAGAGGU-GCGGCU-GUU-AUAA--GUA--AUUUUUU------------GAGUGGAUAACGAUG---A-AAAAA-AU-GAAAGG-AAU-A-GUUGCCGAAAUCAAUUAAAA---GUCA------UUUUAAUU-GGUUGGGGGCG-UAUUC-GAAA-GAAACGUCACUGUCAUAGUA--UUU-AU---------------------------------------------------------CCACUAUGG-AGCGCUACUGGUUAG
CP001186.1/1355449-1355630 CUCAAAGGUAGAGGCCGCGAUA-GGA-AAGA--GUA--AGCUAUGGGAGAU------UUAAUGGAAUCUGUG---AUCAUAGGUU-GAAAGG-GAC-U-AUUGCCGAAAUAUAAGAAUAAC-CAUC-U----UAUUCAUA-UAUUGGGACUA-CAUU--GAAUAAAUGUAGUACUGUCAUAAGA--UUU------------------------------------------------------------AUUUUAUGG-AGAGCUAUUUGGAGA
AE005176.1/2276232-2276414 CACAUCGAUAGAGGUCGCAACU-GAU-AUGAAUCUACGCCGAGUUGG----------AGCACAACAAAGACG---CGUAUUU-AG-AGGAGG-AGA-G-GUUGCCGAAAGAAUUUUGUU---GCUC------AGCAAGGU-UCUUGGGCUAG-UGAG--AAAAACUCACUAGACUGUCGCAAAUG-GUUAAUA--------------------------------------------------------ACCAUGCGG-AGGGCUAUUCGUUCA
CP000923.1/1076332-1076143 AGGUGAGGUAGAGGC-GCGGGUUAUC-AAGA--GUA--GUAUACCAGAGGU------AUUUAAGGGCCGAUG---AAGGUAU-AU-GAAAGG-GAU-G-CUCGCCGAAGC-GCGUAAAUUCCUUAA-A----GUUUACGC-AGCUGGGCCUA-UGCC--GAAUAGGUAUAGGACUGUCACUGGA--GGUUUCCCGAGC---------------------------------------------------CUUCAGUGA-AGAGCUAUCUCGCUA
AE008691.1/1930427-1930616 AGGUGAGGUAGAGGC-GCGGGU-CAUCAAGA--GUA--ACAUGCCAGAGG-------UGUUAAGGGCCGAUG---AAGGUGU-GU-GAAAGG-GGU-G-CCCGCCGAAGC-GCGUAAACUUCCUUAAG----GUUUACGC-AGCUGGGCCUA-UGCC--GAACAGGUAUAGGACUGUCACUGAAG-GCUCCCCAGGC----------------------------------------------------CUUCAGUGG-AGAGCUAUCUCGCUA
CP000826.1/4955718-4955911 CAAGCCAGAAGAGGU-GCGUCG-CCC-AG----GUA--GAGUGUCAGAGG-------AGCCGUUGUCCAAUG---ACGGCGC-UU-GAGGGG-GAG-C-GACGCCGAGGUAAGGUGAUGUG-CGGC------AUUCAUCG-UAUCGACUACA-GGGGCUGAAU-CCCCUG-AGUUGUCACCAGGG-AUUGUCCGUAGGGGCAAUCA-------------------------------------------GCAAGGUGG-GGCGCUUCUGGGUGU
AL935263.2/928916-929097 AUCGAAAGAAGAGGAUGCGGUU-AAC-AAUA--GUA--GCCGGCUGGAAGU------GGGUCACCACUUAUG---AAGGUCA-GU-GAACGG-GGC-A-ACCGCCGAAAUCGAUGGAUCAGUGACC------GAUUCAUC-CGUUGGGCCUU-GGUU--GAAUAAAUCAUGGACUGUCGCAGCUA-GAA------------------------------------------------------------UAGUUGCGG-GGCGCUAUCGACGAU
AALE02000022.1/49827-50019 UAAGCCAGAAGAGGU-GCGUCG-CCC-AG----GUA--AAGUGUCAGAGG-------AGCCGUGAUCCGCUG---AAGACAU-UC-GAGGGG-GAG-C-GACGCCGAGACACGGUGAUUUC-GGCC-------AUCAGCG-UGUCGACUGCA-GGGGCUGAAU-CCCCUG-GGUUGUCACCAGU--GCCGUUCCUUUAGGGCGGUCA------------------------------------------ACAAGGUGG-AGCGCUUCUGGGUGU
FQ670178.1/927246-927070 UACAAAAGUAGAGGC-GCAAUU-AUU-AUAA--GUA--UUUUUUCA-----------GAGUGGAUAACGAAG---AAGAAAA-AA-GAAAGG-AAU-A-GUUGCCGAAAUCAAAUAAAA---GUCG------UUUUGUUU-GGUUGGUGGCG-UGCUC-GAAA-GGGGCGACACUGUCAUAGUU--UUUCUGAU-------------------------------------------------------UAACUAUGG-AGUGCUACGGUUGUU
#=GC SS_cons :::::((((((,,,,.<<<<<<.<<<.----..---..<<<<<<<<____......._______________..._>>>>>>.>>.------.>>>.>.>>>>>,,,<<<<<<<<<<<<__.____._....>>>>>>>>.>>>><<<<<<<.<<<<_.____.>>>>>>>>>>>,<<<<<<<_.._______........................................................__>>>>>>>.,,,,)))))):::::
#=GC RF aaugacgguAGAGGU.GCgucc.cuc.AugA..GUA..acuuuucuGAgg.......ggaugaaaucCgaUG...Aagaaaa.gu.GAAAGG.gag.g.gacGCCGAagugaguggaccuc.guua.a....gguccacu.cgcuGGccccg.uggcC.GAAu.gccacggggCUGuCAccgua..uuugUuc........................................................acacggUGg.AGcGCUaccggguga
//
This diff is collapsed.
Binary files /dev/null and b/models/RF02540.stockholm.txt differ
This diff is collapsed.
import pickle
# Keep the modules that hold more than 20 graphs and save them, in a lighter
# record format, to "best3dmotif_graphs.cPickle".
#
# Every entry of `modules` is indexed as entry[0] (a sequence of graphs) and
# entry[1] (one record per graph, whose first field is stored as the name).
# NOTE(review): the indentation of this script was lost; the statements that
# build and append a record are assumed to belong to the `length > 20` branch
# -- confirm against the original file.
# NOTE: pickle.load executes arbitrary code; only use it on trusted files.
with open("all3dmotif_graphs.cPickle", 'rb') as source:
    modules = pickle.load(source)

sig_g = []      # one {"l_graphs": ..., "names": ...} record per kept module
good_mods = []  # indices into `modules` of the kept modules
n = 0           # number of kept modules
for i, module in enumerate(modules):
    length = len(module[0])
    if length <= 20:
        continue
    good_mods.append(i)
    gr = module[0]
    # Pair the first field of each record with the node list of its graph.
    pdbs = [
        (module[1][mod_id][0], list(gr[mod_id].nodes))
        for mod_id in range(len(module[1]))
    ]
    sig_g.append({"l_graphs": gr, "names": pdbs})
    n = n + 1
print(n)

with open("best3dmotif_graphs.cPickle", 'wb') as sink:
    pickle.dump(sig_g, sink)
import pickle
from matplotlib import pyplot as plt
import networkx as nx
# Draw the first graph of each hand-picked module on a circular layout,
# showing one figure per module.
# NOTE: pickle.load executes arbitrary code; only use it on trusted files.
with open("rna3dmotif_one_of_each_graph.cPickle", 'rb') as source:
    modules = pickle.load(source)

interesting = [2, 8, 9, 14, 20, 28, 36, 59, 113, 127, 133, 150, 162, 194, 195]
for i in interesting:
    g0 = modules[i][0]
    pos = nx.circular_layout(g0)
    nx.draw_networkx_nodes(g0, pos, nodelist=g0.nodes, node_color='red', node_size=500)
    nx.draw_networkx_edges(g0, pos, edgelist=g0.edges(), edge_color='green', width=2)
    # Every node is labelled with its own identifier.
    labels = {node: node for node in g0.nodes()}
    nx.draw_networkx_labels(g0, pos, labels)
    # Every edge is labelled with its 'label' attribute.
    elabels = {edge: g0.get_edge_data(*edge)['label'] for edge in g0.edges()}
    nx.draw_networkx_edge_labels(g0, pos, elabels)
    plt.show()
"""
def get_seq(g):
seq = ""
for i in list(g.nodes):
seq = seq + str(i)
return seq
def module_seqs(ind):
seqs = []
gs = modules[ind]
for graph in gs:
seq = get_seq(graph)
if seq not in seqs:
seqs.append(seq)
return(len(seqs))
def get_seq_variation():
n_seqs = []
for i in range(len(modules)):
n = module_seqs(i)
n_seqs.append(n)
return n_seqs
ns = get_seq_variation()
goods = []
for i in range(len(ns)):
g = modules[i][0]
#print(list(g.edges(data=True)))
if not any( (x[2]["label"]!="b53" and x[2]["label"]!="cWW") for x in list(g.edges(data=True))):
continue
if ns[i]>8:
goods.append(i)
print(goods)
print(len(goods))
"""
This diff is collapsed.
import networkx as nx
from matplotlib import pyplot as plt
import pickle
def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file.

    NOTE: pickle.load executes arbitrary code; only use it on trusted files.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


modules = _load_pickle("rna3dmotif_one_of_each_graph.cPickle")
aln_list = _load_pickle("rna3dmotif_aligned_modulegraphs.cPickle")
# Both files used to be bound to `PDBs`, so the names were overwritten right
# after being loaded. `PDBs` still ends up holding the positions, as before;
# the names are now kept reachable under their own name.
PDB_names = _load_pickle("rna3dmotif_PDB_names.cPickle")
PDBs = _load_pickle("rna3dmotif_PDB_positions.cPickle")
def get_seq(g, aln):
    """Return the nucleotides of *g* as a gapped string ordered by node id.

    Every integer from the smallest to the largest node id of *g* is visited
    in increasing order: an id that is a node contributes that node's
    ``'nuc'`` attribute, an id that is not a node contributes ``'-'``.

    Args:
        g: graph with integer nodes, each carrying a ``'nuc'`` attribute.
        aln: unused; kept so existing callers keep working.

    Returns:
        The gapped sequence, or ``""`` when *g* has no nodes.
    """
    node_data = g.nodes
    addresses = set(node_data)
    if not addresses:
        # min()/max() would raise ValueError on an empty graph.
        return ""
    # Direct attribute lookup per node instead of rebuilding the node list
    # (and searching it) for every position.
    return "".join(
        node_data[j]['nuc'] if j in addresses else '-'
        for j in range(min(addresses), max(addresses) + 1)
    )
# Print the gapped sequence of every graph stored under the first module,
# pairing each graph with the alignment entry found at the same index.
for i, g in enumerate(modules[0]):
    aln = aln_list[i]
    s = get_seq(g, aln)
    print(s)
GOOD COLUMNS [[868], [869], [870], [871], [872], [873], [877]]
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
STRUCTURE SEQUENCE: ['G', 'A', 'G', 'U', 'A', 'U', 'C']
RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
PURE RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
PURE RFAM SEQUENCE : ['G', 'A', 'U', 'U', 'A', 'U', 'A']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'U', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'U', 'U', 'U', 'C']
PURE RFAM SEQUENCE : ['U', '-', '-', 'A', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'C', 'A', 'G', 'A']
PURE RFAM SEQUENCE : ['-', '-', 'G', 'A', 'G', 'A', '-']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'A', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'G', 'A', 'A', 'A', 'U']
PURE RFAM SEQUENCE : ['G', 'A', 'G', 'A', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['C', 'U', 'U', 'C', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'C']
PURE RFAM SEQUENCE : ['G', 'A', 'G', 'A', 'A', 'C', 'C']
PURE RFAM SEQUENCE : ['-', '-', 'G', 'A', 'A', 'A', '-']
PURE RFAM SEQUENCE : ['C', '-', 'U', 'U', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'U', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'A', 'U', 'C', 'A', 'U', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['-', 'C', 'A', 'C', 'A', 'A', '-']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['A', '-', 'U', 'U', 'A', 'U', 'G']
PURE RFAM SEQUENCE : ['U', '-', 'C', 'G', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['G', 'A', 'G', 'U', 'A', 'U', 'G']
PURE RFAM SEQUENCE : ['C', 'U', 'U', 'C', 'A', 'C', 'A']
PURE RFAM SEQUENCE : ['G', 'A', 'U', 'U', 'A', 'U', 'U']
PURE RFAM SEQUENCE : ['G', 'G', 'A', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['-', 'C', 'G', 'A', 'A', 'A', '-']
PURE RFAM SEQUENCE : ['C', '-', 'G', 'A', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['C', 'U', 'U', 'U', 'A', 'U', 'G']
PURE RFAM SEQUENCE : ['U', '-', '-', 'G', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'C', 'A', 'U', 'C']
PURE RFAM SEQUENCE : ['A', '-', 'G', 'U', 'A', 'G', 'G']
PURE RFAM SEQUENCE : ['U', '-', 'A', 'G', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['G', 'A', 'U', 'C', 'A', 'U', 'G']
PURE RFAM SEQUENCE : ['G', 'U', 'C', 'U', 'A', 'C', 'C']
PURE RFAM SEQUENCE : ['U', '-', 'U', 'U', 'A', 'A', 'U']
PURE RFAM SEQUENCE : ['C', 'U', 'U', 'U', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['U', 'U', 'U', 'A', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['U', '-', 'G', 'C', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['-', '-', '-', '-', '-', '-', '-']
PURE RFAM SEQUENCE : ['C', 'U', 'U', 'U', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['U', '-', '-', 'U', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'U', 'A', 'C', 'G']
PURE RFAM SEQUENCE : ['-', 'G', 'U', 'U', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['U', '-', 'U', 'A', 'U', 'G', 'G']
PURE RFAM SEQUENCE : ['C', 'U', 'U', 'U', 'A', 'U', 'G']
PURE RFAM SEQUENCE : ['C', 'U', 'G', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'A', 'A', 'A', 'A', '-']
PURE RFAM SEQUENCE : ['U', 'U', 'U', 'A', 'C', 'A', 'A']
PURE RFAM SEQUENCE : ['U', 'G', 'U', 'U', 'A', 'U', '-']
PURE RFAM SEQUENCE : ['A', 'G', 'U', 'C', 'A', 'U', 'G']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'C', 'A', 'U', 'G']
PURE RFAM SEQUENCE : ['C', 'U', 'U', 'U', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'U', 'A', 'G', 'U']
PURE RFAM SEQUENCE : ['-', '-', 'G', 'A', 'A', 'A', '-']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'A', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'G', 'A', 'A', 'C', 'G']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'C', 'A', 'C', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['U', 'U', 'U', 'A', '-', '-', 'A']
PURE RFAM SEQUENCE : ['-', 'U', 'C', 'A', 'A', 'A', '-']
PURE RFAM SEQUENCE : ['C', '-', '-', 'U', 'A', 'U', 'A']
PURE RFAM SEQUENCE : ['G', 'A', 'U', 'A', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['C', 'C', 'G', 'A', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['-', '-', 'G', 'G', 'A', 'A', '-']
PURE RFAM SEQUENCE : ['G', 'U', 'G', 'U', 'G', 'U', 'U']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'U', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['U', 'U', 'U', 'U', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'U', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['U', '-', 'U', 'U', 'A', 'U', 'U']
PURE RFAM SEQUENCE : ['-', 'C', 'A', 'A', 'A', 'A', '-']
PURE RFAM SEQUENCE : ['G', 'A', 'U', 'A', 'A', 'U', 'G']
PURE RFAM SEQUENCE : ['G', 'A', 'G', 'A', 'A', 'C', 'G']
PURE RFAM SEQUENCE : ['A', 'U', 'U', 'U', '-', '-', 'G']
PURE RFAM SEQUENCE : ['G', 'C', 'U', 'U', 'A', 'A', 'U']
PURE RFAM SEQUENCE : ['G', 'A', 'G', 'C', 'A', 'U', 'G']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'G', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['U', '-', '-', 'U', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'C', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['A', 'U', 'U', 'A', 'A', 'A', 'U']
PURE RFAM SEQUENCE : ['C', 'U', 'A', 'G', 'A', 'A', 'G']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'U', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'U', 'U', 'A', 'U']
PURE RFAM SEQUENCE : ['G', 'A', 'U', 'C', 'A', 'U', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'C', 'A', 'C', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'U', 'U', 'A', 'C', 'C']
PURE RFAM SEQUENCE : ['G', 'U', 'A', 'U', 'C', 'U', 'U']
==============================================================
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
STRUCTURE SEQUENCE: ['C', 'G', 'G', 'C', 'G', 'A', 'G']
RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'A', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'A', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['U', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'U', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
PURE RFAM SEQUENCE : ['G', 'G', 'C', 'G', 'A', 'G', 'C']
This diff is collapsed.
import os
import networkx as nx
import matplotlib
from matplotlib import pyplot as plt
import networkx.algorithms.isomorphism as iso
import operator
import pickle
def compare_graphs(g1, g2):
    """Tell whether two graphs are isomorphic with matching edge labels.

    Edges are paired up by comparing their 'label' attribute with
    ``operator.eq``.

    Args:
        g1: first networkx graph.
        g2: second networkx graph.

    Returns:
        The result of ``nx.is_isomorphic`` for the two graphs.
    """
    # This list is passed in the ``default`` position of generic_edge_match:
    # it is the value substituted for an edge that has no 'label' attribute,
    # not a whitelist of accepted labels.
    fallback_label = [
        "cWW", "tWW", "cWS", "tWS", "cWH", "tWH", "cHH", "tHH",
        "tHW", "cHW", "cHS", "tHS", "cSW", "tSW", "cSH", "tSH",
        "cSS", "tSS", "c++", "t++", "c--", "t--", "b53",
    ]
    label_matcher = iso.generic_edge_match('label', fallback_label, operator.eq)
    return nx.is_isomorphic(g1, g2, edge_match=label_matcher)
def make_align_graph(g1):
    """Draw ``g1`` in red, with node and edge labels, and show the figure.

    The nodes and edges of ``g1`` are copied (without attributes) into a
    fresh ``nx.DiGraph`` that is positioned with ``nx.spring_layout``.
    Each node is labelled with its own identifier and each edge with the
    'label' attribute read from ``g1``.

    Args:
        g1: networkx graph to display; every edge is expected to carry a
            'label' attribute (a missing one raises ``KeyError``).

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    # Attribute-free copy used only for layout and drawing.
    canvas = nx.DiGraph()
    canvas.add_nodes_from(g1.nodes())
    canvas.add_edges_from(g1.edges())
    layout = nx.spring_layout(canvas)

    nx.draw_networkx_nodes(canvas, layout, nodelist=g1.nodes(),
                           node_color='red', node_size=500)
    nx.draw_networkx_edges(canvas, layout, edgelist=g1.edges(),
                           edge_color='red', width=2)

    # Every node is labelled with its own identifier.
    node_names = {node: node for node in canvas.nodes()}
    nx.draw_networkx_labels(canvas, layout, node_names)

    # Edge labels come from g1, because the copy above carries no edge data.
    edge_names = {edge: g1.get_edge_data(*edge)['label'] for edge in g1.edges()}
    nx.draw_networkx_edge_labels(canvas, layout, edge_names)

    plt.show()
# Script setup: move into the DESC directory so that the bare file names
# returned by os.listdir('.') can be opened directly by the loop below.
os.chdir('DESC')
#print(os.listdir('.'))
# Number of entries in the directory, used only as the denominator of the
# progress message. It counts every entry, whereas the loop only processes
# names containing 'desc', so it can be larger than the number processed.
tot = len(os.listdir('.'))
# Running count of the 'desc' files seen so far.
z=0
# graphs[k] is a list of graphs that compare_graphs reports as isomorphic
# to graphs[k][0]; IDs[k] records the matching file identifiers (the file
# name minus its last 5 characters).
# NOTE(review): the first ID of a group is stored as a string inside a list
# (IDs.append([i[:-5]])), but later IDs are appended as one-element lists
# (IDs[kk].append([i[:-5]])) -- the element shapes differ; confirm intent.
graphs = []
IDs = []
# NOTE(review): bob is not referenced in the visible part of the script --
# confirm it is still needed.
bob = nx.DiGraph()
for i in os.listdir('.'):
if 'desc' in i:
#print(i)
z = z+1
if z%100==0:
print("doing number "+str(z)+" of "+str(tot))
g = nx.DiGraph()
with open(i,'r') as desc :
lines = desc.readlines()
if len(lines)<2:
continue
nodestuff = lines[1].split(" ")
#print(nodestuff)
for n in nodestuff:
if len(n)>0:
if n[0].isdigit():
node_n = n[:-2]
nt = n[-1]
g.add_node(int(node_n),nuc=nt)
for line in range(2,len(lines)):
bp = lines[line].split("---")
#print (bp)
p1 =bp[0][4:8].replace(" ","")
p2 =bp[2][2:7]
#if '(' in p2.split():
# p2.replace("(","")
#rint(p1,p2)
#p1 = "".join([s for s in p1_.split() if s.isdigit()])
#p2 = "".join([s for s in p2_.split() if s.isdigit()])
#print(p1,p2)
#print(p1_,p2_)
interaction = bp[1][1:4]
orientation = bp[1][5]
#print(p1,p2,interaction,orientation)
if orientation=='s':
continue
elif interaction=="C/C":
g.add_edge(int(p1),int(p2),long_range=False ,label='b53')
elif interaction=="+/+" or interaction=="-/-":
bond = orientation + "WW"
g.add_edge(int(p1), int(p2), long_range= False, label=bond)
else:
bond = orientation + interaction[0]+interaction[2]
g.add_edge(int(p1), int(p2), long_range=False, label= bond)
#nx.draw(g)
#print("GRAPH")
#print(g.nodes(data=True))
#print(g.edges(data=True))
#for i in g.edges():
# print(i, g.get_edge_data(*i))
if len(graphs)==0:
graphs.append([g])
IDs.append([i[:-5]])
else:
found = False
kk=0
#print("GOT HERE")
while kk < len(graphs) and found==False:
if compare_graphs(g,graphs[kk][0])==True:
graphs[kk].append(g)
IDs[kk].append([i[:-5]])
found=True
kk = kk+1