Commit 03c95e75 authored by Carlos GO's avatar Carlos GO
Browse files

contiguity computation

parent ede3913d
import numpy as np
import pandas as pd
def contiguity(stem):
    """
    return: contiguity score for stem

    The stem is scanned from both ends at once. A '(' at the left pointer
    together with a ')' at the right pointer counts as one base pair. A
    "stack" is a run of at least two such pairs that is not interrupted by
    an unpaired position ('.') on either side. The score is

        log((len(stem) - number of '(') / number of stacks)

    stem: dot-bracket string of a single, unbranched stem,
          e.g. "((..((...))..))"
    raises ValueError: if the scan meets a character arrangement it cannot
          step over (e.g. ')' at the left pointer, as happens in branched
          structures), or if the stem holds no stack at all, which leaves
          the score undefined
    """
    stem_length = len(stem)
    bps = stem.count('(')
    stacks = 0
    run_length = 0  # consecutive base pairs in the run being scanned

    left = 0
    right = stem_length - 1
    while left < right:
        opening = stem[left]
        closing = stem[right]

        if opening == "(" and closing == ")":
            run_length += 1
            left += 1
            right -= 1
            continue

        # Anything else interrupts the current run. It only counts as a
        # stack if it held at least two pairs, and the run length starts
        # over so that a later lone pair is not mistaken for a stack.
        if run_length > 1:
            stacks += 1
        run_length = 0

        if opening == "." and closing == ".":
            left += 1
            right -= 1
        elif opening == "." and closing == ")":
            left += 1
        elif opening == "(" and closing == ".":
            right -= 1
        else:
            # Neither pointer could advance here; fail loudly instead of
            # looping forever.
            raise ValueError(
                "cannot scan stem %r: found %r at position %d and %r at "
                "position %d" % (stem, opening, left, closing, right)
            )

    # The pointers can meet while a run is still open (e.g. "(())").
    if run_length > 1:
        stacks += 1

    if stacks == 0:
        raise ValueError(
            "stem %r contains no stack; contiguity is undefined" % (stem,)
        )

    # float() keeps this a true division on every Python version.
    return np.log((stem_length - bps) / float(stacks))
def stem_find(ss):
    """
    return: list containing start and end indices of all stems in RNA

    A stem is a maximal span that starts at a '(' seen at nesting depth
    zero and ends at the ')' that brings the depth back to zero. Each entry
    is an inclusive (start, end) tuple.

    NOTE: leading and trailing '.' are stripped before scanning, so the
    indices refer to ss.strip('.'), not to the string that was passed in.

    ss: dot-bracket secondary structure string
    Prints the stripped structure, and prints "UNBALANCED!" when the
    brackets do not match up (an unclosed '(' or a ')' with no partner).
    A ')' with no partner is skipped.
    """
    #remove dangles
    ss = ss.strip('.')
    print(ss)

    #depth goes up on every open bracket and down on every closing one
    #whenever it returns to zero a stem has just been closed
    depth = 0
    stem_start = None
    unbalanced = False
    stem_indices = []
    for i, b in enumerate(ss):
        if b == "(":
            if depth == 0:
                stem_start = i
            depth += 1
        elif b == ")":
            if depth == 0:
                # closing bracket without a partner: remember and skip it
                unbalanced = True
                continue
            depth -= 1
            if depth == 0:
                stem_indices.append((stem_start, i))

    if depth != 0 or unbalanced:
        print("UNBALANCED!")
    return stem_indices
def mean_contig(ss, stems):
    """
    return: mean of the weighted contiguity scores of the given stems

    ss: structure string that the stem indices refer to
    stems: iterable of inclusive (start, end) index pairs into ss

    Each stem's contiguity score is multiplied by float(start - end)
    before averaging.
    """
    # NOTE(review): start - end is zero or negative whenever start <= end,
    # so every weight is non-positive -- confirm this sign is intended.
    weighted_scores = [
        contiguity(ss[first:last + 1]) * float(first - last)
        for first, last in stems
    ]
    return np.mean(weighted_scores)
def ml_contiguity():
    """
    Read the multiloop data set and print the stems found in its first
    structure.

    The 'structure' column of ../Data/rnamuts_multiloops.csv is read, and
    only the first entry is processed; nothing is printed by this function
    if the column is empty.
    """
    table = pd.read_csv("../Data/rnamuts_multiloops.csv")
    for structure in table['structure']:
        stems = stem_find(structure)
        print(stems)
        # stop after the first structure
        break
# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    ml_contiguity()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment