Commit b67041d0 authored by Carlos GO's avatar Carlos GO
Browse files

initial population

parent 756a44aa
......@@ -139,16 +139,20 @@ def fold(sequences):
def gc_content(s):
return len([n for n in s if n == "G" or n == "C"]) / float(len(s))
def populate(size, length, gc, fit="energy", target=None):
pop = []
while len(pop) < size:
seq = None
while True:
seq = "".join([np.random.choice(BASES, p=[(1-gc)/2, (1- gc)/2,\
gc/2, gc/2]) for _ in range(length)])
if gc - 0.1 <= gc_content(seq) <= gc + 0.1:
break
pop.append(seq)
def populate(size, length, gc, fit="energy", target=None, initial=None):
if not initial:
pop = []
while len(pop) < size:
seq = None
while True:
seq = "".join([np.random.choice(BASES, p=[(1-gc)/2, (1- gc)/2,\
gc/2, gc/2]) for _ in range(length)])
if gc - 0.1 <= gc_content(seq) <= gc + 0.1:
break
pop.append(seq)
else:
pop = initial
folded = fold(pop)
rnas = [RNA(f.sequence, f.structure, f.energy, f.probability, 0, i, i,\
f.sequence) for i, f in enumerate(folded)]
......@@ -287,15 +291,42 @@ def normalize(pop, density=False, K=10):
#evolve
def evolve(args):
generations, size, length, fit, gc, mutation_rate, dest,\
target, verbose, density, K, fixpop = args
current_pop = populate(size, length, gc, fit=fit, target=target)
target, verbose, density, K, fixpop, initial = args
if not initial:
current_pop = populate(size, length, gc, fit=fit, target=target)
else:
#if given initial population, parse sequences file
length = 0
size = 0
pop = []
with open(initial, "r") as p:
for i,rna in enumerate(p):
seq = "".join([b.upper() for b in rna.split()[0]])
if i == 0:
length = len(seq)
if target:
assert length==len(target),f'Sequence length {length}\
does not match target length {len(target)}'
else:
assert len(seq) == length, 'Sequences must be same length'
assert len(set(seq).difference({'A', 'U', 'C', 'G'}))==0\
, f'Invalid nucleotide in {seq}'
size += 1
pop.append(seq)
print(f"Loaded {size} sequences of length {length} from file {initial}.")
current_pop = populate(size, length, gc, fit=fit, target=target,\
initial=pop)
pops = [current_pop]
for g in range(generations):
print(f"generation {g}")
#density normalize current_pop
next_pop = select(current_pop, mutation_rate, gc, fit=fit,\
target=target, density=density, K=K, fixpop=fixpop)
current_pop = next_pop
pops.append(current_pop)
print("Writing output file.")
write_pops(pops, dest,verbose=verbose)
pass
......@@ -308,21 +339,22 @@ def dest_format(dest, i):
@begin.start
@begin.convert(generations=int, size=int, length=int, fit=str, gc=float,\
mutation_rate=float, runs=int, procs=int, target=str, dest=str,\
density=bool, K=int, fixpop=bool)
density=bool, K=int, fixpop=bool, initial=str)
def start(generations=20, size=10, length=50, fit='energy', gc=0.5,\
mutation_rate=0.1, runs=1, procs=1, beta=1, target=None, dest="maternal.csv",\
verbose=False, density=False, K=10, fixpop=True):
verbose=False, density=False, K=10, fixpop=True, initial=None):
print(f"runs: {runs}")
if fit =="target":
length=len(target)
todo = ((generations, size, length, fit, gc, mutation_rate,\
dest_format(dest, i), target, verbose, density, K, fixpop) for i in range(runs))
dest_format(dest, i), target, verbose, density, K, fixpop, initial) for i in range(runs))
if runs > 1:
with multiprocessing.Pool(procs) as pool:
pool.map(evolve, todo)
else:
evolve(next(todo))
if __name__ == "__main__":
start(mutation_rate=-1,verbose=True, generations=1000, size=1000, runs=20,\
dest="maternal_50_b10.csv", procs=10)
# if __name__ == "__main__":
# start(mutation_rate=-1,verbose=True, generations=1000, size=1000, runs=20,\
# dest="maternal_50_b10.csv", procs=10)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment