Commit cfdc4738 authored by Carlos GO's avatar Carlos GO
Browse files

merge fix

parents 76a55a0e ff99ed93
......@@ -32,10 +32,19 @@ When a simulation is to be repeated with the same parameters, the -r flag tells
```
python mateRNAl.py -l 100 -g 0.3 -m 0.1 -t 2000 -n 5 -p 5
```
You can start a simulation from a seed population of RNA sequences using the `--initial` option followed by the path to a sequence file. The file should have an RNA sequence on each line. An error will be thrown if sequences are of different lengths. If using `target` mode an error will be thrown if the size of the sequence does not match the size of the target structure.
```
python mateRNAl.py --initial pop.txt -t "....((((......((((...((((((.....))))))))))....))))"
```
Where `pop.txt` is a plaintext file containing one sequence per line.
### Output
Each run produces a `.csv` file that can be easily parsed with tools such as [pandas](http://pandas.pydata.org/). Each column of the csv is labeled as follows:
```
generation, sequence, structure, energy, probability, gc, mutations,
generation, sequence, structure, energy, probability, gc, mutations, fitness, id, parent
```
......@@ -74,6 +74,11 @@ class RNA:
if fit == "target":
self.fitness = math.exp((-1 * beta * bp_dist(self.structure,\
target)) / len(self.structure))
if fit == "gaussian":
sigma = 20
mean = 40
z = abs(-1 * self.energy - mean) / sigma
self.fitness = 1 / (1 + np.exp(z))
pass
def hamming(s1, s2):
......@@ -139,16 +144,20 @@ def fold(sequences):
def gc_content(s):
return len([n for n in s if n == "G" or n == "C"]) / float(len(s))
def populate(size, length, gc, fit="energy", target=None):
pop = []
while len(pop) < size:
seq = None
while True:
seq = "".join([np.random.choice(BASES, p=[(1-gc)/2, (1- gc)/2,\
gc/2, gc/2]) for _ in range(length)])
if gc - 0.1 <= gc_content(seq) <= gc + 0.1:
break
pop.append(seq)
def populate(size, length, gc, fit="energy", target=None, initial=None):
if not initial:
pop = []
while len(pop) < size:
seq = None
while True:
seq = "".join([np.random.choice(BASES, p=[(1-gc)/2, (1- gc)/2,\
gc/2, gc/2]) for _ in range(length)])
if gc - 0.1 <= gc_content(seq) <= gc + 0.1:
break
pop.append(seq)
else:
pop = initial
folded = fold(pop)
rnas = [RNA(f.sequence, f.structure, f.energy, f.probability, 0, i, i,\
f.sequence) for i, f in enumerate(folded)]
......@@ -162,12 +171,8 @@ def mutate(rna, mutation_rate):
#if adaptive mutation rate, mutation rate is a function of stability.
if mutation_rate == -1:
<<<<<<< HEAD
sig = lambda e: 1 / (3 + np.exp(-e/15))
=======
# sig = lambda e: 1 / (1 + np.exp(-e/10))
sig = lambda x: 1 / (6 + 200 * np.exp(x/10))
>>>>>>> 756a44aaf893605bb016d1b69222e9e0116c05c9
mutation_rate = sig(rna.energy)
pass
......@@ -291,15 +296,42 @@ def normalize(pop, density=False, K=10):
#evolve
def evolve(args):
generations, size, length, fit, gc, mutation_rate, dest,\
target, verbose, density, K, fixpop = args
current_pop = populate(size, length, gc, fit=fit, target=target)
target, verbose, density, K, fixpop, initial = args
if not initial:
current_pop = populate(size, length, gc, fit=fit, target=target)
else:
#if given initial population, parse sequences file
length = 0
size = 0
pop = []
with open(initial, "r") as p:
for i,rna in enumerate(p):
seq = "".join([b.upper() for b in rna.split()[0]])
if i == 0:
length = len(seq)
if target:
assert length==len(target),f'Sequence length {length}\
does not match target length {len(target)}'
else:
assert len(seq) == length, 'Sequences must be same length'
assert len(set(seq).difference({'A', 'U', 'C', 'G'}))==0\
, f'Invalid nucleotide in {seq}'
size += 1
pop.append(seq)
print(f"Loaded {size} sequences of length {length} from file {initial}.")
current_pop = populate(size, length, gc, fit=fit, target=target,\
initial=pop)
pops = [current_pop]
for g in range(generations):
print(f"generation {g}")
#density normalize current_pop
next_pop = select(current_pop, mutation_rate, gc, fit=fit,\
target=target, density=density, K=K, fixpop=fixpop)
current_pop = next_pop
pops.append(current_pop)
print("Writing output file.")
write_pops(pops, dest,verbose=verbose)
pass
......@@ -312,22 +344,22 @@ def dest_format(dest, i):
@begin.start
@begin.convert(generations=int, size=int, length=int, fit=str, gc=float,\
mutation_rate=float, runs=int, procs=int, target=str, dest=str,\
density=bool, K=int, fixpop=bool)
density=bool, K=int, fixpop=bool, initial=str)
def start(generations=20, size=10, length=50, fit='energy', gc=0.5,\
mutation_rate=0.1, runs=1, procs=1, beta=1, target=None, dest="maternal.csv",\
verbose=False, density=False, K=10, fixpop=True):
verbose=False, density=False, K=10, fixpop=True, initial=None):
print(f"runs: {runs}")
if fit =="target":
length=len(target)
todo = ((generations, size, length, fit, gc, mutation_rate,\
dest_format(dest, i), target, verbose, density, K, fixpop) for i in range(runs))
dest_format(dest, i), target, verbose, density, K, fixpop, initial) for i in range(runs))
if runs > 1:
with multiprocessing.Pool(procs) as pool:
pool.map(evolve, todo)
else:
evolve(next(todo))
if __name__ == "__main__":
start(mutation_rate=-1,verbose=True, generations=1000, size=1000, runs=20,\
:x
:x
# if __name__ == "__main__":
# start(mutation_rate=-1,verbose=True, generations=1000, size=1000, runs=20,\
# dest="maternal_50_b10.csv", procs=10)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment