Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
89 lines (74 sloc) 2.01 KB
from collections import defaultdict
from sys import setrecursionlimit, stdout
setrecursionlimit(1500)
# Read the sequence from the first line of the input file.  Plain "r" is used
# because the "U" mode flag is rejected by Python 3.11+; strip() already
# discards any trailing "\r" / "\n".
with open("rosalind_mrep.txt", "r") as f:
    line = f.readline().strip()

# '$' terminates the text so that every suffix ends at its own leaf.
line = line + '$'
# Debug override (already '$'-terminated, still goes through the reversal):
#line='TAGAGATAGAATGGGTCCAGAGTTTTGTAATTTCCATGGGTCCAGAGTTTTGTAATTTATTATATAGAGATAGAATGGGTCCAGAGTTTTGTAATTTCCATGGGTCCAGAGTTTTGTAATTTAT$'

# Drop the terminator, reverse the sequence, and terminate it again.
# NOTE(review): the trie is therefore built over the *reversed* sequence, so
# the strings reported later are read right-to-left relative to the input --
# confirm this is intended.
line = line[:-1][::-1] + '$'
# Debug override (bypasses the reversal above):
#line='ATGATC$'

# Every suffix of the terminated text, longest first; the last one is "$".
data = [line[i:] for i in range(len(line))]
def build_trie(data):
    """Build a character trie containing every string in *data*.

    Nodes are integers: 0 is the root and new nodes are numbered 1, 2, ...
    in the order they are created.

    Args:
        data: iterable of strings to insert.

    Returns:
        A ``(trie, labels)`` tuple.  ``trie`` is a ``defaultdict(list)``
        mapping a node id to the list of its child ids in insertion order;
        ``labels`` maps every non-root node id to the single character on
        the edge leading into that node.
    """
    trie = defaultdict(list)
    labels = {}
    next_id = 1
    for word in data:
        node = 0  # every word is inserted starting from the root
        for ch in word:
            children = trie[node]
            # Follow the existing edge labelled `ch`, if there is one.
            for child in children:
                if labels[child] == ch:
                    node = child
                    break
            else:
                # No such edge yet: create a fresh node and step into it.
                children.append(next_id)
                labels[next_id] = ch
                node = next_id
                next_id += 1
    return trie, labels
# Insert every string collected in `data` into one trie; `labels` holds the
# character attached to each non-root node.
trie,labels=build_trie(data)
# node id -> number of leaves in the subtree rooted at that node
# (filled in by repeat_cnt).
repeats = {}


def repeat_cnt(trie, Node):
    """Record in ``repeats`` the leaf count of every node below *Node*.

    A node without children counts as one leaf; any other node gets the sum
    of its children's counts.  Entries are written for *Node* and for every
    descendant of it.

    The traversal uses an explicit stack instead of recursion, so it does
    not depend on the interpreter's recursion limit even though the trie is
    as deep as the longest inserted string.

    Args:
        trie: mapping from node id to the list of child node ids.
        Node: id of the subtree root to process.

    Returns:
        None; results are stored in the module-level ``repeats`` dict.
    """
    # Each stack entry is (node, children_already_scheduled).
    stack = [(Node, False)]
    while stack:
        current, expanded = stack.pop()
        children = trie[current]
        if not children:
            repeats[current] = 1
        elif expanded:
            # All children were processed after this node was first seen.
            repeats[current] = sum(repeats[child] for child in children)
        else:
            # Revisit this node once every child has its count.
            stack.append((current, True))
            stack.extend((child, False) for child in children)
# All report lines are written through this handle.
f = stdout
# Count the leaves below each child of the root.  The root (node 0) itself
# is never passed to repeat_cnt, so it gets no entry in `repeats`.
for x in trie[0]:
    repeat_cnt(trie, x)
def strings(trie, node, s):
    """Report branching nodes in the subtree of *node* that pass the filters.

    Nodes are visited in pre-order, children in their stored order.  For a
    node with more than one child, whose prefix is at least 20 characters
    long and whose ``repeats`` count is at least 2, the node's full string
    and its count are written to ``f`` as ``"<string> <count>\\n"`` and the
    string is appended to ``answer``.

    Relies on the module-level names ``labels``, ``repeats``, ``f`` and
    ``answer``.

    NOTE(review): indentation was lost in the source this was recovered
    from.  The traversal is assumed to descend through every node, including
    nodes with a single child; confirm against the original.

    NOTE(review): the length test is applied to the prefix *before* the
    node's own character, so every reported string has length >= 21.
    Confirm whether a minimum of 20 was intended.

    Args:
        trie: mapping from node id to the list of child node ids.
        node: id of the node to start from.
        s: string spelled by the path from the root down to, but not
            including, *node*.

    Returns:
        None.
    """
    # Explicit stack instead of recursion: the trie is as deep as the text
    # is long, which would otherwise push against the recursion limit.
    pending = [(node, s)]
    while pending:
        current, prefix = pending.pop()
        children = trie[current]
        path = prefix + labels[current]  # string spelled by `current`
        if len(children) > 1 and len(prefix) >= 20 and repeats[current] >= 2:
            f.write(path + ' ' + str(repeats[current]) + '\n')
            answer.append(path)
        # Push in reverse so the first child is popped (visited) first.
        pending.extend((child, path) for child in reversed(children))
# Strings reported by strings(), in the order they were written out.
answer = []
# Walk each top-level branch; the path prefix above a child of the root is
# the empty string.
for x in trie[0]:
    strings(trie, x, '')
# f is sys.stdout here, so it is deliberately left open.
#f.close()
You can’t perform that action at this time.