#!/usr/bin/python
from collections import defaultdict
from bisect import bisect
# Parser accumulators: the label (`key`) and sequence text (`value`) of the
# FASTA record currently being read, and the finished records (`s`, mapping
# label -> sequence).
key = ""
value = ""
s = {}
# Complement of each base, used when reverse-complementing a read.
comp = dict(zip("ATGC", "TACG"))
# Tally keyed by read; a key that has not been seen yet reads as 0.
counts = defaultdict(int)
def revcomp(s):
    """Return the reverse complement of the DNA string *s*.

    Every character is looked up in the module-level ``comp`` table, so a
    character that is not a key of that table raises ``KeyError``.
    """
    # Walk the read back to front, complementing each base; join once
    # instead of growing a string with repeated ``+=``.
    return "".join(comp[base] for base in reversed(s))
# Parse the FASTA file into `s` (record label -> sequence). Sequence lines of
# one record are concatenated; each ">" header line stores the record that
# was being accumulated and starts a new one.
# Plain "r" is used because the "U" mode flag was removed in Python 3.11;
# the .strip() calls below already discard "\r\n" line endings.
with open("rosalind_corr.txt", "r") as f:
    for x in f:
        if x[0] != ">":
            value = value + x.strip()
        else:
            # `key` is still empty before the first header, so there is
            # nothing to store yet.
            if key != "":
                s[key] = value
            key = x[1:].strip()
            value = ""
# The last record has no following header to trigger its storage.
s[key] = value
# Tally every read under one canonical orientation: whichever of the read
# and its reverse complement compares greater as a string.
for read in s.values():
    canonical = max(read, revcomp(read))
    counts[canonical] += 1
def _within_one_mismatch(good, read):
    """Return True if *read* differs from *good* in at most one position.

    Positions are taken from *good*; *read* is indexed at the same offsets.
    The scan stops at the second mismatch.
    """
    mismatches = 0
    for i, base in enumerate(good):
        if base != read[i]:
            mismatches += 1
            if mismatches > 1:
                return False
    return True


# Reads tallied more than once are trusted; the rest are candidates for
# correction.
lgood = sorted(t for t in counts if counts[t] > 1)
lbad = [t for t in counts if counts[t] <= 1]
weird = []    # candidates with no match among their sorted neighbours
answers = []  # (candidate, trusted read) pairs
testcount = 0
for z in lbad:
    # Cheap first attempt: compare only against the two trusted reads on
    # either side of z's insertion point in sorted order (right neighbour
    # first, then left).
    m = bisect(lgood, z)
    for j in (m, m - 1):
        if 0 <= j < len(lgood) and _within_one_mismatch(lgood[j], z):
            answers.append((z, lgood[j]))
            testcount += 1
            break
    else:
        # Neither neighbour is within one mismatch; leave it for the
        # exhaustive pass.
        weird.append(z)
# Second pass for the reads the sorted-neighbour check could not place: try
# every single-base substitution and look the result up among the trusted
# reads.
# NOTE(review): this separator goes to stdout along with the results;
# presumably a debugging marker -- confirm before removing.
print('========')
testcount = 0
for x in weird:
    x_rc = revcomp(x)
    for i in range(len(x)):
        for c in ['A', 'G', 'T', 'C']:
            xchange = x[0:i] + c + x[i+1:]
            # Trusted reads are stored in canonical orientation (the greater
            # of read / reverse complement), so canonicalise the candidate
            # the same way and remember which orientation of x it matches.
            rchange = revcomp(xchange)
            if rchange > xchange:
                xchange = rchange
                xuse = x_rc
            else:
                xuse = x
            # bisect() returns the insertion point to the right of an equal
            # element, so an exact match can only sit at m - 1.
            m = bisect(lgood, xchange)
            if m > 0 and xchange == lgood[m-1]:
                answers.append((xuse, lgood[m-1]))
                testcount += 1
# Emit one "erroneous->corrected" line per correction, ordered by the
# corrected read.
answers.sort(key=lambda pair: pair[1])
# Build the membership set once instead of scanning s.values() per answer.
original_reads = set(s.values())
for (u, v) in answers:
    if u in original_reads:
        print(u + "->" + v)
    else:
        # u is not one of the input reads as written, so print the pair in
        # the opposite orientation.
        print(revcomp(u) + '->' + revcomp(v))
# End of script.