Skip to content
Permalink
116ffa9ba2
Go to file
 
 
Cannot retrieve contributors at this time
106 lines (86 sloc) 2.52 KB
#!/usr/bin/env python
import sys
import os
import textwrap
InFile1 = sys.argv[1]
InFile2 = sys.argv[2]
InFile3 = sys.argv[3]
OInFile1 = open(InFile1, 'rU')
OInFile2 = open(InFile2, 'rU')
OutFile1 = open(InFile3, 'w')
Concatdict1 = {}
TaxaList = []
for Line in OInFile1:
Line = Line.strip("\n")
if Line[0] == ">":
TaxaList.append(Line)
Concatdict1[Line] = []
else:
Concatdict1[TaxaList[len(TaxaList)-1]].append(Line)
Concatdict2 = {}
TaxaList2 = []
for Line in OInFile2:
Line = Line.strip("\n")
if Line[0] == ">":
TaxaList2.append(Line)
Concatdict2[Line] = []
else:
Concatdict2[TaxaList2[len(TaxaList2)-1]].append(Line)
MissingSeqs2 = frozenset(Concatdict1.keys()) - frozenset(Concatdict2.keys()) #Keys in 1 but not 2
MissingSeqs1 = frozenset(Concatdict2.keys()) - frozenset(Concatdict1.keys()) #Keys in 2 but not 1
print MissingSeqs1
print MissingSeqs2
SeqLengthFile1 = []
X = 1
for Element in Concatdict1:
ValueStr = "".join(Concatdict1[Element])
SeqLengthFile1.append(len(ValueStr))
X = X + 1
if X > 1:
break
#print SeqLengthFile1
EmptySeq1 = "-" * SeqLengthFile1[0]
SeqLengthFile2 = []
X = 1
for Element in Concatdict2:
ValueStr2 = "".join(Concatdict2[Element])
SeqLengthFile2.append(len(ValueStr2))
X = X + 1
if X > 1:
break
#print SeqLengthFile2
EmptySeq2 = "-" * SeqLengthFile2[0]
for Elements in MissingSeqs1:
Concatdict1[Elements] = list(EmptySeq1)
for Elements in MissingSeqs2:
Concatdict2[Elements] = list(EmptySeq2)
for Element in Concatdict2:
SeqStr = "".join(Concatdict2[Element])
Concatdict1[Element].append(SeqStr)
for Key in Concatdict1:
print Key
OutFile1.write(Key + "\n")
SeqStr = "".join(Concatdict1[Key])
OutFile1.write(SeqStr + "\n")
#OutFile1.write(textwrap.fill(SeqStr, width=60) + "\n")
print SeqStr
#print len(SeqStr)
OInFile1.close()
OInFile2.close()
OutFile1.close()
OutFile2 = "MissingSeqs.list_" + InFile3
OpenOutFile2 = open(OutFile2, 'w')
OpenOutFile2.write("Missing sequences in " + InFile1 + " are: " + "\n")
for Element in MissingSeqs1:
OpenOutFile2.write(Element + "\n")
OpenOutFile2.write("Missing sequences in " + InFile2 + " are: " + "\n")
for Element in MissingSeqs2:
OpenOutFile2.write(Element + "\n")
OpenOutFile2.close()
OutFile3 = "Partition.list_" + InFile3
openOutFile3 = open(OutFile3, 'w')
openOutFile3.write(InFile1 + " = 1 - " + str(SeqLengthFile1[0]) + "\n")
first_value = SeqLengthFile1[0] + 1
second_value = SeqLengthFile1[0] + SeqLengthFile2[0]
openOutFile3.write(InFile2 + " = " + str(first_value) + " - " + str(second_value) + "\n")
openOutFile3.close()
You can’t perform that action at this time.