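# Web-page residue captured with this script (not part of the program):
#   Skip to content / Permalink / Newer / Older
#   File info: mode 100644, 106 lines (86 sloc), 2.52 KB; dated Oct 18, 2018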
#!/usr/bin/env python
"""Concatenate two FASTA-style sequence files record by record.

Usage:
    script.py INPUT_1 INPUT_2 OUTPUT

Every header line (a line starting with ">") found in either input appears
once in OUTPUT, followed by its sequence from INPUT_1 joined to its sequence
from INPUT_2.  A header present in only one input is padded with "-" for the
other input; the pad length is the sequence length of the first record of
the input that lacks it.

Two side files are written next to OUTPUT:
    MissingSeqs.list_<OUTPUT name>  headers absent from each input
    Partition.list_<OUTPUT name>    1-based column range of each input

The code runs unchanged on Python 2 and Python 3.
"""

import sys
import os
import textwrap  # retained from the original script; currently unused

# Python 2 needs the "U" flag for universal newlines; Python 3 applies
# universal newlines by default and rejects "U" from 3.11 onwards.
_READ_MODE = "rU" if sys.version_info[0] < 3 else "r"


def read_fasta(lines):
    """Parse FASTA-style lines into ordered headers and sequence chunks.

    lines: any iterable of text lines (for example an open file).

    Returns (order, records): `order` lists each distinct header line in
    first-seen order; `records` maps a header to the list of sequence lines
    that followed it.  A repeated header restarts its sequence, so the last
    occurrence wins.

    Raises ValueError if sequence data appears before the first header.
    """
    order = []
    records = {}
    current = None
    for raw in lines:
        line = raw.strip("\r\n")
        if not line:
            # Blank lines carry no data; the original indexed line[0] here
            # and crashed.
            continue
        if line.startswith(">"):
            if line not in records:
                order.append(line)
            records[line] = []
            current = line
        elif current is None:
            raise ValueError("sequence data found before the first '>' header")
        else:
            records[current].append(line)
    return order, records


def concatenate_alignments(order1, records1, order2, records2):
    """Join two parsed inputs header by header without mutating them.

    Returns (merged, missing1, missing2, length1, length2):
      merged    list of (header, joined sequence) pairs: headers of the
                first input in file order, then headers unique to the
                second input in its file order
      missing1  headers found only in the second input
      missing2  headers found only in the first input
      length1, length2  sequence length of the first record of each input,
                used as the pad width for that input

    Raises ValueError if either input has no records.
    """
    if not order1 or not order2:
        raise ValueError("both inputs must contain at least one sequence")
    length1 = len("".join(records1[order1[0]]))
    length2 = len("".join(records2[order2[0]]))
    missing1 = [name for name in order2 if name not in records1]
    missing2 = [name for name in order1 if name not in records2]
    pad1 = "-" * length1
    pad2 = "-" * length2
    merged = []
    for name in order1 + missing1:
        left = "".join(records1[name]) if name in records1 else pad1
        right = "".join(records2[name]) if name in records2 else pad2
        merged.append((name, left + right))
    return merged, missing1, missing2, length1, length2


def sidecar_path(prefix, out_path):
    """Return out_path with `prefix` prepended to its file name only.

    For a bare file name this equals prefix + out_path; when out_path has a
    directory part the prefix is applied to the last component so the side
    file lands in the same directory.
    """
    directory, name = os.path.split(out_path)
    return os.path.join(directory, prefix + name)


def read_alignment(path):
    """Read one input file; raise ValueError if it holds no records."""
    with open(path, _READ_MODE) as handle:
        order, records = read_fasta(handle)
    if not order:
        raise ValueError("no sequences found in " + path)
    return order, records


def main(argv=None):
    """Run the command-line tool and return a process exit status."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.stderr.write("Usage: " + os.path.basename(sys.argv[0]) +
                         " INPUT_1 INPUT_2 OUTPUT\n")
        return 2
    in_path1, in_path2, out_path = args[:3]

    try:
        order1, records1 = read_alignment(in_path1)
        order2, records2 = read_alignment(in_path2)
        merged, missing1, missing2, length1, length2 = concatenate_alignments(
            order1, records1, order2, records2)
    except ValueError as err:
        sys.stderr.write("error: " + str(err) + "\n")
        return 1

    # Same console report as before: the two sets of missing headers.
    print(frozenset(missing1))
    print(frozenset(missing2))

    with open(out_path, "w") as out:
        for name, sequence in merged:
            print(name)
            out.write(name + "\n")
            out.write(sequence + "\n")
            print(sequence)

    with open(sidecar_path("MissingSeqs.list_", out_path), "w") as out:
        out.write("Missing sequences in " + in_path1 + " are: " + "\n")
        for name in missing1:
            out.write(name + "\n")
        out.write("Missing sequences in " + in_path2 + " are: " + "\n")
        for name in missing2:
            out.write(name + "\n")

    with open(sidecar_path("Partition.list_", out_path), "w") as out:
        out.write(in_path1 + " = 1 - " + str(length1) + "\n")
        out.write(in_path2 + " = " + str(length1 + 1) + " - " +
                  str(length1 + length2) + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# Web-page footer residue (not part of the program): "You can’t perform that action at this time."