Skip to content
Permalink
4fc296996d
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
118 lines (81 sloc) 2.75 KB
import os
import shutil
import subprocess
import sys
import tarfile
from datetime import datetime
# --- Command line, working directories and stream files ---------------------
# Usage: <script> <reads.fastq>
if len(sys.argv) < 2:
    sys.exit("usage: %s <reads.fastq>" % sys.argv[0])
genome_fastq_file = sys.argv[1]
# Base name used for the scratch directory, the intermediate files and the
# final archive.  Taken from the file name only (no directory part, no
# extension) so that inputs given with a path, or with an extension other
# than ".fastq", still produce well-formed names in the current directory.
genome_name = os.path.splitext(os.path.basename(genome_fastq_file))[0]
# Start of the timed section; the elapsed time is printed at the end.
t1 = datetime.now()

# Open the input first so that a missing or unreadable file fails before any
# directory is created.
file = open(genome_fastq_file)

# Scratch directory holding the uncompressed ID / Seq / Qual streams.
if not os.path.exists(genome_name):
    os.makedirs(genome_name)
# Staging directory; it is added to the final archive as a whole.
Temp = 'temp'
if not os.path.exists(Temp):
    os.makedirs(Temp)

# One output stream per FASTQ record component.
ID = open(os.path.join(genome_name, genome_name + '.ID'), "w")
Seq = open(os.path.join(genome_name, genome_name + '.Seq'), "w")
Qual = open(os.path.join(genome_name, genome_name + '.Qual'), "w")
# The sequence stream starts with a bare '>' header line.
# NOTE(review): presumably this makes the stream a FASTA file acceptable to
# MFCompressC - confirm against the MFCompress documentation.
Seq.write('>\n')
# First line of the input (the first read header); it is used to work out
# the header field layout before the file is rewound and processed.
line = file.readline()
def tsplit(string, delimiters):
    """Behave like str.split, but split on several delimiters.

    The delimiters are applied one after another, in the order given: the
    string is split on the first one, every resulting piece is split on the
    second one, and so on.  Empty pieces are kept, exactly as
    ``str.split(delimiter)`` keeps them.

    Args:
        string: The text to split.
        delimiters: Iterable of non-empty separator strings.

    Returns:
        A list of the pieces, in their original order.  With no delimiters
        the result is ``[string]``.

    Raises:
        ValueError: If a delimiter is the empty string (from ``str.split``).
    """
    pieces = [string]
    for delimiter in delimiters:
        # Build a fresh list per delimiter instead of popping/inserting into
        # the list being iterated.
        pieces = [part for piece in pieces for part in piece.split(delimiter)]
    return pieces
# Characters treated as field separators inside a read header.
header_delimiters = (' ', '.', '/', '-', '_', ':', '=')

# Number of fields in the first header line; later stages create one file
# per field, numbered 1..Length.
Length = len(tsplit(line, header_delimiters))

# The separator characters of the first header, in order of appearance,
# stored in the staging directory as sep.txt.
sep = ''.join(char for char in line if char in header_delimiters)
with open(os.path.join(Temp, "sep.txt"), "w") as Sep:
    Sep.write(sep)
# Split the FASTQ input into three streams, four lines per record:
#   line 1 (header)    -> ID file, written unchanged
#   line 2 (sequence)  -> Seq file, stripped, so all reads form one long line
#   line 3 ('+' line)  -> read and discarded
#   line 4 (qualities) -> Qual file, written unchanged
file.seek(0)  # rewind: the first header line was already read once above
try:
    while True:
        header = file.readline()
        if not header:
            break  # end of input
        ID.write(header)
        Seq.write(file.readline().strip())
        file.readline()  # '+' separator line is not stored
        Qual.write(file.readline())
finally:
    # Close every handle even if a read or write fails part-way, so the
    # external compressors never see half-flushed files.
    Qual.close()
    ID.close()
    Seq.close()
    file.close()
# Compress the sequence stream with MFCompressC and the quality stream with
# lpaq8, both writing into the staging directory.  The two tools run
# concurrently, and BOTH are waited for here: nothing later in the script
# may archive or delete their files while one of them is still running.
# Argument lists (no shell) keep file names with spaces or shell
# metacharacters intact.
seq_cmd = ['./MFcompress/MFCompressC', '-3', '-o',
           os.path.join(Temp, genome_name + '.Seq'),
           os.path.join(genome_name, genome_name + '.Seq')]
qual_cmd = ['./lpaq8/lpaq8', '9',
            os.path.join(genome_name, genome_name + '.Qual'),
            os.path.join(Temp, genome_name + '.Qual')]

seq_proc = subprocess.Popen(seq_cmd)  # runs in the background
devnull = open(os.devnull, 'w')
try:
    # lpaq8's standard output is discarded; its stderr still reaches the user.
    qual_status = subprocess.call(qual_cmd, stdout=devnull)
finally:
    devnull.close()
    seq_status = seq_proc.wait()

if seq_status != 0:
    sys.stderr.write("warning: MFCompressC exited with status %d\n" % seq_status)
if qual_status != 0:
    sys.stderr.write("warning: lpaq8 exited with status %d on the quality stream\n" % qual_status)
# Split the header (ID) stream column-wise: field h of every header line goes
# to <genome_name>/<h>.ID, one value per line.  awk does the splitting, using
# the bracket expression below as field separator.
source = os.path.join(genome_name, genome_name + '.ID')
field_separator_option = '-F' + '[_ =:./-]'
for h in range(1, Length + 1):
    dest = os.path.join(genome_name, str(h) + '.ID')
    # awk is started without a shell; its output is redirected by handing it
    # the destination file directly, so odd characters in the paths are safe.
    with open(dest, 'w') as field_file:
        status = subprocess.call(
            ['awk', field_separator_option, '{print $%d}' % h, source],
            stdout=field_file)
    if status != 0:
        sys.stderr.write("warning: awk exited with status %d for field %d\n" % (status, h))
# Compress every per-field header file with lpaq8 (<h>.ID -> <h>.ID.lpaq8)
# and delete the uncompressed file afterwards.
devnull = open(os.devnull, 'w')
try:
    for x in range(1, Length + 1):
        field_path = os.path.join(genome_name, str(x) + '.ID')
        # No shell involved; lpaq8's standard output is discarded.
        status = subprocess.call(
            ['./lpaq8/lpaq8', '9', field_path, field_path + '.lpaq8'],
            stdout=devnull)
        if status != 0:
            sys.stderr.write("warning: lpaq8 exited with status %d for field %d\n" % (status, x))
        os.remove(field_path)
finally:
    devnull.close()
# Inner archive: bundle the compressed header-field files into an
# uncompressed tar inside the staging directory, deleting each file as soon
# as it has been added.
compressed_fields = (
    os.path.join(genome_name, str(number) + '.ID.lpaq8')
    for number in range(1, Length + 1)
)
IDtar = tarfile.open(os.path.join(Temp, genome_name + '.LFastqC'), mode='w')
try:
    for member_path in compressed_fields:
        IDtar.add(member_path)
        os.remove(member_path)
finally:
    IDtar.close()

# Outer archive: the final result, an uncompressed tar of the whole staging
# directory, written to <genome_name>.LFastqC in the current directory.
out = tarfile.open(genome_name + '.LFastqC', mode='w')
try:
    out.add('temp')
finally:
    out.close()
# Remove the staging and scratch directories; only the final archive is kept.
shutil.rmtree('temp')
shutil.rmtree(genome_name)
# Whole seconds elapsed since t1.  total_seconds() also counts full days,
# which the .seconds attribute alone would drop.
elapsed_seconds = int((datetime.now() - t1).total_seconds())
# Single parenthesised string: valid on Python 2 and Python 3 alike.  The
# spacing and the extra blank line reproduce the output of the earlier
# comma-separated print statement byte for byte.
print("Compression time in seconds =  %d \n" % elapsed_seconds)