diff --git a/compress.py b/compress.py deleted file mode 100644 index 35da95c..0000000 --- a/compress.py +++ /dev/null @@ -1,118 +0,0 @@ -import os -from datetime import datetime -import sys -import tarfile -import shutil - - - - -genome_fastq_file = sys.argv[1] -genome_name = sys.argv[1][:-6] -t1 = datetime.now() - -if not os.path.exists(genome_name): - os.makedirs(genome_name) - -Temp='temp' -if not os.path.exists(Temp): - os.makedirs(Temp) - - - -file = open(genome_fastq_file) -ID = open(os.path.join(genome_name, genome_name+'.ID'), "w") -Seq = open(os.path.join(genome_name, genome_name+'.Seq'), "w") -Qual = open(os.path.join(genome_name, genome_name+'.Qual'), "w") -Seq.write('>\n') -line = file.readline() - -def tsplit(string, delimiters): - """Behaves str.split but supports multiple delimiters.""" - - delimiters = tuple(delimiters) - stack = [string,] - - for delimiter in delimiters: - for i, substring in enumerate(stack): - substack = substring.split(delimiter) - stack.pop(i) - for j, _substring in enumerate(substack): - stack.insert(i+j, _substring) - - return stack - -Length= len(tsplit(line, (' ','.', '/', '-','_',':','='))) - -sep='' - -for i in line: - if i == ' ' or i == '.' or i == '/' or i == '-' or i == '_' or i == ':' or i == '=': - sep+=i - -Sep = open(os.path.join(Temp,"sep.txt"), "w") -Sep.write(sep) -Sep.close() -file.seek(0) - -while line: - - ID.write(file.readline()) - Seq.write(file.readline().strip()) - line = file.readline() - Qual.write(file.readline()) - -file.readline().strip() -Qual.close() -ID.close() -Seq.close() -file.close() - - - -cd='./MFcompress/MFCompressC -3 '+'-o'+' ' + os.path.join(Temp,genome_name+'.Seq') +' '+os.path.join(genome_name,genome_name+'.Seq') +' &' + './lpaq8/lpaq8 9 '+ os.path.join(genome_name,genome_name+'.Qual') +' '+os.path.join(Temp,genome_name+'.Qual') + '| grep Hello' -os.system(cd) - -AWK= 'awk -F' -Delimter= "'[_ =:./-]'" -cd2 = " '{print $" - -space= ' ' -source= os.path.join(genome_name, genome_name+'.ID') -concat= ' > ' - - -for h in range(1,Length+1): - cd3= str(h)+"}'" - dest= os.path.join(genome_name, str(h)+'.ID')# - cd = AWK+ Delimter+cd2+cd3+space+source+concat+dest - os.system(cd) - - - -for x in range(1,Length+1): - cd='./lpaq8/lpaq8 9 '+ os.path.join(genome_name,str(x)+'.ID') +' '+os.path.join(genome_name,str(x)+'.ID.lpaq8') + '| grep Hello' - os.system(cd) - os.remove(os.path.join(genome_name,str(x)+'.ID')) - - -IDtar = tarfile.open(os.path.join(Temp,genome_name+'.LFastqC'), mode='w') - -try: - - for x in range(1,Length+1): - IDtar.add(os.path.join(genome_name,str(x)+'.ID.lpaq8')) - os.remove(os.path.join(genome_name,str(x)+'.ID.lpaq8')) -finally: - IDtar.close() - - - -out = tarfile.open(genome_name+'.LFastqC', mode='w') -try: - out.add('temp') -finally: - out.close() -shutil.rmtree('temp') -shutil.rmtree(genome_name) -print "Compression time in seconds = ", (datetime.now() - t1).seconds, "\n" \ No newline at end of file diff --git a/decompress.py b/decompress.py deleted file mode 100644 index 5f66cbc..0000000 --- a/decompress.py +++ /dev/null @@ -1,102 +0,0 @@ -import os -from datetime import datetime -import sys -import tarfile -import shutil -import glob - - -genome_fastq_file = sys.argv[1] -genome_name = sys.argv[1] - - - -t1 = datetime.now() - -tar = tarfile.open(genome_name) -tar.extractall() -tar.close() -tar = tarfile.open(os.path.join('temp',genome_name)) -tar.extractall('temp') -tar.close() -os.remove(os.path.join('temp',genome_name)) -genome_name=genome_name[:-8] - - -Sep = open(os.path.join('temp',"sep.txt"), "r") -sep = Sep.readline() -sep=sep+'\n' -Sep.close() -os.remove(os.path.join('temp',"sep.txt")) - -directory= os.path.join('temp',genome_name) - -for x in range(1,len(sep)+1): - cd='./lpaq8/lpaq8 d '+ os.path.join(directory,str(x)+'.ID.lpaq8') +' '+os.path.join(directory,str(x)+'.ID') + '| grep Hello' - os.system(cd) - os.remove(os.path.join(directory,str(x)+'.ID.lpaq8')) - - -cd='./MFcompress/MFCompressD '+' ' + os.path.join('temp',genome_name+'.Seq') + ' & '+ './lpaq8/lpaq8 d '+ os.path.join('temp',genome_name+'.Qual') +' '+os.path.join('temp',genome_name+'.Qual2') + '| grep Hello'#+' '+os.path.join('temp',genome_name+'.Seq2') -os.system(cd) -os.remove(os.path.join('temp',genome_name+'.Seq')) - -os.remove(os.path.join('temp',genome_name+'.Qual')) - - -ID = open(os.path.join('temp',"ID.txt"), "w") - - -file = open(os.path.join(directory,'1.ID')) -line = file.readline() -size=os.path.getsize(os.path.join(directory,'1.ID')) -file.close() - -iterate= size/len(line) - - -f=[] -for x in range(1,len(sep)+1): - f.append(open(os.path.join(directory, str(x)+'.ID'))) - -idc='' - -for y in range(iterate): - for x in range(len(sep)): - - idc+= f[x].readline().strip() + sep[x] - ID.write(idc) - idc='' - -for x in range(len(sep)): - f[x].close() -ID.close() -shutil.rmtree(directory) - - -fileId = open(os.path.join('temp', 'ID.txt'), "r") -fileSeq = open(os.path.join('temp', genome_name+'.Seq.d'), "r") -fileQual = open(os.path.join('temp', genome_name+'.Qual2'), "r") - -Final = open(genome_name+'_fastq', 'w') - -fileSeq.readline() -Sequence= fileSeq.readline().strip() -position=0 - -for y in range(iterate): - Final.write(fileId.readline()) - temp_Qual = fileQual.readline().strip() - Final.write(Sequence[position : position+len(temp_Qual)]+'\n') - position+=len(temp_Qual) - Final.write('+\n') - Final.write(temp_Qual+'\n') - - - -fileId.close() -fileSeq.close() -fileQual.close() -Final.close() -shutil.rmtree('temp') -print "Decompression time in seconds = ", (datetime.now() - t1).seconds, "\n" \ No newline at end of file