diff --git a/randomSeq.py b/randomSeq.py deleted file mode 100644 index 4925a91..0000000 --- a/randomSeq.py +++ /dev/null @@ -1,211 +0,0 @@ -import sys, re, os, random - - -# number1 = raw_input('number_of_runs? > ') -# num_run = int(number1) -# #print num_run -# -# ###specifying number of taxa to be generated -# number2 = raw_input('number_of_taxa? > ') -# ntax = int(number2) -# -# ###specifying number of sites to be generated -# number3 = raw_input('number of sites? > ') -# num_sites = int(number3) -# -# ###specifying number of dna matrix to be generated -# number4 = raw_input('number of genes? > ') -# number_genes = int(number4) - - - -num_run = 1 -#print num_run - -###specifying number of taxa to be generated -ntax = 6 - -###specifying number of sites to be generated -num_sites = 10000 - -###specifying number of dna matrix to be generated -number_genes = 10 - -num_trials_pergene = 2 - - -for i in range(num_run): - run_name = i+1 - run_name2 = 'trial'+ str(run_name) - print run_name2 - master_dir = os.path.join(os.path.abspath(os.curdir), run_name2) - if not os.path.exists(master_dir): - os.mkdir(master_dir) - - - ###creating a list for the directories for each randomly generated dna matrix - all_seq = [] - sets = [] - for a, b in enumerate(range(number_genes)): - a += 1 - set = "gene"+ str(a) - sets.append(set) - - ### creating dna matrix with randomly generated sequences - for n, i in enumerate(sets): - ### specifying names of the directory and dna matrix - folder_name = i - n += 1 - new_nexus_file = 'randomseq'+ str(n)+'.nex' - new_nexus_file2 = 'randomseq'+ str(n) - ### creating a list of randomly generated sequences - sequences = [] - for i in range(ntax): - myrandom = [] - for i in range(num_sites): - i = random.randint(1,4) - myrandom.append(i) - dna_list = [] - for m in myrandom: - if m == 1: - m= "A" - if m ==2: - m= "T" - if m ==3: - m= "G" - if m == 4: - m= "C" - dna_list.append(m) - dna_list2 = ''.join(dna_list) - #print dna_list2 - sequences.append(dna_list2) - all_seq.append(sequences) - - - - ### creating a list of taxon_names - taxon_names = [] - k=0 - for i, m in enumerate(sequences): - k += 1 - taxon = "taxon"+ str(k) - taxon_names.append(taxon) - - - ### creating a directory - new_dir = os.path.join(master_dir,folder_name) - if not os.path.exists(new_dir): - os.mkdir(new_dir) - gene_sets = [] -# for i in range(3): - for i in range(num_trials_pergene): - m= i+1 - seed_num = 4648+m - m2 = 'run'+str(m) - gene_sets.append(m2) - new_subdir = os.path.join(new_dir,m2) - if not os.path.exists(new_subdir): - os.mkdir(new_subdir) - - bash_filename = 'qsub.sh' - bash_file_content = '''#$ -S /bin/bash -#$ -cwd -#$ -N %s -#$ -q highpri.q,highmem.q -python runphycas.py -/common/galax/rungalax.sh --treefile trees.t --skip 1 -'''% (new_nexus_file2) - -# # ### saving dna matrix to the directory -# - full_path1 = os.path.join(new_subdir, new_nexus_file) - newf = open(full_path1, 'w') - newf.write('#nexus\n\n') - newf.write('begin data;\n') - newf.write(' dimensions ntax=%d nchar=%d;\n' % (ntax, num_sites)) - newf.write(' format datatype=dna missing=? gap=-;\n') - newf.write(' matrix\n') - longest_taxon_name = max([len(t) for t in taxon_names]) - for t,s in zip(taxon_names, sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n') - newf.close() - - full_path2 = os.path.join(new_subdir, bash_filename) - newf = open(full_path2, 'w') - newf.write(bash_file_content) - newf.close() - - full_path8 = os.path.join(new_subdir, 'runphycas.py') - newf = open(full_path8, 'w') - x = open('runphycas.py', 'r').read() - newf.write(x %(int(seed_num), new_nexus_file)) - -# -# -# - combined_sequences = map(''.join, zip(*all_seq)) - num_sites_combined = len(combined_sequences[0]) - new_dir2 = os.path.join(master_dir,'combinedSeq') - if not os.path.exists(new_dir2): - os.mkdir(new_dir2) - - full_path3 = os.path.join(new_dir2, 'combinedSeq.nex') - newf = open(full_path3, 'w') - newf.write('#nexus\n\n') - newf.write('begin data;\n') - newf.write(' dimensions ntax=%d nchar=%d;\n' % (ntax, num_sites_combined)) - newf.write(' format datatype=dna missing=? gap=-;\n') - newf.write(' matrix\n') - longest_taxon_name = max([len(t) for t in taxon_names]) - for t,s in zip(taxon_names, combined_sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n') - newf.close() - - full_path4 = os.path.join(new_dir2, bash_filename) - newf = open(full_path4, 'w') - newf.write(bash_file_content) - newf.close() - - - - full_path6 = os.path.join(new_dir2, 'runphycas.py') - newf = open(full_path6, 'w') - x = open('runphycas.py', 'r').read() - newf.write(x %(4648, 'combinedSeq.nex')) - - - full_path7 = os.path.join(master_dir, 'submitall.sh') - newf = open(full_path7, 'w') - newf.write('#!/bin/bash\n') - for i in sets: - for m in gene_sets: - newf.write('cd %s/%s; qsub qsub.sh;cd .. ;cd .. ; \n' %(i, m)) - newf.write('cd %s; qsub qsub.sh; cd ..\n' %('combinedSeq')) - newf.close() - - full_path9 = os.path.join(master_dir, 'treelist.txt') - newf = open(full_path9, 'w') - for i in sets: - for m in gene_sets: - newf.write(i+'/'+m+'/trees.t\n') - newf.close() - - - - full_path10 = os.path.join(master_dir, 'galax.sh') - newf = open(full_path10, 'w') - newf.write('#!/bin/bash\n') - newf.write('/common/galax/rungalax.sh --listfile treelist.txt --skip 1') - newf.close() - - - -