From 6883d2d2d1c7fde7e554be10a6f2e4d2960aa812 Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:54:28 -0400 Subject: [PATCH] Delete readSeq.py --- readSeq.py | 69 ------------------------------------------------------ 1 file changed, 69 deletions(-) delete mode 100644 readSeq.py diff --git a/readSeq.py b/readSeq.py deleted file mode 100644 index 91c58a5..0000000 --- a/readSeq.py +++ /dev/null @@ -1,69 +0,0 @@ -def patterns(sequence_file): - # - import re, os, glob, itertools, fnmatch, sys, shutil - from itertools import combinations - from collections import Counter - - script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) -# path = os.path.join(script_dir, 'nexus') - - genes = [] - data = {} -# print 'Reading nexus files...' - for filename in glob.glob(os.path.join(script_dir, sequence_file)): - - m = re.match('(.+).nex', os.path.basename(filename)) - gene_name = m.group(1) -# print 'gene_name=', gene_name - genes.append(gene_name) - f = open(filename, 'r').read() - - m = re.search('ntax\s*=\s*(\d+)', f, re.M | re.S) - ntax = int(m.group(1)) -# print 'ntax=', ntax - - m = re.search('nchar\s*=\s*(\d+)', f, re.M | re.S) - nchar = int(m.group(1)) -# print 'nchar=', nchar - - m = re.search('Matrix\s+(.+?);', f, re.M | re.S) - matrix = m.group(1).strip() - matrix_lines = matrix.split('\n') - - taxon_names = [] - sequences = {} - sequences_list = [] - for line in matrix_lines: - parts = line.strip().split() - assert len(parts) == 2 - taxon_name = parts[0] - sequence = parts[1] - - taxon_names.append(taxon_name) - sequences_list.append(sequence) - sequences[taxon_name] = sequence - - pattern_list = [] - - k=0 - while k < nchar: - site_pattern = '' - for i,m in enumerate(sequences_list): - site_pattern += m[k] - pattern_list.append(site_pattern) - k+=1 - pattern_dict = dict() - for i in pattern_list: - pattern_dict[i] = pattern_dict.get(i, 0) + 1 - - tmp = [] - for key in pattern_dict.keys(): ###convert dict to key of tupules -# print 'key=', key - tmp.append((pattern_dict[key],key)) - - sorted_values = sorted(tmp) ###sorted according to key smaller to larger - sorted_values.sort(cmp = lambda x,y:cmp(x[1],y[1])) ###sorted according to values in alphabetical order - - return pattern_dict - -