Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Daoyuan/daoyuanSN.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
129 lines (110 sloc)
4.26 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re, glob, os, collections,sys | |
from decimal import Decimal | |
from math import log | |
from string import ascii_lowercase | |
# parameters = [ 'TL{all}', 'r(A<->C){1}', 'r(A<->G){1}', 'r(A<->T){1}', 'r(C<->G){1}', 'r(C<->T){1}', 'r(G<->T){1}', 'r(A<->C){2}', 'r(A<->G){2}', 'r(A<->T){2}', 'r(C<->G){2}', 'r(C<->T){2}', 'r(G<->T){2}', 'r(A<->C){3}', 'r(A<->G){3}', 'r(A<->T){3}', 'r(C<->G){3}', 'r(C<->T){3}', 'r(G<->T){3}', 'pi(A){1}', 'pi(C){1}', 'pi(G){1}', 'pi(T){1}', 'pi(A){2}', 'pi(C){2}', 'pi(G){2}', 'pi(T){2}', 'pi(A){3}', 'pi(C){3}', 'pi(G){3}', 'pi(T){3}', 'alpha{1}', 'alpha{2}', 'alpha{3}', 'm{1}', 'm{2}', 'm{3}'] | |
# parameter = 'speciation' | |
# parameter = parameters[1] | |
contain = [] | |
gene_list = ['Prior1.log.txt', 'Posterior1.log.txt'] | |
angio = {} | |
for fname in gene_list: | |
lines = open(fname, 'r').readlines() | |
# print lines | |
headers = lines[0].strip().split() | |
i = headers.index('tmrca(Anacostia)') | |
# print i | |
angio[fname] = [] | |
for line in lines[1:]: | |
parts = line.strip().split() | |
angio[fname].append(float(parts[i])) | |
# print fname,len(angio[fname]) | |
# angio['Prior.log'] = [127.1125949594009,127.58479381380802,...] | |
# sys.exit('aborted') | |
# print angio['Prior.log.txt'] | |
# print angio['Posterior.log.txt'] | |
################################## | |
##partitioning specific parameter values into 3 groups | |
def partition(values_1, values_2, k): | |
###sorting values of chosen key | |
values = sorted(values_1) | |
# print 'values=', values | |
values2 = sorted(values_2) | |
length_values2 = len(values2) | |
length = int(len(values)) | |
# print 'length=', length | |
###creating 3 partitons of values | |
parts = [(length/k)*(i+1) for i in range(k-1)] | |
# parts = [length/3, (length/3)*2] | |
# print 'parts=', parts | |
all_list = [] | |
for n in parts: | |
i=0 | |
###creates lists of n, n*2, n*3 values | |
diff = 200000.0 | |
diff_list = None | |
while int(len(values[i+1:])) >= n: | |
a_diff = values[i+n-1]-values[i] | |
b_diff = values[i+n]-values[i+1] | |
if a_diff < diff: | |
diff = a_diff | |
diff_list = values[i:i+n] | |
elif b_diff < diff: | |
diff = b_diff | |
diff_list = values[i+1:i+n+1] | |
i+=1 | |
n+=length/k | |
all_list.append(diff_list) | |
# print 'all_list=', all_list | |
collect2 = [0.0]*(length/k) | |
# print collect2 | |
collect = [] | |
i =0 | |
# print 'len=', all_list[0], all_list[1] | |
collect.append(all_list[0]) | |
for i in range(len(all_list)-1): | |
a = [e for e in all_list[i+1] if e not in all_list[i]] | |
collect.append(a) | |
collect.append([e for e in values if e not in all_list[-1]]) | |
collect2 = [0.0]*(k-1) | |
i=0 | |
while i <= k-2: | |
tmp = [] | |
for m in values2: | |
if (m >= collect[i][0]) and (m<=collect[i][-1]): | |
tmp.append(m) | |
collect2[i]+=1 | |
# print values2 | |
values2 = [e for e in values2 if e not in tmp] | |
# print 'tmp==', tmp, values2 | |
i+=1 | |
# print collect2, length_values2, sum(collect2) | |
collect2.append(length_values2-sum(collect2)) | |
# print 'collect2', collect2 | |
Hp1 = log(k) | |
Hp2_list = [] | |
# if float(collect2 | |
for i in collect2: | |
if i !=0: | |
Hp2_list.append((-i/length) * log(i/length)) | |
# print Hp2_list | |
Hp2 = sum(Hp2_list) | |
# Hp2 = ((-A2/Total_len) * log(A2/Total_len))+((-B2/Total_len) * log(B2/Total_len))+((-C2/Total_len) * log(C2/Total_len)) | |
M = 100*(1.-(Hp1-Hp2)/log(k)) | |
for i,m in enumerate(collect): | |
print 'D1 k=%d has n=%d' % (i, len(m)) | |
print '*********************' | |
for i,m in enumerate(collect2): | |
print 'D2 k=%d has n=%d' % (i, int(m)) | |
print '*******************************************' | |
# print 'Compatability of D1 and D2, M(D2|D1) =', M | |
return M | |
k = 5 | |
result1 = partition(angio['Prior1.log.txt'], angio['Posterior1.log.txt'], k) | |
result2 = partition(angio['Posterior1.log.txt'], angio['Prior1.log.txt'], k) | |
print 'Compatability of D1 and D2, M(D2|D1) =%d', result1 | |
print 'Compatability of D1 and D2, M(D1|D2) =%d', result2 | |
print 'Average Compatability of D1 and D2, {M(D2|D1)+ M(D1|D2)}/2 =', (result1+result2)/2. | |