Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
First commit with code
- Loading branch information
Moria
committed
Sep 28, 2015
0 parents
commit 12a2079
Showing
5 changed files
with
152 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
10 5 | ||
ACCAC | ||
ACAAC | ||
CCACC | ||
ACAAC | ||
AACCA | ||
CAACC | ||
CCAAC | ||
CAACA | ||
AACAA | ||
CAACA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
20 10 | ||
ACCCACCCCC | ||
ACAACAACCA | ||
ACAACCACCA | ||
AACAACCACC | ||
CCACCCCCCA | ||
ACCACCACCC | ||
AACAACAACC | ||
CCAACAACAA | ||
CCACCACCCA | ||
CCCCCCACAC | ||
ACCACCCACC | ||
CACCCACCCC | ||
CACCCCCCAC | ||
AACCACCACC | ||
CCCACCCCCC | ||
CAACCACCAC | ||
CACCACCCAC | ||
ACCCCCCACA | ||
CCACCCACCC | ||
CAACAACCAC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
10 25 | ||
CATACGTAAAAGAAAGGCGGACAAT | ||
AAGAGGAAAAAGAAACGGGGACAAT | ||
CAGAGGTAAAAGTAAGGCGGACTAT | ||
GAGAGGTATAAGAAAGCGGGACAAT | ||
GAGAGGTAAAAGTAAGGGGGACAAA | ||
CAGAGGTAACAGAAAGGCGGACGAT | ||
TAGAGGTAACAGAAAGCGGGACAAT | ||
CAGACGTTAAAGAAAGGCGGACAAT | ||
CCGAGGTAAAACAAAGGGGAACAAT | ||
AAGAGTTAAAAGAAAGGGGGAAAAT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
10 100 | ||
CTGGCGGTGGCTATCATCCGTCCCTCATGCGAGTGGTCGGTGATAGCTCGGAAGTGTGAGGAATCCGTAGAGAATGAACTAAGTAGTTCACCTTACCCTC | ||
CCAACACTCATATCGTCTTGCTACTTGACTCCTTTTTTATTCATATTTTCAATACTCGACCTTCCACGAAGGCTGGGATTCACCTCCCTTTCCGCTGAAT | ||
CAGAGGTAAAAGAAAGGGGGACAATGATAATCGTAGAATTAAATAAGACAGGTGTCGATTACGACCCATTTCCCTTCGCTTACGGATGTATAGGCGTCTC | ||
AGTATAGGGTGAAATAGCGGCTATAACCCATTTCCATGCGGACTCGGAACTGCTAGTAGTCTCAGTCATCGGGATCAACGTTGATATGCTAGGTCCGAGA | ||
GGAGGTGTCTACAGACAGCCGCCCAAAGTAAGGCGGAATGGTCGTAAGAGCTTTCCTCGTCGTCGACTAAGATTATCTTCTTATGAAACAACGAGACCTT | ||
CAAAATTGAAAGTCTGTAAGGATATAGAGGACTCCCGCATTTCAGCAACCTATGATGGCTAGGCTTCATCTACCCCGCTGGGTCTCATCCCTGGTTTTCC | ||
GGCCGTCGAGCCTGCTCCAATTCTCCACTATCGGGCTTGCCCCTAGTAAGAAGCGCTCAGTCCACGGTACGGCAACGCAGTAAAAACACTTAGACTAAAG | ||
CATTGACTATAGCTTGAGTCGCGTGCATGTTGTTACAATCCTCAACCCTCGGGCGAGCGGAACTTGTCTTCTCAGCTTGCTTTCAAAGGCCTTACCCTTC | ||
GTCCGCACTTGCCTACCTAAGGCTGGACGCAACACCAATATTAACGGGCTTAGCCTGGAGTTGACCGGATCCTGGCCGACCTCTACTCGGCGCCCGTTGC | ||
GCACCCATCTCAGTAGTGTCACGAACGAGGATAGCTGACGATGTAAATGTTCTGGTCAAGCCGGCATTGCGGTGTGAATTAACTATCTGCCTACGCAGGG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
''' | ||
PROBLEM STATEMENT | ||
The closest sequence problem is defined as follows: | ||
Given m DNA sequences s_1, ..., s_m, each of length n, find a DNA sequence t of length | |
n such that max_{i=1,...,m} d_H(t, s_i) is minimized, where d_H(t, s_i) denotes the Hamming distance | |
between t and s_i. | |
For this project you must implement a method for finding optimum solutions to the closest sequence | ||
problem. You can either implement a branch-and-bound algorithm or use integer programming in | ||
conjunction with optimization engines such as the GNU Linear Programming Kit (GLPK). | ||
INPUT | ||
Your program should read from the standard input a line containing integers m and n, followed by | ||
m lines each containing a DNA sequence of length n. | ||
Sample input: | ||
10 25 | ||
CTGGCGGTGGCTATCATCCGTCCCT | ||
CATGCGAGTGGTCGGTGATAGCTCG | ||
GAAGTGTGAGGAATCCGTAGAGAAT | ||
GAACTAAGTAGTTCACCTTACCCTC | ||
CCAACACTCATATCGTCTTGCTACT | ||
TGACTCCTTTTTTATTCATATTTTC | ||
AATACTCGACCTTCCACGAAGGCTG | ||
GGATTCACCTCCCTTTCCGCTGAAT | ||
CAGAGGTAAAAGAAAGGGGGACAAT | ||
GATAATCGTAGAATTAAATAAGACA | ||
OUTPUT | ||
If you use integer programming, your program should print to the standard output an integer program | ||
model of the input problem instance in lp format (see http://lpsolve.sourceforge.net/5.1/CPLEXformat.htm). | ||
''' | ||
|
||
# Imports | ||
import os | ||
|
||
# Absolute paths to the input instances and to the generated LP models.
path_to_inputs = "/home/moria/Projects/Bioinformatics/CenterStringLP/Inputs/"
path_to_outputs = "/home/moria/Projects/Bioinformatics/CenterStringLP/Outputs/"
# Sequence alphabet; the model has one binary variable per (base, position),
# named "<base><position>", e.g. "A0".
bases = ['A', 'C', 'T', 'G']


def parse_instance(lines):
    """Parse one closest-sequence instance.

    ``lines`` is an iterable of text lines: a header "m n" followed by the
    sequences, one per line.  Blank lines are ignored so that a trailing
    newline at the end of the file does not become an empty sequence.

    Returns ``(n, sequences)`` where ``sequences`` is a list of stripped
    strings, each of length ``n``.  ``m`` is only checked to be an integer;
    every sequence present in the input is used.

    Raises ``ValueError`` for a missing or malformed header, a sequence whose
    length differs from ``n``, or a sequence containing a letter that is not
    in ``bases`` (either would yield a silently wrong model).
    """
    rows = [row for row in (line.strip() for line in lines) if row]
    if not rows:
        raise ValueError("empty instance: missing the 'm n' header line")

    header = rows[0].split()
    if len(header) != 2:
        raise ValueError("malformed header line: %r" % rows[0])
    int(header[0])  # m: validated as an integer, otherwise unused
    n = int(header[1])

    sequences = rows[1:]
    alphabet = set(bases)
    for seq in sequences:
        if len(seq) != n:
            raise ValueError(
                "sequence %r has length %d, expected %d" % (seq, len(seq), n))
        unknown = set(seq) - alphabet
        if unknown:
            raise ValueError(
                "sequence %r contains unsupported bases: %s"
                % (seq, ", ".join(sorted(unknown))))
    return n, sequences


def build_lp_model(sequences, n):
    """Return the integer program for the instance as CPLEX LP-format text.

    Variables: binary "<base><i>" is 1 when the answer has <base> at position
    i; ``d`` is the maximum Hamming distance being minimized.

    Constraints:
      * one per input sequence s:  n - sum_i x[s[i], i] <= d, written with the
        constant on the right-hand side (``... - d <= -n``) because the left
        side of an LP-format constraint must be a linear form in variables;
      * one per position: the base variables sum to exactly 1, so the solution
        always decodes to a sequence of length n.
    """
    out = ["Minimize\n", " obj: d\n", "Subject To\n"]
    count = 0

    # Distance constraints, one per input sequence.
    for seq in sequences:
        terms = ["- " + base + str(pos) for pos, base in enumerate(seq)]
        terms.append("- d")
        out.append(" c%d: %s <= %d\n" % (count, " ".join(terms), -n))
        count += 1

    # Exactly one base is selected at every position.
    for pos in range(n):
        total = " + ".join(base + str(pos) for base in bases)
        out.append(" c%d: %s = 1\n" % (count, total))
        count += 1

    out.append("Binaries\n")
    for pos in range(n):
        for base in bases:
            out.append(" %s%d\n" % (base, pos))
    out.append("End\n")
    return "".join(out)


def main(input_dir=path_to_inputs, output_dir=path_to_outputs):
    """Write ``<name>.lp`` into ``output_dir`` for every file in ``input_dir``."""
    for fn in os.listdir(input_dir):
        in_path = os.path.join(input_dir, fn)
        if not os.path.isfile(in_path):
            continue  # sub-directories cannot be opened as instances

        # splitext copes with names that have no dot or several dots.
        pre = os.path.splitext(fn)[0]

        # Parse fully before creating the output so that a bad instance does
        # not leave an empty or partial .lp file behind.
        with open(in_path, 'r') as f_in:
            n, sequences = parse_instance(f_in)

        with open(os.path.join(output_dir, pre + '.lp'), 'w') as f_out:
            f_out.write(build_lp_model(sequences, n))


if __name__ == "__main__":
    main()
|