Skip to content
Permalink
Browse files

Create Assembly.java

  • Loading branch information
sus11005 committed Sep 28, 2016
0 parents commit 2f82d5847c966af2b70b8b44ea78ae59085aa733
Showing with 287 additions and 0 deletions.
  1. +287 −0 COMPRESSION/Assembly.java
@@ -0,0 +1,287 @@
import java.io.IOException;
import java.util.*;

public class Assembly {

// Number of contig strings for which getMappedContigs obtained a null or empty fragment list.
public static int notMapped;
// Summed string length of the contigs counted in notMapped.
public static int notMappedSize;
// Size-matching tolerance used by greedyScoringScheme, applied as a percentage of the
// current contig fragment size; the initial value 0 leaves no tolerance at all.
public static int ranSd = 0;

/**
 * Sums the lengths of all contig strings.
 *
 * @param list contig sequences; must not be null and must not contain null entries
 * @return combined character count of every string in {@code list} (0 for an empty list)
 */
public static int getTotalContigSize(List<String> list) {
    int sum = 0;
    Iterator<String> sequences = list.iterator();
    while (sequences.hasNext()) {
        sum += sequences.next().length();
    }
    return sum;
}

/**
 * Greedily aligns every mapped contig against the optical map and collects, per contig,
 * the candidate placements found together with their scores.
 *
 * <p>Contigs are read with {@code Utilities.readContigs}, the optical map with
 * {@code Utilities.readOpMap}, and contigs are converted to fragment lists through
 * {@link #getMappedContigs}. Progress and summary lines are printed to standard output.
 * If a contig produces no candidate at all, the method prints {@code NOT_PASSED} and
 * terminates the JVM via {@code System.exit(0)}.
 *
 * @param opFileName      file name handed to {@code Utilities.readOpMap}
 * @param contigsFileName file name handed to {@code Utilities.readContigs}
 * @param pattern         forwarded unchanged to {@link #getMappedContigs}
 * @param probability     forwarded unchanged to {@link #getMappedContigs}
 * @param elongation      forwarded unchanged to {@link #getMappedContigs}
 * @return raw map from contig id to a {@code List<Contig>} of candidate placements,
 *         each carrying start position, end position and matched score
 * @throws IOException declared by the signature; presumably raised by the file-reading helpers
 */
public static HashMap greedyScoringScheme(String opFileName,
String contigsFileName, String pattern, double probability,
double elongation) throws IOException {
HashMap hashMap = new HashMap();
List<String> contigsList = Utilities.readContigs(contigsFileName);
int totalLength = getTotalContigSize(contigsList);

System.out.println("REAL_CONTIGS: " + contigsList.size());
System.out.println("TOTAL_LENGTH: " + totalLength);

List<Double> opList = Utilities.readOpMap(opFileName);

List<Contig> contigs = getMappedContigs(contigsList, pattern, probability, elongation);
// The raw sequences are not used again in this method once the fragment lists exist.
contigsList.clear();

System.out.println("MAPPED_CONTIGS: " + contigs.size());

// best: optical-map index k of the lowest-scoring candidate seen so far (kept across contigs).
int best = 0;
// Fragment count of the previously processed contig; used to advance the search window.
int prev_list_size = 0;
// True once the current contig has produced its first candidate.
boolean passed = false;

for (int i = 0; i < contigs.size(); i++) {

// Lowest matched score found for this contig; reset for every contig.
double min = Double.MAX_VALUE;

int slide = best;

if (i % 50 == 0) {
System.out.println("COMPLETED: " + i);
}

Contig contig = contigs.get(i);
List<Double> conFragList = contig.getFragmentSizeList();

// Matching mode: 0 = compare a fresh pair of fragments, 1 = keep accumulating contig
// fragments, 2 = keep accumulating optical-map fragments.
// NOTE(review): declared outside the k loop, so the mode left over from one start
// position is still in effect when the next one begins — confirm this is intended.
int add = 0;
// Search window: start half a previous-contig length past the previous best position
// and span three times the current contig's fragment count.
int begin = slide + prev_list_size / 2;
int end = begin + 3 * conFragList.size();

// Fallback to scanning the whole optical map.
// NOTE(review): a contig without candidates terminates the process further down, so
// passed appears to always be true here when i > 0, making this branch look
// unreachable — confirm.
if (i > 0 && !passed) {
begin = 0;
end = opList.size() - conFragList.size() + 1;
}

passed = false;

// Clamp the window so the contig cannot start past the last feasible optical-map index.
if (end > opList.size() - conFragList.size() + 1) {
end = opList.size() - conFragList.size() + 1;
}

for (int k = begin; k < end; k++) {

// Comparison starts at index 1 of the contig fragments and k + 1 of the optical map,
// i.e. the first fragment on each side is not size-compared.
// NOTE(review): presumably because boundary fragments are partial — confirm.
// NOTE(review): a contig with a single fragment would index past its list below.
int textPos = k + 1;
int patternPos = 1;
// Mismatch counter used as a penalty multiplier; starts at 1, not 0.
int missedRS = 1;
// Counts how often the contig side was too small; bounds the loop.
int test = 0;
double conFragSize = 0.0;
double opFragSize = 0.0;
double scoreOpt = 0.0;
double scoreCon = 0.0;

while (true) {
if (add == 1) {
// Contig side was too small: merge in the next contig fragment.
conFragSize += conFragList.get(patternPos);
} else if (add == 0) {
// Previous pair matched: start over with the current fragment of each list.
conFragSize = conFragList.get(patternPos);
opFragSize = opList.get(textPos);
} else if (add == 2) {
// Contig side was too large: merge in the next optical-map fragment.
opFragSize += opList.get(textPos);
}
// Tolerance is ranSd percent of the current contig fragment size.
double sd = conFragList.get(patternPos) * (double) ranSd / 100;
double lowerBound = opFragSize - sd;
double upperBound = opFragSize + sd;

if (conFragSize >= lowerBound && conFragSize <= upperBound) {
// Sizes agree within tolerance: advance both lists.
patternPos = patternPos + 1;
textPos = textPos + 1;
add = 0;
} else if (conFragSize < lowerBound) {
// Contig fragment too small: advance the contig side only and count a miss.
patternPos = patternPos + 1;
missedRS = missedRS + 1;
test++;
add = 1;
} else if (conFragSize > upperBound) {
// Contig fragment too large: advance the optical-map side only and count a miss.
missedRS = missedRS + 1;
textPos = textPos + 1;
add = 2;
}

// Give up on this start position without recording a candidate.
if (test > conFragList.size()) break;

// Either list is exhausted: record a candidate placement for start position k.
if (patternPos >= conFragList.size() || textPos >= opList.size()) {

Contig new_contig = new Contig();
new_contig.setContigId(contig.getContigId());
// NOTE(review): the start comes from Utilities.position_list while the end below is
// an index into opList — confirm both are expressed in the same unit.
new_contig.setStartingPosition(Utilities.position_list.get(k));

// After a clean match textPos was already advanced past the last matched fragment,
// so step back by one before using it as the end.
if (add == 0) {
textPos = textPos - 1;
new_contig.setEndingPosition(textPos);
} else {
new_contig.setEndingPosition(textPos);
}

// Math.pow(x, 1) is x, so these are plain sums of the covered fragment sizes:
// optical-map indices [k, textPos) and contig indices [0, patternPos).
for (int index = k; index < textPos; index++) {
scoreOpt += Math.pow(opList.get(index), 1);
}

for (int index = 0; index < patternPos; index++) {
scoreCon += Math.pow(conFragList.get(index), 1);
}

// Score = total size difference plus a fixed penalty of 9999 per counted miss.
double matchedScore = Math.abs(scoreCon - scoreOpt) + 9999 * missedRS;

new_contig.setMatchedScore(matchedScore);

// ">=" lets a later start position win ties.
if (min >= matchedScore) {
min = matchedScore;
best = k;
}

List<Contig> list = (List<Contig>) hashMap.get(contig.getContigId());

if (list == null) {
list = new ArrayList<Contig>();
list.add(new_contig);
hashMap.put(contig.getContigId(), list);
// Only the first candidate for a contig id flips the flag.
passed = true;
} else {
list.add(new_contig);
hashMap.put(contig.getContigId(), list);
}
break;
}
}
}

if (!passed) {
System.out.println("NOT_PASSED: " + i);
// Terminates the JVM with exit status 0.
System.exit(0);
// NOTE(review): never executed at runtime because of the exit above; it looks like a
// leftover from a retry-the-same-contig strategy — confirm before removing.
i = i - 1;
}
prev_list_size = conFragList.size();
}
return hashMap;
}

/**
 * Converts contig sequences into {@code Contig} objects carrying their fragment sizes.
 *
 * <p>Each sequence is passed to {@code Utilities.getFragmentSizeList}. A sequence whose
 * result is null or empty is skipped and counted in {@code notMapped} /
 * {@code notMappedSize}. The contig id is the zero-based index of the sequence in
 * {@code contigsList}, so ids of skipped sequences are never assigned.
 * {@code Utilities.restList} is replaced with a fresh empty list as a side effect.
 *
 * @param contigsList contig sequences, in the order that defines their ids
 * @param pattern     forwarded to {@code Utilities.getFragmentSizeList}
 * @param probability forwarded to {@code Utilities.getFragmentSizeList}
 * @param elongation  forwarded to {@code Utilities.getFragmentSizeList}
 * @return the contigs that yielded at least one fragment, in input order
 * @throws IOException declared by the signature
 */
public static List<Contig> getMappedContigs(List<String> contigsList, String pattern,
        double probability, double elongation) throws IOException {
    Utilities.restList = new ArrayList<Contig>();
    List<Contig> mapped = new ArrayList<Contig>();

    int contigId = 0;
    for (Iterator<String> sequences = contigsList.iterator(); sequences.hasNext(); contigId++) {
        String sequence = sequences.next();
        List<Double> fragments =
                Utilities.getFragmentSizeList(sequence, pattern, probability, elongation);

        // No usable fragments: account for the contig and move on (the id still advances).
        if (fragments == null || fragments.isEmpty()) {
            notMapped++;
            notMappedSize += sequence.length();
            continue;
        }

        Contig mappedContig = new Contig();
        mappedContig.setContigId(contigId);
        mappedContig.setFragmentSizeList(fragments);
        mapped.add(mappedContig);
    }
    return mapped;
}

/**
 * Returns a new list holding the given contigs in ascending natural order.
 *
 * <p>The ordering is whatever {@code Contig}'s {@code compareTo} defines (not visible
 * here); the input list is left untouched.
 *
 * @param list contigs to sort
 * @return a fresh, mutable list with the same elements in sorted order
 */
public static List<Contig> getSortedList(List<Contig> list) {
    Contig[] ordered = list.toArray(new Contig[list.size()]);
    Arrays.sort(ordered);
    return new ArrayList<Contig>(Arrays.asList(ordered));
}

/**
 * Picks at most one non-overlapping placement per contig from the scored candidates.
 *
 * <p>Candidates come from {@link #greedyScoringScheme}. For each contig (visited in the
 * map's iteration order) the candidates are sorted and the first one that does not
 * overlap an already chosen placement is kept, as decided by {@link #getNextContig}.
 * The candidate lists and the map are cleared once they have been consumed.
 *
 * @param opFileName  forwarded to {@link #greedyScoringScheme}
 * @param conFileName forwarded to {@link #greedyScoringScheme} as the contigs file name
 * @param pattern     forwarded to {@link #greedyScoringScheme}
 * @param depth       search-depth limit handed to {@link #getNextContig}
 * @param probability forwarded to {@link #greedyScoringScheme}
 * @param elongation  forwarded to {@link #greedyScoringScheme}
 * @return the chosen placements, in the order they were selected
 * @throws IOException propagated from {@link #greedyScoringScheme}
 */
public static List<Contig> greedyScheduleScheme(String opFileName,
        String conFileName, String pattern, int depth,
        double probability, double elongation) throws IOException {
    List<Contig> scheduled = new ArrayList<Contig>();

    HashMap candidatesById =
            greedyScoringScheme(opFileName, conFileName, pattern, probability, elongation);
    System.out.println("HASH_MAP_SIZE: " + candidatesById.size());

    for (Object value : candidatesById.values()) {
        List<Contig> candidates = (List<Contig>) value;

        Contig chosen = getNextContig(getSortedList(candidates), scheduled, depth);
        if (chosen != null) {
            scheduled.add(chosen);
        }

        // Release the candidates of this contig; only the chosen one is still referenced.
        candidates.clear();
    }

    candidatesById.clear();
    return scheduled;
}

/**
 * Tells whether a candidate placement collides with any already placed contig.
 *
 * <p>The candidate overlaps a placed contig when its start lies inside the placed range,
 * when its end lies inside the placed range, or when it spans the placed range entirely.
 * Range ends are inclusive.
 *
 * @param contigs placements that are already taken
 * @param contig  candidate placement to test
 * @return {@code true} if the candidate overlaps at least one element of {@code contigs};
 *         {@code false} otherwise, including when {@code contigs} is empty
 */
public static boolean isOverlapped(List<Contig> contigs, Contig contig) {
    // Nothing placed yet: return before touching the candidate at all.
    if (contigs.isEmpty()) {
        return false;
    }

    // The candidate's bounds do not change while scanning, so read them once.
    int candidateStart = contig.getStartingPosition();
    int candidateEnd = contig.getEndingPosition();

    for (Contig placed : contigs) {
        int placedStart = placed.getStartingPosition();
        int placedEnd = placed.getEndingPosition();

        boolean startInside = candidateStart >= placedStart && candidateStart <= placedEnd;
        boolean endInside = candidateEnd >= placedStart && candidateEnd <= placedEnd;
        boolean spansPlaced = candidateStart <= placedStart && candidateEnd >= placedEnd;

        // A former fourth test ("start before placed and end inside placed") was a strict
        // subset of endInside and could never be reached, so it is not repeated here.
        if (startInside || endInside || spansPlaced) {
            return true;
        }
    }
    return false;
}

/**
 * Returns the first candidate that does not overlap any already used contig.
 *
 * <p>Candidates are examined in list order. The scan stops after the candidate at index
 * {@code depth} has been examined, so up to {@code depth + 1} candidates are considered
 * (and index 0 is always examined when the list is non-empty).
 *
 * @param sortedList  candidates, best first
 * @param usedContigs placements already chosen
 * @param depth       highest list index that may still be examined
 * @return the first non-overlapping candidate, or {@code null} if none is found in range
 */
public static Contig getNextContig(List<Contig> sortedList, List<Contig> usedContigs, int depth) {
    int position = 0;
    while (position < sortedList.size()) {
        Contig candidate = sortedList.get(position);
        if (!isOverlapped(usedContigs, candidate)) {
            return candidate;
        }
        if (position >= depth) {
            return null;
        }
        position++;
    }
    return null;
}

/**
 * Builds a random lowercase nucleotide string.
 *
 * <p>Each character is drawn independently and uniformly from {@code a}, {@code g},
 * {@code c}, {@code t} using a freshly seeded {@link Random}.
 *
 * @param size number of characters to generate; zero or a negative value yields ""
 * @return a string of {@code size} characters over the alphabet {@code agct}
 */
public static String getRandomPattern(int size) {
    // Index i corresponds to random value i: 0 -> a, 1 -> g, 2 -> c, 3 -> t.
    final String alphabet = "agct";
    Random rand = new Random();

    // A builder avoids the quadratic copying of repeated String concatenation; the
    // capacity is clamped because a negative capacity would throw.
    StringBuilder pattern = new StringBuilder(Math.max(size, 0));
    for (int i = 0; i < size; i++) {
        pattern.append(alphabet.charAt(rand.nextInt(alphabet.length())));
    }
    return pattern.toString();
}

}

0 comments on commit 2f82d58

Please sign in to comment.
You can’t perform that action at this time.