diff --git a/COMPRESSION/Assembly.java b/COMPRESSION/Assembly.java new file mode 100644 index 0000000..26c9734 --- /dev/null +++ b/COMPRESSION/Assembly.java @@ -0,0 +1,287 @@ +import java.io.IOException; +import java.util.*; + +public class Assembly { + + public static int notMapped; + public static int notMappedSize; + public static int ranSd = 0; + + public static int getTotalContigSize(List list) { + int total = 0; + for (String string : list) { + total += string.length(); + } + return total; + } + + public static HashMap greedyScoringScheme(String opFileName, + String contigsFileName, String pattern, double probability, + double elongation) throws IOException { + HashMap hashMap = new HashMap(); + List contigsList = Utilities.readContigs(contigsFileName); + int totalLength = getTotalContigSize(contigsList); + + System.out.println("REAL_CONTIGS: " + contigsList.size()); + System.out.println("TOTAL_LENGTH: " + totalLength); + + List opList = Utilities.readOpMap(opFileName); + + List contigs = getMappedContigs(contigsList, pattern, probability, elongation); + contigsList.clear(); + + System.out.println("MAPPED_CONTIGS: " + contigs.size()); + + int best = 0; + int prev_list_size = 0; + boolean passed = false; + + for (int i = 0; i < contigs.size(); i++) { + + double min = Double.MAX_VALUE; + + int slide = best; + + if (i % 50 == 0) { + System.out.println("COMPLETED: " + i); + } + + Contig contig = contigs.get(i); + List conFragList = contig.getFragmentSizeList(); + + int add = 0; + int begin = slide + prev_list_size / 2; + int end = begin + 3 * conFragList.size(); + + if (i > 0 && !passed) { + begin = 0; + end = opList.size() - conFragList.size() + 1; + } + + passed = false; + + if (end > opList.size() - conFragList.size() + 1) { + end = opList.size() - conFragList.size() + 1; + } + + for (int k = begin; k < end; k++) { + + int textPos = k + 1; + int patternPos = 1; + int missedRS = 1; + int test = 0; + double conFragSize = 0.0; + double opFragSize = 0.0; + double scoreOpt = 0.0; + double scoreCon = 0.0; + + while (true) { + if (add == 1) { + conFragSize += conFragList.get(patternPos); + } else if (add == 0) { + conFragSize = conFragList.get(patternPos); + opFragSize = opList.get(textPos); + } else if (add == 2) { + opFragSize += opList.get(textPos); + } + double sd = conFragList.get(patternPos) * (double) ranSd / 100; + double lowerBound = opFragSize - sd; + double upperBound = opFragSize + sd; + + if (conFragSize >= lowerBound && conFragSize <= upperBound) { + patternPos = patternPos + 1; + textPos = textPos + 1; + add = 0; + } else if (conFragSize < lowerBound) { + patternPos = patternPos + 1; + missedRS = missedRS + 1; + test++; + add = 1; + } else if (conFragSize > upperBound) { + missedRS = missedRS + 1; + textPos = textPos + 1; + add = 2; + } + + if (test > conFragList.size()) break; + + if (patternPos >= conFragList.size() || textPos >= opList.size()) { + + Contig new_contig = new Contig(); + new_contig.setContigId(contig.getContigId()); + new_contig.setStartingPosition(Utilities.position_list.get(k)); + + if (add == 0) { + textPos = textPos - 1; + new_contig.setEndingPosition(textPos); + } else { + new_contig.setEndingPosition(textPos); + } + + for (int index = k; index < textPos; index++) { + scoreOpt += Math.pow(opList.get(index), 1); + } + + for (int index = 0; index < patternPos; index++) { + scoreCon += Math.pow(conFragList.get(index), 1); + } + + double matchedScore = Math.abs(scoreCon - scoreOpt) + 9999 * missedRS; + + new_contig.setMatchedScore(matchedScore); + + if (min >= matchedScore) { + min = matchedScore; + best = k; + } + + List list = (List) hashMap.get(contig.getContigId()); + + if (list == null) { + list = new ArrayList(); + list.add(new_contig); + hashMap.put(contig.getContigId(), list); + passed = true; + } else { + list.add(new_contig); + hashMap.put(contig.getContigId(), list); + } + break; + } + } + } + + if (!passed) { + System.out.println("NOT_PASSED: " + i); + System.exit(0); + i = i - 1; + } + prev_list_size = conFragList.size(); + } + return hashMap; + } + + public static List getMappedContigs(List contigsList, String pattern, + double probability, double elongation) throws IOException { + int counter = 0; + List list = new ArrayList(); + Utilities.restList = new ArrayList(); + + for (String string : contigsList) { + List orderedFragmentSizeList = Utilities.getFragmentSizeList(string, pattern, probability, elongation); + + if (orderedFragmentSizeList != null && orderedFragmentSizeList.size() > 0) { + + Contig contig = new Contig(); + contig.setContigId(counter); + contig.setFragmentSizeList(orderedFragmentSizeList); + list.add(contig); + } else { + notMapped++; + notMappedSize += string.length(); + } + counter++; + } + return list; + } + + public static List getSortedList(List list) { + List sortedList = new ArrayList(); + + Contig[] values = new Contig[list.size()]; + int index = 0; + for (Contig contig : list) { + values[index] = contig; + index++; + } + + Arrays.sort(values); + for (int i = 0; i < list.size(); i++) { + sortedList.add(values[i]); + } + return sortedList; + } + + public static List greedyScheduleScheme(String opFileName, + String conFileName, String pattern, int depth, + double probability, double elongation) throws IOException { + List usedContigs = new ArrayList(); + + HashMap hashMap = greedyScoringScheme(opFileName, conFileName, pattern, probability, elongation); + + System.out.println("HASH_MAP_SIZE: " + hashMap.size()); + + Set set = hashMap.entrySet(); + Iterator iterator = set.iterator(); + + while (iterator.hasNext()) { + + Map.Entry me = (Map.Entry) iterator.next(); + List list = (List) me.getValue(); + List sortedList = getSortedList(list); + + Contig contig = getNextContig(sortedList, usedContigs, depth); + + if (contig != null) { + usedContigs.add(contig); + } + + //usedContigs.add(sortedList.get(0)); + list.clear(); + } + + hashMap.clear(); + + return usedContigs; + } + + public static boolean isOverlapped(List contigs, Contig contig) { + for (Contig con : contigs) { + int startingPosition = con.getStartingPosition(); + int endPosition = con.getEndingPosition(); + if (contig.getStartingPosition() >= startingPosition && contig.getStartingPosition() <= endPosition) { + return true; + } else if (contig.getEndingPosition() >= startingPosition && contig.getEndingPosition() <= endPosition) { + return true; + } else if (contig.getStartingPosition() <= startingPosition && contig.getEndingPosition() >= endPosition) { + return true; + } else if (contig.getStartingPosition() <= startingPosition && + (contig.getEndingPosition() >= startingPosition && contig.getEndingPosition() <= endPosition)) { + return true; + } + } + return false; + } + + public static Contig getNextContig(List sortedList, List usedContigs, int depth) { + for (int i = 0; i < sortedList.size(); i++) { + Contig contig = sortedList.get(i); + boolean overlapped = isOverlapped(usedContigs, contig); + if (!overlapped) { + return contig; + } + + if (i >= depth) break; + } + return null; + } + + public static String getRandomPattern(int size) { + String pattern = ""; + Random rand = new Random(); + for (int i = 0; i < size; i++) { + int randomNumber = rand.nextInt(4); + if (randomNumber == 0) { + pattern += "a"; + } else if (randomNumber == 1) { + pattern += "g"; + } else if (randomNumber == 2) { + pattern += "c"; + } else if (randomNumber == 3) { + pattern += "t"; + } + } + return pattern; + } + +}