From 9029fd37e8b43f16aa1d8da9d2cd351edd90c858 Mon Sep 17 00:00:00 2001 From: Subrata Saha Date: Wed, 28 Sep 2016 11:08:58 -0400 Subject: [PATCH] Create Utilities.java --- POMP-DETECT/Utilities.java | 1374 ++++++++++++++++++++++++++++++++++++ 1 file changed, 1374 insertions(+) create mode 100644 POMP-DETECT/Utilities.java diff --git a/POMP-DETECT/Utilities.java b/POMP-DETECT/Utilities.java new file mode 100644 index 0000000..85fdbd2 --- /dev/null +++ b/POMP-DETECT/Utilities.java @@ -0,0 +1,1374 @@ +//package spliced; + +//import multicore.CallBackTest; + +import java.io.*; +import java.util.*; +import java.util.concurrent.CopyOnWriteArrayList; + +public class Utilities { + + public static int kMerLength; + public static List reads; + public static String mod_sequence; + public static int division; + public static int min; + public static CopyOnWriteArrayList informations; + public static HashMap> hash_map_one; + public static HashMap> hash_map_two; + public static List union_list; + public static int contigs; + public static int contigs_coverage; + public static int read_length; + public static int point; + + public static String chromosome_name = "chr1"; + public static String pre_process = "off"; + public static String index_file_name = "/home/sus11005/Data/CODE/SPLICED/DATA/NEW/MAIN_INDEX/chr"; + public static String map_file_name = "/home/sus11005/Data/CODE/SPLICED/DATA/NEW/MAP/map_100.sam"; + public static String umrq_file_name = "/home/sus11005/Data/CODE/SPLICED/DATA/NEW/MAP/umrs_100.fq"; + public static String umrs_file_name = "/home/sus11005/Data/CODE/SPLICED/DATA/NEW/MAP/umrs_100.fa"; + public static String coverage_file_name = "/home/sus11005/Data/CODE/SPLICED/DATA/NEW/MAP/coverage_100.cov"; + public static String reads_file_name = "/home/sus11005/Data/CODE/SPLICED/DATA/NEW/READS/100.1.fastq"; + public static String sequence_file_folder = ""; + + public static int mismatch_for_full_alignment = 2; + public static int mismatch_for_half_alignment = 1; + public static int alignments = 100; + public static int threads = 10; + public static int consensus_length = 208; + public static double hamming_dist_threshold = 0; + public static int threshold = 1; + public static int break_point = 1; + public static int number_of_threads = 1; + + + public Utilities(String properties_file_name) throws IOException { + informations = new CopyOnWriteArrayList(); + union_list = new ArrayList(); + division = 10000; + kMerLength = 5; + min = 35; + + FileInputStream fileInputStream = new FileInputStream(properties_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + int counter = 1; + + while ((line = bufferedReader.readLine()) != null) { + if (counter == 2) { + index_file_name = line; + } else if (counter == 4) { + map_file_name = line; + } else if (counter == 6) { + umrq_file_name = line; + } else if (counter == 8) { + umrs_file_name = line; + } else if (counter == 10) { + reads_file_name = line; + } else if (counter == 12) { + coverage_file_name = line; + } else if (counter == 14) { + mismatch_for_full_alignment = Integer.parseInt(line); + } else if (counter == 16) { + mismatch_for_half_alignment = Integer.parseInt(line); + } else if (counter == 18) { + alignments = Integer.parseInt(line); + } else if (counter == 20) { + threads = Integer.parseInt(line); + } else if (counter == 22) { + consensus_length = Integer.parseInt(line); + } else if (counter == 24) { + chromosome_name = line; + } else if (counter == 26) { + pre_process = line; + } else if (counter == 28) { + hamming_dist_threshold = Double.parseDouble(line); + } else if (counter == 30) { + threshold = Integer.parseInt(line); + } else if (counter == 32) { + break_point = Integer.parseInt(line); + } else if (counter == 34) { + sequence_file_folder = line; + } else if (counter == 40) { + number_of_threads = Integer.parseInt(line); + } + counter++; + } + + bufferedReader.close(); + } + + public List read_contigs(String reads_file_name, int read_length) throws IOException { + List list = new ArrayList(); + FileInputStream fileInputStream = new FileInputStream(reads_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + while ((line = bufferedReader.readLine()) != null) { + list.add(line.toUpperCase()); + if (line.length() > read_length) { + contigs++; + } + } + bufferedReader.close(); + return list; + } + + public static String readSequence(String sequenceFileName) throws IOException { + System.out.println("READING SEQUENCE..."); + + FileInputStream fileInputStream = new FileInputStream(sequenceFileName); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + StringBuilder builder = new StringBuilder(); + + while ((line = bufferedReader.readLine()) != null) { + if (!line.startsWith(">")) { + builder.append(line); + } + } + + System.out.println("SEQUENCE SIZE: " + builder.length()); + return builder.toString(); + } + + public void write(String fileName, String text, boolean append) throws IOException { + FileWriter fileWriter = new FileWriter(fileName, append); + BufferedWriter out = new BufferedWriter(fileWriter); + out.write(text); + out.close(); + } + + public List getUMRs(String file_name, int read_length) throws IOException { + System.out.println("GETTING_UMRS..."); + return read_contigs(file_name, read_length); + } + + public List PreProcessUMR(int read_id, String read) throws IOException, InterruptedException { + + List list = new ArrayList(); + List out = new ArrayList(); + String seed; + int counter = 0; + int length = read.length() / 2; + + for (int i = 0; i < 2; i++) { + + if (i == 0) { + seed = read.substring(0, length); + } else { + seed = read.substring(length); + } + + if (counter > 0) { + out = new ArrayList(); + } else if (i == 0) { + out = hash_map_one.get(read_id); + } else if (i == 1) { + out = hash_map_two.get(read_id); + } + + if (out == null) { + out = new ArrayList(); + } + + ArrayList positions = new ArrayList(); + + for (PartInfo partInfo : out) { + Position position = new Position(); + position.setPosition(partInfo.getPosition()); + position.setMismatch(partInfo.getMismatch()); + position.setStrand(partInfo.isStrand()); + positions.add(position); + counter++; + } + + UMRInfo umrInfo = new UMRInfo(); + umrInfo.setSeed(seed); + umrInfo.setPositions(positions); + list.add(umrInfo); + } + + if (counter > 0) { + return list; + } else { + + return new ArrayList(); + } + } + + public List PreProcessUMR(int read_id, String read, int point) throws IOException, InterruptedException { + + List list = new ArrayList(); + List out = new ArrayList(); + String seed; + int counter = 0; + + int begin_index = point * 25; + int end_index = begin_index + 50; + + if (read.length() - end_index < 25) { + end_index = read.length(); + } + + String new_read = read.substring(begin_index, end_index); + + int length = 25; + + for (int i = 0; i < 2; i++) { + + if (i == 0) { + seed = new_read.substring(0, length); + } else { + seed = new_read.substring(length); + } + + if (counter > 0) { + out = new ArrayList(); + } else if (i == 0) { + out = hash_map_one.get(read_id); + } else if (i == 1) { + out = hash_map_two.get(read_id); + } + + if (out == null) { + out = new ArrayList(); + } + + ArrayList positions = new ArrayList(); + + for (PartInfo partInfo : out) { + Position position = new Position(); + position.setPosition(partInfo.getPosition()); + position.setMismatch(partInfo.getMismatch()); + position.setStrand(partInfo.isStrand()); + positions.add(position); + counter++; + } + + UMRInfo umrInfo = new UMRInfo(); + umrInfo.setSeed(seed); + umrInfo.setPositions(positions); + list.add(umrInfo); + } + + if (counter > 0) { + return list; + } else { + return new ArrayList(); + } + } + + + public List processUMR(List list, int hamming_distance, int min_length, int read_id, int contig_length) { + List info_list = new ArrayList(500); + + for (int i = 0; i < list.size(); i++) { + List positions = list.get(i).getPositions(); + + for (Position position : positions) { + List informations = extend_seed(position, i, list, hamming_distance, min_length, read_id); + + if (informations.size() > 0) { + for (Information information : informations) { + information.setContig_id(read_id); + information.setIntron_length(Math.abs(information.getRight_boundary() - information.getLeft_boundary() + 1)); + if (information.getPart_no() == 0) { + information.setLeft_mismatch(information.getLeft_mismatch() + position.getMismatch()); + information.setRight_mismatch(information.getRight_mismatch()); + } else if (information.getPart_no() == 1) { + information.setRight_mismatch(information.getRight_mismatch() + position.getMismatch()); + information.setLeft_mismatch(information.getLeft_mismatch()); + } + + if (contig_length > read_length) { + information.setCoverage(true); + } else { + information.setCoverage(false); + } + } + } + info_list.addAll(informations); + } + } + return info_list; + } + + public static void add_coverage(List list, boolean decision, int left_boundary, int right_boundary, + int left_length, int right_length) { + + int index = left_boundary; + int index_value = 0; + + if (decision) { + for (int l = 0; l < left_length; l++) { + index_value = list.get(index); + list.set(index, index_value + contigs_coverage); + index--; + } + } else { + for (int l = 0; l < left_length; l++) { + index_value = list.get(index); + list.set(index, index_value + 1); + index--; + } + } + + index = right_boundary; + + if (decision) { + for (int r = 0; r < right_length; r++) { + index_value = list.get(index); + list.set(index, index_value + contigs_coverage); + index++; + } + } else { + for (int r = 0; r < right_length; r++) { + index_value = list.get(index); + list.set(index, index_value + 1); + index++; + } + } + } + + public List processUMR(int read_id, int contig_length) { + + //int length = 25;//read_length / 2; + int length = read_length / 2; + + List info_list = new ArrayList(); + + List list_one = hash_map_one.get(read_id); + List list_two = hash_map_two.get(read_id); + + for (int i = 0; i < list_one.size(); i++) { + PartInfo partInfoOne = list_one.get(i); + for (int j = 0; j < list_two.size(); j++) { + PartInfo partInfoTwo = list_two.get(j); + + if (partInfoOne.isStrand() && partInfoTwo.isStrand()) { + if (partInfoOne.getPosition() < partInfoTwo.getPosition()) { + + Information information = new Information(); + information.setLeft_boundary(partInfoOne.getPosition() + length); + information.setRight_boundary(partInfoTwo.getPosition()); + information.setContig_id(read_id); + information.setLeft_mismatch(partInfoOne.getMismatch()); + information.setRight_mismatch(partInfoTwo.getMismatch()); + information.setIntron_length(Math.abs(information.getRight_boundary() - information.getLeft_boundary() + 1)); + + information.setLeft_length(length); + information.setRight_length(length); + + if (contig_length > read_length) { + information.setCoverage(true); + } else { + information.setCoverage(false); + } + + info_list.add(information); + } + } else if (!partInfoOne.isStrand() && !partInfoTwo.isStrand()) { + if (partInfoOne.getPosition() > partInfoTwo.getPosition()) { + + Information information = new Information(); + information.setLeft_boundary(partInfoTwo.getPosition() + length); + information.setRight_boundary(partInfoOne.getPosition()); + information.setLeft_mismatch(partInfoTwo.getMismatch()); + information.setRight_mismatch(partInfoOne.getMismatch()); + information.setContig_id(read_id); + information.setIntron_length(Math.abs(information.getRight_boundary() - information.getLeft_boundary() + 1)); + + information.setLeft_length(length); + information.setRight_length(length); + + if (contig_length > read_length) { + information.setCoverage(true); + } else { + information.setCoverage(false); + } + + info_list.add(information); + + } + } + } + } + return info_list; + } + + public String getReverseString(String str) { + StringBuilder revString = new StringBuilder(str); + for (int i = 0; i < revString.length(); i++) { + if (revString.charAt(i) == 'A') { + revString.setCharAt(i, 'T'); + } else if (revString.charAt(i) == 'T') { + revString.setCharAt(i, 'A'); + } else if (revString.charAt(i) == 'G') { + revString.setCharAt(i, 'C'); + } else if (revString.charAt(i) == 'C') { + revString.setCharAt(i, 'G'); + } + } + + return revString.reverse().toString(); + } + + public List extend_seed(Position position, int index, List list, int distance, int min_length, int read_id) { + + List info_list = new ArrayList(); + + String seed = list.get(index).getSeed(); + int left_extension = position.getPosition(); + int right_extension = position.getPosition() + seed.length() - 1; + + String left_seed = (index > 0 && list.get(index - 1).getPositions().size() <= 0) ? list.get(index - 1).getSeed() : ""; + String right_seed = (index < list.size() - 1 && list.get(index + 1).getPositions().size() <= 0) ? list.get(index + 1).getSeed() : ""; + + if (!position.isStrand()) { + String interim = right_seed; + right_seed = getReverseString(left_seed); + left_seed = getReverseString(interim); + } + + if (left_seed.length() > 0) { + List informations = extend_right_seed_left(left_seed, seed.length(), left_extension, distance, min_length); + info_list.addAll(informations); + } + + if (right_seed.length() > 0) { + List informations = extend_left_seed_right(right_seed, seed.length(), right_extension, distance, min_length); + info_list.addAll(informations); + } + + return info_list; + } + + public List extend_right_seed_left(String left_seed, int right_seed_length, int left_extension, + int max_ham_distance, int min_length) { + List info_list = new ArrayList(); + + int left_length = left_seed.length(); + int hamming_distance = 0; + + for (int i = left_length - 1; i >= 0; i--) { + + if (mod_sequence.charAt(left_extension - 1) == left_seed.charAt(i)) { + left_extension--; + } else if (hamming_distance < max_ham_distance) { + left_extension--; + hamming_distance++; + } else { + left_seed = left_seed.substring(0, i + 1); + break; + } + + if (left_extension - 1 < 0) { + left_seed = ""; + break; + } + } + + + if (left_seed.length() >= min_length && left_seed.length() <= left_length) { + + if (left_seed.length() >= hamming_dist_threshold * min_length) { + hamming_distance = 1; + } else { + hamming_distance = 0; + } + + List alignments = align(left_seed, -1, left_extension, hamming_distance); + + if (alignments.size() > 0) { + int extension = left_length - left_seed.length() + 1; + + for (Align alignment : alignments) { + Information information = new Information(); + information.setLeft_boundary(alignment.getIndex()); + information.setRight_boundary(left_extension); + information.setLeft_mismatch(alignment.getMismatches()); + information.setPart_no(0); + information.setLeft_length(left_seed.length()); + information.setRight_length(extension + right_seed_length); + + info_list.add(information); + } + } + } + + return info_list; + } + + public List extend_left_seed_right(String right_seed, int left_seed_length, + int right_extension, int max_ham_distance, int min_length) { + List info_list = new ArrayList(); + + int right_length = right_seed.length(); + int hamming_distance = 0; + + for (int i = 0; i < right_length; i++) { + + if (mod_sequence.charAt(right_extension + 1) == right_seed.charAt(i)) { + right_extension++; + } else if (hamming_distance < max_ham_distance) { + right_extension++; + hamming_distance++; + } else { + right_seed = right_seed.substring(i); + break; + } + + if (right_extension + 1 >= mod_sequence.length() - 1) { + right_seed = ""; + break; + } + } + + if (right_seed.length() >= min_length && right_seed.length() <= right_length) { + + if (right_seed.length() >= hamming_dist_threshold * min_length) { + hamming_distance = 1; + } else { + hamming_distance = 0; + } + + List alignments = align(right_seed, 1, right_extension, hamming_distance); + + if (alignments.size() > 0) { + + int extension = right_length - right_seed.length() + 1; + + for (Align alignment : alignments) { + Information information = new Information(); + information.setLeft_boundary(right_extension); + information.setRight_boundary(alignment.getIndex()); + information.setRight_mismatch(alignment.getMismatches()); + information.setPart_no(1); + information.setRight_length(left_seed_length + extension); + information.setLeft_length(right_seed.length()); + + info_list.add(information); + } + } + } + + return info_list; + } + + public List align(String mod_seed, int direction, int extension, int hamming_distance) { + + String kMer, search_space; + List alignments = new ArrayList(); + List kmer_list = new ArrayList(); + HashMap> hashMap; + + for (int index = 0; index < mod_seed.length() - kMerLength + 1; index++) { + kMer = mod_seed.substring(index, index + kMerLength); + kmer_list.add(kMer); + } + + if (direction < 0) { + + search_space = ""; + int previous_begin_index = 0; + + for (int starting_threshold = division; starting_threshold <= threshold; starting_threshold += division) { + + int begin_index = extension - starting_threshold + 1; + + if (begin_index < 0) { + begin_index = 0; + } + + if (search_space.length() <= 0) { + search_space = mod_sequence.substring(begin_index, extension); + } else { + search_space = mod_sequence.substring(begin_index, previous_begin_index); + } + + hashMap = new HashMap>(); + buildKMerFromRead(search_space, kMerLength, hashMap); + + int add = begin_index + mod_seed.length() - 1; + + for (int index = 0; index < kmer_list.size(); index++) { + kMer = kmer_list.get(index); + + List pos_list = getPositions(kMer, hashMap); + List occurrence_list = getOccurrences(pos_list, mod_seed, search_space, hamming_distance, index, add, alignments); + alignments.addAll(occurrence_list); + } + + if (begin_index == 0 || alignments.size() > 0) { + break; + } + + previous_begin_index = begin_index; + } + + } else { + + search_space = ""; + int previous_begin_index = 0; + int add; + + for (int starting_threshold = division; starting_threshold <= threshold; starting_threshold += division) { + + int end_index = extension + starting_threshold - 1; + + if (end_index >= mod_sequence.length()) { + end_index = mod_sequence.length(); + } + + if (search_space.length() <= 0) { + search_space = mod_sequence.substring(extension, end_index); + add = extension; + } else { + search_space = mod_sequence.substring(previous_begin_index, end_index); + add = previous_begin_index; + } + + hashMap = new HashMap>(); + buildKMerFromRead(search_space, kMerLength, hashMap); + + for (int index = 0; index < kmer_list.size(); index++) { + kMer = kmer_list.get(index); + + List pos_list = getPositions(kMer, hashMap); + List occurrence_list = getOccurrences(pos_list, mod_seed, search_space, hamming_distance, index, add, alignments); + alignments.addAll(occurrence_list); + } + + if (end_index == mod_sequence.length() || alignments.size() > 0) { + break; + } + + previous_begin_index = end_index; + } + + } + return alignments; + } + + public List getOccurrences(List pos_list, String seed, String search_space, + int max_dist, int index, int add, List alignments) { + List list = new ArrayList(); + + for (Integer position : pos_list) { + + int hamming_distance = 0; + int start_position = position - index; + + if (start_position < 0 || start_position + seed.length() - 1 >= search_space.length()) { + continue; + } + + if (isExists(alignments, (start_position + add))) { + continue; + } + + try { + for (int i = 0; i < seed.length(); i++) { + if (search_space.charAt(i + start_position) != seed.charAt(i)) { + hamming_distance++; + if (hamming_distance > max_dist) { + break; + } + } + } + if (hamming_distance <= max_dist) { + + Align align = new Align(); + align.setIndex(start_position + add); + align.setMismatches(hamming_distance); + + list.add(align); + } + } catch (Exception x) { + System.out.println(x.getMessage()); + } + } + + return list; + } + + public boolean isExists(List list, int position) { + for (Align align : list) { + if (align.getIndex() == position) { + return true; + } + } + return false; + } + + public void buildKMerFromRead(String read, int kMerLength, HashMap> hashMap) { + int iteration = read.length() - kMerLength + 1; + + for (int i = 0; i < iteration; i++) { + String kMer = read.substring(i, i + kMerLength); + + NewKMer newKMer = new NewKMer(); + newKMer.setStartPosition(i); + newKMer.setkMer(kMer); + int key = kMer.hashCode(); + + if (hashMap.containsKey(key)) { + List list = hashMap.get(key); + list.add(newKMer); + } else { + List list = new ArrayList(); + list.add(newKMer); + hashMap.put(key, list); + } + } + } + + public static List getPositions(String kMer, HashMap> hashMap) { + List pos_list = new ArrayList(); + int key = kMer.hashCode(); + + if (!hashMap.containsKey(key)) { + return pos_list; + } + + List list = hashMap.get(key); + + for (NewKMer newKMer : list) { + if (newKMer.getkMer().equals(kMer)) { + int startPosition = newKMer.getStartPosition(); + pos_list.add(startPosition); + } + } + return pos_list; + } + + public void run_bowtie(String reads_file_name, String index_file, String map_file_name, String umrq_file_name, int mismatch, int alignments, int threads) throws IOException, InterruptedException { + + System.out.println("RUNNING BOWTIE..."); + + String command = "./bowtie --quiet --sam --sam-nohead -k " + String.valueOf(alignments) + " -v " + String.valueOf(mismatch) + " -p " + String.valueOf(threads) + " --un " + umrq_file_name + " " + index_file + " -q " + reads_file_name + " " + map_file_name; + System.out.println(command); + + //String command = "./bowtie --quiet -k " + String.valueOf(alignments) + " -v " + String.valueOf(mismatch) + " -p " + String.valueOf(threads) + " --un " + umrs_file_name + " " + index_file + " -q " + reads_file_name; + //System.out.println(command); + + + String line; + Process p = Runtime.getRuntime().exec(command); + BufferedReader bri = new BufferedReader + (new InputStreamReader(p.getInputStream())); + BufferedReader bre = new BufferedReader + (new InputStreamReader(p.getErrorStream())); + while ((line = bri.readLine()) != null) { + System.out.println(line); + } + bri.close(); + while ((line = bre.readLine()) != null) { + System.out.println(line); + } + bre.close(); + p.waitFor(); + System.out.println("DONE..."); + } + + public List run_bowtie(String part, String index_file, int mismatch, int max_alignments) throws IOException, InterruptedException { + + List list = new ArrayList(); + + String command = "./bowtie -k " + String.valueOf(max_alignments) + " -v " + String.valueOf(mismatch) + " " + index_file + " --best --suppress 1,5,6,7 -c " + part; + + String line; + Process p = Runtime.getRuntime().exec(command); + + BufferedReader bri = new BufferedReader + (new InputStreamReader(p.getInputStream())); + + BufferedReader bre = new BufferedReader + (new InputStreamReader(p.getErrorStream())); + + while ((line = bri.readLine()) != null) { + if (list.size() <= max_alignments / 2) { + list.add(line); + } + } + bri.close(); + + bre.close(); + p.waitFor(); + + return list; + } + + public void run_bowtie_build(String sequence_file_name, String index_file) throws IOException, InterruptedException { + + System.out.println("RUNNING BOWTIE-BUIELD..."); + + String command = "./bowtie-build --quiet -f " + sequence_file_name + " " + index_file; + System.out.println(command); + + String line; + Process p = Runtime.getRuntime().exec(command); + BufferedReader bri = new BufferedReader + (new InputStreamReader(p.getInputStream())); + BufferedReader bre = new BufferedReader + (new InputStreamReader(p.getErrorStream())); + while ((line = bri.readLine()) != null) { + System.out.println(line); + } + bri.close(); + while ((line = bre.readLine()) != null) { + System.out.println(line); + } + bre.close(); + p.waitFor(); + System.out.println("DONE..."); + } + + public void run_contigs_generator(String umrs_file_name, String contigs_file_name, int consensus_length) throws IOException, InterruptedException { + + System.out.println("RUNNING CONTIGS_GENERATOR..."); + + String command = "./a.out " + umrs_file_name + " " + contigs_file_name + " " + String.valueOf(consensus_length); + String line; + Process p = Runtime.getRuntime().exec(command); + BufferedReader bri = new BufferedReader + (new InputStreamReader(p.getInputStream())); + BufferedReader bre = new BufferedReader + (new InputStreamReader(p.getErrorStream())); + while ((line = bri.readLine()) != null) { + System.out.println(line); + } + bri.close(); + while ((line = bre.readLine()) != null) { + System.out.println(line); + } + bre.close(); + p.waitFor(); + System.out.println("DONE..."); + } + + + public class Information { + private int contig_id; + private int part_no; + private int left_boundary; + private int right_boundary; + private int left_mismatch; + private int right_mismatch; + private int left_length; + private int right_length; + + private int intron_length; + private boolean coverage; + + public boolean isCoverage() { + return coverage; + } + + public void setCoverage(boolean coverage) { + this.coverage = coverage; + } + + public int getContig_id() { + return contig_id; + } + + public void setContig_id(int contig_id) { + this.contig_id = contig_id; + } + + public int getIntron_length() { + return intron_length; + } + + public void setIntron_length(int intron_length) { + this.intron_length = intron_length; + } + + public int getLeft_length() { + return left_length; + } + + public void setLeft_length(int left_length) { + this.left_length = left_length; + } + + public int getRight_length() { + return right_length; + } + + public void setRight_length(int right_length) { + this.right_length = right_length; + } + + public int getPart_no() { + return part_no; + } + + public void setPart_no(int part_no) { + this.part_no = part_no; + } + + public int getLeft_mismatch() { + return left_mismatch; + } + + public void setLeft_mismatch(int left_mismatch) { + this.left_mismatch = left_mismatch; + } + + public int getRight_mismatch() { + return right_mismatch; + } + + public void setRight_mismatch(int right_mismatch) { + this.right_mismatch = right_mismatch; + } + + public int getLeft_boundary() { + return left_boundary; + } + + public void setLeft_boundary(int left_boundary) { + this.left_boundary = left_boundary; + } + + public int getRight_boundary() { + return right_boundary; + } + + public void setRight_boundary(int right_boundary) { + this.right_boundary = right_boundary; + } + } + + public class UMRInfo { + private List positions; + private String seed; + private boolean extended; + + public boolean isExtended() { + return extended; + } + + public void setExtended(boolean extended) { + this.extended = extended; + } + + public String getSeed() { + return seed; + } + + public void setSeed(String seed) { + this.seed = seed; + } + + public List getPositions() { + return positions; + } + + public void setPositions(List positions) { + this.positions = positions; + } + } + + public class NewKMer { + private String kMer; + private int startPosition; + + public String getkMer() { + return kMer; + } + + public void setkMer(String kMer) { + this.kMer = kMer; + } + + public int getStartPosition() { + return startPosition; + } + + public void setStartPosition(int startPosition) { + this.startPosition = startPosition; + } + } + + public class Position { + private boolean strand; + private int mismatch; + private int position; + + public int getMismatch() { + return mismatch; + } + + public void setMismatch(int mismatch) { + this.mismatch = mismatch; + } + + public boolean isStrand() { + return strand; + } + + public void setStrand(boolean strand) { + this.strand = strand; + } + + public int getPosition() { + return position; + } + + public void setPosition(int position) { + this.position = position; + } + } + + public int read_and_write(String read_file_name, String out_file_name) throws IOException { + FileInputStream fileInputStream = new FileInputStream(read_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + int count = 0; + boolean accept = false; + + FileWriter fileWriter = new FileWriter(out_file_name, false); + BufferedWriter out = new BufferedWriter(fileWriter); + + while ((line = bufferedReader.readLine()) != null) { + + if (line.startsWith("@")) { + accept = true; + } else if (accept) { + String text = ">" + String.valueOf(count) + "\n" + line + "\n"; + out.write(text); + accept = false; + count++; + } + } + + out.close(); + bufferedReader.close(); + return count; + } + + public int count_reads(String read_file_name) throws IOException { + FileInputStream fileInputStream = new FileInputStream(read_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + int count = 0; + boolean accept = true; + + while ((line = bufferedReader.readLine()) != null) { + + if (!line.contains(">")) { + if (accept) { + read_length = line.length(); + accept = false; + } + count++; + } + } + + bufferedReader.close(); + return count; + } + + public void read_and_write_fragmented_contigs(String readFileName, String out_file_one, String out_file_two) throws IOException { + + FileInputStream fileInputStream = new FileInputStream(readFileName); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + + FileWriter fileWriter = new FileWriter(out_file_one, false); + BufferedWriter out = new BufferedWriter(fileWriter); + + FileWriter fileWriter2 = new FileWriter(out_file_two, false); + BufferedWriter out2 = new BufferedWriter(fileWriter2); + + String line, preamble, part_one, part_two; + int counter = 0; + + while ((line = bufferedReader.readLine()) != null) { + + part_one = line.substring(0, line.length() / 2); + part_two = line.substring(line.length() / 2); + preamble = ">" + String.valueOf(counter) + "\n" + part_one + "\n"; + out.write(preamble); + preamble = ">" + String.valueOf(counter) + "\n" + part_two + "\n"; + out2.write(preamble); + counter++; + } + + out.close(); + out2.close(); + bufferedReader.close(); + + System.out.println("TOTAL SIZE: " + counter); + } + + public HashMap> get_map(String map_name, String chromosome_name, int break_point) throws IOException { + + HashMap> hash_map = new HashMap>(); + + FileInputStream fileInputStream = new FileInputStream(map_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + + String line; + int read_id; + + while ((line = bufferedReader.readLine()) != null) { + + String[] strings = line.split("\t"); + + if (!strings[2].equals(chromosome_name)) { + continue; + } + + read_id = Integer.parseInt(strings[0]); + + PartInfo partInfo = new PartInfo(); + + if (strings[1].equalsIgnoreCase("+")) { + partInfo.setStrand(true); + } else { + partInfo.setStrand(false); + } + + partInfo.setPosition(Integer.parseInt(strings[3])); + + if (strings.length > 4 && strings[4].length() > 0) { + partInfo.setMismatch(strings[4].split(",").length); + } else { + partInfo.setMismatch(0); + } + + if (hash_map.containsKey(read_id)) { + List list = hash_map.get(read_id); + if (list.size() <= break_point) { + list.add(partInfo); + } + } else { + List list = new ArrayList(); + list.add(partInfo); + hash_map.put(read_id, list); + } + } + return hash_map; + } + + public class PartInfo { + private int position; + private int mismatch; + private boolean strand; + + public int getPosition() { + return position; + } + + public void setPosition(int position) { + this.position = position; + } + + public int getMismatch() { + return mismatch; + } + + public void setMismatch(int mismatch) { + this.mismatch = mismatch; + } + + public boolean isStrand() { + return strand; + } + + public void setStrand(boolean strand) { + this.strand = strand; + } + + + } + + public void run(String contigs_file_name, String index_file, String map_file_one, String map_file_two, int alignments, + int mismatch, int threads) throws IOException, InterruptedException { + + String out_file_one = "part_1.fa"; + String out_file_two = "part_2.fa"; + + read_and_write_fragmented_contigs(contigs_file_name, out_file_one, out_file_two); + + System.out.println("RUNNING BOWTIE..."); + + String command = "./bowtie --quiet --best --suppress 5,6,7 -k " + String.valueOf(alignments) + " -v " + String.valueOf(mismatch) + " -p " + String.valueOf(threads) + " " + index_file + " -f " + out_file_one + " " + map_file_one; + System.out.println(command); + + + String line; + Process p = Runtime.getRuntime().exec(command); + BufferedReader bri = new BufferedReader + (new InputStreamReader(p.getInputStream())); + BufferedReader bre = new BufferedReader + (new InputStreamReader(p.getErrorStream())); + while ((line = bri.readLine()) != null) { + System.out.println(line); + } + bri.close(); + while ((line = bre.readLine()) != null) { + System.out.println(line); + } + bre.close(); + p.waitFor(); + System.out.println("DONE..."); + + command = "./bowtie --quiet --best --suppress 5,6,7 -k " + String.valueOf(alignments) + " -v " + String.valueOf(mismatch) + " -p " + String.valueOf(threads) + " " + index_file + " -f " + out_file_two + " " + map_file_two; + System.out.println(command); + + + p = Runtime.getRuntime().exec(command); + bri = new BufferedReader + (new InputStreamReader(p.getInputStream())); + bre = new BufferedReader + (new InputStreamReader(p.getErrorStream())); + while ((line = bri.readLine()) != null) { + System.out.println(line); + } + bri.close(); + while ((line = bre.readLine()) != null) { + System.out.println(line); + } + bre.close(); + p.waitFor(); + System.out.println("DONE..."); + } + + public static void main(String[] args) throws IOException, InterruptedException { + + Utilities utilities = new Utilities("properties.prop"); + + String chromosome_name = args[0]; + + String map_file_one = args[1]; + String map_file_two = args[2]; + point = Integer.parseInt(args[3]); + + String contigs_file_name = "contigs.fa"; + int total_reads = utilities.count_reads(umrs_file_name); + System.out.println("TOTAL_READS: " + total_reads); + + hash_map_one = utilities.get_map(map_file_one, chromosome_name, break_point); + System.out.println("HASH_MAP_ONE_SIZE: " + hash_map_one.size()); + + hash_map_two = utilities.get_map(map_file_two, chromosome_name, break_point); + System.out.println("HASH_MAP_TWO_SIZE: " + hash_map_two.size()); + + String mod_sequence_file = sequence_file_folder + chromosome_name + ".fa"; + + mod_sequence = Utilities.readSequence(mod_sequence_file).toUpperCase(); + int sequence_length = mod_sequence.length(); + + System.out.println("READ_LENGTH: " + read_length); + + utilities.calculate_coverage(map_file_name, coverage_file_name, chromosome_name, sequence_length, read_length); + + reads = utilities.getUMRs(contigs_file_name, read_length); + + System.out.println("CONTIGS_SIZE: " + reads.size()); + + contigs_coverage = (total_reads - reads.size()) / contigs; + + System.out.println("CONTIGS_COVERAGE: " + contigs_coverage); + + Set set_one = hash_map_one.keySet(); + Set set_two = hash_map_two.keySet(); + + Set union_set = union(set_one, set_two); + union_list.addAll(union_set); + + System.out.println("UNION_SIZE: " + union_list.size()); + + new CallBackTest().run(utilities); + + System.out.println("EXITING MAIN THREAD..."); + System.out.println("----------------------------------------------"); + + } + + public static Set union(Set setA, Set setB) { + Set tmp = new TreeSet(setA); + tmp.addAll(setB); + return tmp; + } + + public class Align { + private int index; + private int mismatches; + + public int getIndex() { + return index; + } + + public void setIndex(int index) { + this.index = index; + } + + public int getMismatches() { + return mismatches; + } + + public void setMismatches(int mismatches) { + this.mismatches = mismatches; + } + + } + + + public void calculate_coverage(String map_file_name, String coverage_file_name, + String chromosome, int reference_length, int read_length) throws IOException { + + int[] coverage = new int[reference_length]; + + FileInputStream fileInputStream = new FileInputStream(map_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + + String line; + + while ((line = bufferedReader.readLine()) != null) { + + String[] strings = line.split("\t"); + + String chr = strings[2]; + + if (chr.equalsIgnoreCase(chromosome)) { + + if (!strings[3].contains("*")) { + + int position = Integer.parseInt(strings[3]); + + for (int i = 0; i < read_length; i++) { + coverage[i + position]++; + } + } + } + } + + bufferedReader.close(); + + FileWriter fileWriter = new FileWriter(coverage_file_name, false); + BufferedWriter out = new BufferedWriter(fileWriter); + + for (int i = 0; i < coverage.length; i++) { + out.write(coverage[i] + " "); + + if ((i + 1) % 100 == 0) { + out.write("\n"); + } + } + out.close(); + } +}