From df545907c9011e25a931335a45c37950a3299079 Mon Sep 17 00:00:00 2001 From: Subrata Saha Date: Wed, 28 Sep 2016 11:10:46 -0400 Subject: [PATCH] Create Statistics.java --- POMP-PRUNE/Statistics.java | 2101 ++++++++++++++++++++++++++++++++++++ 1 file changed, 2101 insertions(+) create mode 100644 POMP-PRUNE/Statistics.java diff --git a/POMP-PRUNE/Statistics.java b/POMP-PRUNE/Statistics.java new file mode 100644 index 0000000..6eea5a7 --- /dev/null +++ b/POMP-PRUNE/Statistics.java @@ -0,0 +1,2101 @@ +//package spliced; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadMXBean; +import java.util.*; + +public class Statistics { + + public static HashMap> nusrs; + public static HashMap> usrs; + public static HashMap> msrs; + public static HashMap> junctions; + public static HashMap hash_map; + public static List accuracy_list; + public static HashMap distribution_left; + public static HashMap distribution_right; + + public static int threshold; + public static int clustering_constant; + public static int left_shift; + public static int right_shift; + public static int minimum_length; + public static double cut_off; + public static int iteration; + public static int kmer_length; + public static int traverse; + public static int shift; + public static int number_of_kmers; + public static int precision; + + public static String reference_file_name = "/home/sus11005/Data/CODE/ERGC/TAR/HG19/chr21.fa"; + public static String cov_file_name = "/home/sus11005/Data/CODE/SPLICED/DATA/NEW/MAP/coverage_50.cov"; + public static String chromosome_name = "chr21"; + public static String _switch = "off"; + public static int read_length = 50; + + public Statistics(String file_name) throws IOException { + + threshold = 0; + clustering_constant = 30; + precision = 20; + left_shift = 4; + right_shift = 5; + minimum_length = 10; + cut_off = 1.2; + iteration = 15; + + kmer_length = 5; + traverse = 40; + shift = 3; + number_of_kmers = 10; + + nusrs = new HashMap>(); + usrs = new HashMap>(); + msrs = new HashMap>(); + junctions = new HashMap>(); + accuracy_list = new ArrayList(); + distribution_left = new HashMap(); + distribution_right = new HashMap(); + hash_map = new HashMap(); + + FileInputStream fileInputStream = new FileInputStream(file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + int counter = 1; + + while ((line = bufferedReader.readLine()) != null) { + if (counter == 2) { + cut_off = Double.parseDouble(line); + } else if (counter == 4) { + iteration = Integer.parseInt(line); + } else if (counter == 6) { + chromosome_name = line; + } else if (counter == 8) { + _switch = line; + } else if (counter == 10) { + cov_file_name = line; + } else if (counter == 14) { + read_length = Integer.parseInt(line); + } else if (counter == 12) { + reference_file_name = line; + } + counter++; + } + bufferedReader.close(); + } + + public static long getCpuTime() { + ThreadMXBean bean = ManagementFactory.getThreadMXBean(); + return bean.isCurrentThreadCpuTimeSupported() ? bean.getCurrentThreadCpuTime() : 0L; + } + + + public List get_distribution_left(String reference, List pos_junctions, int traverse, int shift, int length) { + + for (Information information : pos_junctions) { + int left_boundary = information.getLeft_boundary(); + + int starting_position = left_boundary - traverse; + int ending_position = left_boundary + shift; + + for (int i = starting_position; i < ending_position - length + 1; i++) { + String kMer = reference.substring(i, i + length).toUpperCase(); + + if (distribution_left.containsKey(kMer)) { + int count = distribution_left.get(kMer); + distribution_left.put(kMer, count + 1); + } else { + distribution_left.put(kMer, 1); + } + } + } + + List list = new ArrayList(); + + for (Object o : distribution_left.entrySet()) { + Map.Entry pair = (Map.Entry) o; + + list.add(new KMer((String) pair.getKey(), (Integer) pair.getValue())); + } + + Collections.sort(list, new Comparator() { + + public int compare(KMer o1, KMer o2) { + return o2.count - o1.count; + } + }); + + return list; + } + + public List get_distribution_right(String reference, List pos_junctions, int traverse, int shift, int length) { + + for (Information information : pos_junctions) { + int right_boundary = information.getRight_boundary(); + + int starting_position = right_boundary - shift; + int ending_position = right_boundary + traverse; + + for (int i = starting_position; i < ending_position - length + 1; i++) { + String kMer = reference.substring(i, i + length).toUpperCase(); + + if (distribution_right.containsKey(kMer)) { + int count = distribution_right.get(kMer); + distribution_right.put(kMer, count + 1); + } else { + distribution_right.put(kMer, 1); + } + } + } + + List list = new ArrayList(); + + for (Object o : distribution_right.entrySet()) { + Map.Entry pair = (Map.Entry) o; + + list.add(new KMer((String) pair.getKey(), (Integer) pair.getValue())); + } + + Collections.sort(list, new Comparator() { + + public int compare(KMer o1, KMer o2) { + return o2.count - o1.count; + } + }); + + return list; + } + + public HashMap> get_unique_spliced_reads(String file_name) throws IOException { + HashMap> intermediate_map = new HashMap>(); + + FileInputStream fileInputStream = new FileInputStream(file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + int counter = 0; + + while ((line = bufferedReader.readLine()) != null) { + + String[] strings = line.split("\t"); + + int read_id = Integer.parseInt(strings[0]); + int left_mismatch = Integer.parseInt(strings[1]); + int right_mismatch = Integer.parseInt(strings[2]); + int left_boundary = Integer.parseInt(strings[3]); + int right_boundary = Integer.parseInt(strings[4]); + int left_length = Integer.parseInt(strings[5]); + int right_length = Integer.parseInt(strings[6]); + int intron_length = Integer.parseInt(strings[7]); + + boolean isCovered = strings[8].equalsIgnoreCase("1"); + + Information information = new Information(); + + information.setContig_id(read_id); + + if (left_boundary > right_boundary) { + int temp = left_boundary; + left_boundary = right_boundary; + right_boundary = temp; + } + + information.setLeft_boundary(left_boundary); + information.setRight_boundary(right_boundary); + information.setLeft_mismatch(left_mismatch); + information.setRight_mismatch(right_mismatch); + information.setRight_length(right_length); + information.setLeft_length(left_length); + information.setIntron_length(intron_length); + information.setCoverage(isCovered); + + boolean is_exist = false; + + for (Object o : intermediate_map.entrySet()) { + + Map.Entry entry = (Map.Entry) o; + + @SuppressWarnings("unchecked") + List infos = (List) entry.getValue(); + + for (Information info : infos) { + int left = info.getLeft_boundary(); + int right = info.getRight_boundary(); + + if (Math.abs(left - left_boundary) <= clustering_constant && Math.abs(right - right_boundary) <= clustering_constant) { + infos.add(information); + is_exist = true; + break; + } + } + } + + if (!is_exist) { + counter += 1; + List list = new ArrayList(); + list.add(information); + intermediate_map.put(counter, list); + } + } + + compute_unique_junction(intermediate_map); + + return compute_unique_reads_list(); + } + + public void compute_unique_junction(HashMap> intermediate_map) { + + int counter = 0; + + for (Object o : intermediate_map.entrySet()) { + Map.Entry entry = (Map.Entry) o; + int key_value = (Integer) entry.getKey(); + + double donor_site = 0.0; + double acceptor_site = 0.0; + + @SuppressWarnings("unchecked") + List infos = (List) entry.getValue(); + + List donor_sites = new ArrayList(); + List acceptor_sites = new ArrayList(); + + for (Information information : infos) { + + donor_sites.add(information.getLeft_boundary()); + acceptor_sites.add(information.getRight_boundary()); + + donor_site = donor_site + information.getLeft_boundary(); + acceptor_site = acceptor_site + information.getRight_boundary(); + } + + int mode_donor_site = 0; //getMode(donor_sites); + int mode_acceptor_site = 0; //getMode(acceptor_sites); + + if (mode_donor_site == 0) { + donor_site = donor_site / infos.size(); + } else { + donor_site = mode_donor_site; + } + + if (mode_acceptor_site == 0) { + acceptor_site = acceptor_site / infos.size(); + } else { + acceptor_site = mode_acceptor_site; + } + + Junction junction = new Junction(); + junction.setDonor_site((int) donor_site); + junction.setAcceptor_site((int) acceptor_site); + + if (!is_already_contains(junction, infos)) { + for (Information information : infos) { + information.setLeft_boundary((int) donor_site); + information.setRight_boundary((int) acceptor_site); + } + junctions.put(junction, infos); + } else { + counter++; + } + } + + System.out.println("DUPLICATE: " + counter); + System.out.println("JUNCTIONS_SIZE: " + junctions.size()); + } + + public boolean is_already_contains(Junction junction, List list) { + for (Object o : junctions.entrySet()) { + Map.Entry entry = (Map.Entry) o; + Junction jun = (Junction) entry.getKey(); + + @SuppressWarnings("unchecked") + List infos = (List) entry.getValue(); + + if (jun.getAcceptor_site() == junction.getAcceptor_site() && jun.getDonor_site() == junction.getDonor_site()) { + + for (Information information : list) { + information.setLeft_boundary(jun.getDonor_site()); + information.setRight_boundary(jun.getAcceptor_site()); + } + infos.addAll(list); + return true; + } + } + return false; + } + + public HashMap> compute_unique_reads_list() { + + HashMap> hash_map = new HashMap>(); + + for (Object o : junctions.entrySet()) { + Map.Entry entry = (Map.Entry) o; + + @SuppressWarnings("unchecked") + List infos = (List) entry.getValue(); + + for (Information information : infos) { + int read_id = information.getContig_id(); + + if (hash_map.containsKey(read_id)) { + List list = hash_map.get(read_id); + list.add(information); + } else { + List list = new ArrayList(); + list.add(information); + hash_map.put(read_id, list); + } + } + } + + System.out.println("CONTIGS_SIZE: " + hash_map.size()); + + return hash_map; + } + + public static int getMode(List values) { + HashMap freqs = new HashMap(); + + for (int val : values) { + Integer freq = freqs.get(val); + freqs.put(val, (freq == null ? 1 : freq + 1)); + } + + int mode = 0; + int maxFreq = 0; + + for (Map.Entry entry : freqs.entrySet()) { + int freq = entry.getValue(); + if (freq > maxFreq) { + maxFreq = freq; + mode = entry.getKey(); + } + } + + return mode; + } + + + public HashMap> get_spliced_reads(String file_name) throws IOException { + HashMap> hash_map = new HashMap>(); + + FileInputStream fileInputStream = new FileInputStream(file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + int counter = 0; + + while ((line = bufferedReader.readLine()) != null) { + + String[] strings = line.split("\t"); + + int read_id = Integer.parseInt(strings[0]); + int left_mismatch = Integer.parseInt(strings[1]); + int right_mismatch = Integer.parseInt(strings[2]); + int left_boundary = Integer.parseInt(strings[3]); + int right_boundary = Integer.parseInt(strings[4]); + int left_length = Integer.parseInt(strings[5]); + int right_length = Integer.parseInt(strings[6]); + int intron_length = Integer.parseInt(strings[7]); + + boolean isCovered = strings[8].equalsIgnoreCase("1"); + + + Information information = new Information(); + + information.setContig_id(read_id); + + if (left_boundary > right_boundary) { + int temp = left_boundary; + left_boundary = right_boundary; + right_boundary = temp; + } + + information.setLeft_boundary(left_boundary); + information.setRight_boundary(right_boundary); + information.setLeft_mismatch(left_mismatch); + information.setRight_mismatch(right_mismatch); + information.setRight_length(right_length); + information.setLeft_length(left_length); + information.setIntron_length(intron_length); + information.setCoverage(isCovered); + + if (junctions.size() == 0) { + Junction junction = new Junction(); + junction.setDonor_site(left_boundary); + junction.setAcceptor_site(right_boundary); + + List list = new ArrayList(); + list.add(information); + + junctions.put(junction, list); + } else { + + boolean accept = true; + for (Object o : junctions.entrySet()) { + Map.Entry entry = (Map.Entry) o; + + Junction junction = (Junction) entry.getKey(); + @SuppressWarnings("unchecked") + List infos = (List) entry.getValue(); + + int left = junction.getDonor_site(); + int right = junction.getAcceptor_site(); + + if (Math.abs(left - left_boundary) <= threshold && Math.abs(right - right_boundary) <= threshold) { + infos.add(information); + accept = false; + } + } + + if (accept) { + Junction junction = new Junction(); + junction.setDonor_site(left_boundary); + junction.setAcceptor_site(right_boundary); + + List list = new ArrayList(); + list.add(information); + + junctions.put(junction, list); + } + } + + if (hash_map.containsKey(read_id)) { + List list = hash_map.get(read_id); + + boolean enter = false; + + for (Information info : list) { + int right = info.getRight_boundary(); + int left = info.getLeft_boundary(); + if (Math.abs(right - right_boundary) <= threshold && Math.abs(left - left_boundary) <= threshold) { + enter = true; + break; + } + } + + if (!enter) { + list.add(information); + counter++; + } + + hash_map.get(read_id).add(information); + + } else { + List list = new ArrayList(); + list.add(information); + hash_map.put(read_id, list); + counter++; + } + } + System.out.println("TOTAL_SIZE: " + counter); + bufferedReader.close(); + return hash_map; + } + + public void derive_sets(String reference, HashMap> hash_map) { + + for (Object o : hash_map.entrySet()) { + + Map.Entry entry = (Map.Entry) o; + int read_id = (Integer) entry.getKey(); + + @SuppressWarnings("unchecked") + List values = (List) entry.getValue(); + + if (values.size() == 1) { + + int left_boundary = values.get(0).getLeft_boundary(); + int right_boundary = values.get(0).getRight_boundary(); + int left_mismatch = values.get(0).getLeft_mismatch(); + int right_mismatch = values.get(0).getRight_mismatch(); + int mismatch = left_mismatch + right_mismatch; + int min_length = Math.min(values.get(0).getLeft_length(), values.get(0).getRight_length()); + + String donor = reference.substring(left_boundary - left_shift, left_boundary + right_shift); + String acceptor = reference.substring(right_boundary - left_shift, right_boundary + right_shift); + + if ((donor.contains("GT") && acceptor.contains("AG")) || (donor.contains("CT") && acceptor.contains("AC")) + || (donor.contains("GC") && acceptor.contains("AG")) || (donor.contains("CT") && acceptor.contains("GC")) + || (donor.contains("AT") && acceptor.contains("AC")) || (donor.contains("GT") && acceptor.contains("AT"))) { + + List list = new ArrayList(); + list.add(values.get(0)); + usrs.put(read_id, list); + + } else if (mismatch <= 0) { + List list = new ArrayList(); + list.add(values.get(0)); + nusrs.put(read_id, list); + } + + } else { + + boolean accept = false; + + for (Information information : values) { + + int left_boundary = information.getLeft_boundary(); + int right_boundary = information.getRight_boundary(); + + String donor = reference.substring(left_boundary - left_shift, left_boundary + right_shift); + String acceptor = reference.substring(right_boundary - left_shift, right_boundary + right_shift); + + if ((donor.contains("GT") && acceptor.contains("AG")) /*|| (donor.contains("CT") && acceptor.contains("AC")) + || (donor.contains("GC") && acceptor.contains("AG")) || (donor.contains("CT") && acceptor.contains("GC")) + || (donor.contains("AT") && acceptor.contains("AC")) || (donor.contains("GT") && acceptor.contains("AT"))*/) { + accept = true; + } else { + accept = false; + break; + } + } + + if (accept) { + msrs.put(read_id, values); + } + } + } + } + + public void _derive_sets(String reference, HashMap> hash_map) { + + for (Object o : hash_map.entrySet()) { + + Map.Entry entry = (Map.Entry) o; + int read_id = (Integer) entry.getKey(); + + @SuppressWarnings("unchecked") + List values = (List) entry.getValue(); + + if (values.size() == 1) { + + int left_boundary = values.get(0).getLeft_boundary(); + int right_boundary = values.get(0).getRight_boundary(); + int left_mismatch = values.get(0).getLeft_mismatch(); + int right_mismatch = values.get(0).getRight_mismatch(); + int mismatch = left_mismatch + right_mismatch; + + String donor = reference.substring(left_boundary - left_shift, left_boundary + right_shift); + String acceptor = reference.substring(right_boundary - left_shift, right_boundary + right_shift); + + if ((donor.contains("GT") && acceptor.contains("AG")) || (donor.contains("CT") && acceptor.contains("AC")) + || (donor.contains("GC") && acceptor.contains("AG")) || (donor.contains("CT") && acceptor.contains("GC")) + || (donor.contains("AT") && acceptor.contains("AC")) || (donor.contains("GT") && acceptor.contains("AT"))) { + + List list = new ArrayList(); + list.add(values.get(0)); + usrs.put(read_id, list); + + } else if (mismatch <= 0) { + List list = new ArrayList(); + list.add(values.get(0)); + nusrs.put(read_id, list); + } + + } else { + + boolean accept = false; + + for (Information information : values) { + + int left_boundary = information.getLeft_boundary(); + int right_boundary = information.getRight_boundary(); + + String donor = reference.substring(left_boundary - left_shift, left_boundary + right_shift); + String acceptor = reference.substring(right_boundary - left_shift, right_boundary + right_shift); + + if ((donor.contains("GT") && acceptor.contains("AG")) || (donor.contains("CT") && acceptor.contains("AC")) + || (donor.contains("GC") && acceptor.contains("AG")) || (donor.contains("CT") && acceptor.contains("GC")) + || (donor.contains("AT") && acceptor.contains("AC")) || (donor.contains("GT") && acceptor.contains("AT"))) { + accept = true; + } else { + accept = false; + break; + } + } + + if (accept) { + msrs.put(read_id, values); + } + } + } + } + + + public List derive_positive_set() { + List list = new ArrayList(); + + for (Object o : usrs.entrySet()) { + Map.Entry entry = (Map.Entry) o; + + @SuppressWarnings("unchecked") + List values = (List) entry.getValue(); + + for (Information information : values) { + + int left_mismatch = information.getLeft_mismatch(); + int right_mismatch = information.getRight_mismatch(); + int mismatches = right_mismatch + left_mismatch; + + if (information.isCoverage()) { + if (mismatches <= 0) { + list.add(information); + } + } else if (mismatches <= 0) { + + int left_boundary = information.getLeft_boundary(); + int right_boundary = information.getRight_boundary(); + int how_many = 5; + + if (can_be_accepted(left_boundary, right_boundary, how_many)) { + list.add(information); + } + } + } + } + + return list; + } + + public boolean can_be_accepted(int left, int right, int how_many) { + int counter = 0; + + for (Object o : usrs.entrySet()) { + Map.Entry entry = (Map.Entry) o; + + @SuppressWarnings("unchecked") + List values = (List) entry.getValue(); + + for (Information information : values) { + int _left = information.getLeft_boundary(); + int _right = information.getRight_boundary(); + + if (Math.abs(left - _left) <= threshold && Math.abs(_right - right) <= threshold) { + + if (information.isCoverage()) { + return true; + } + + counter++; + + if (counter >= how_many) { + return true; + } + } + } + } + return false; + } + + public List derive_negative_set(HashMap> map, int min_length) { + List list = new ArrayList(); + + for (Object o : nusrs.entrySet()) { + Map.Entry entry = (Map.Entry) o; + + @SuppressWarnings("unchecked") + List values = (List) entry.getValue(); + + for (Information information : values) { + int left_length = information.getLeft_length(); + int right_length = information.getRight_length(); + int length = Math.min(left_length, right_length); + int id = information.getContig_id(); + + if (isUnique(map, information.getLeft_boundary(), information.getRight_boundary(), id)) { + + if (length < min_length) { + list.add(information); + } + } + } + } + + System.out.println("-VE FROM NUSRs: " + list.size()); + + for (Object o : msrs.entrySet()) { + Map.Entry entry = (Map.Entry) o; + + @SuppressWarnings("unchecked") + List values = (List) entry.getValue(); + boolean accept = true; + + for (Information information : values) { + int _left = information.getLeft_boundary(); + int _right = information.getRight_boundary(); + + + for (Object obj : usrs.entrySet()) { + Map.Entry ent = (Map.Entry) obj; + + @SuppressWarnings("unchecked") + List vals = (List) ent.getValue(); + Information info = vals.get(0); + + int left = info.getLeft_boundary(); + int right = info.getRight_boundary(); + + if (Math.abs(left - _left) <= threshold && Math.abs(right - _right) <= threshold) { + accept = false; + break; + } + } + + if (!accept) { + break; + } + } + + if (accept) { + for (Information info : values) { + int _length = Math.min(info.getLeft_length(), info.getRight_length()); + int _mismatch = info.getLeft_mismatch() + info.getRight_mismatch(); + if (_length < min_length || _mismatch >= 3) { + list.add(info); + } + } + } + } + + return list; + } + + public List _derive_negative_set(HashMap> map, int min_length) { + List list = new ArrayList(); + + for (Object o : nusrs.entrySet()) { + Map.Entry entry = (Map.Entry) o; + + @SuppressWarnings("unchecked") + List values = (List) entry.getValue(); + + for (Information information : values) { + int left_length = information.getLeft_length(); + int right_length = information.getRight_length(); + int length = Math.min(left_length, right_length); + int id = information.getContig_id(); + + if (isUnique(map, information.getLeft_boundary(), information.getRight_boundary(), id)) { + + if (length < min_length) { + list.add(information); + } + } + } + } + + System.out.println("-VE FROM NUSRs: " + list.size()); + + for (Object o : msrs.entrySet()) { + Map.Entry entry = (Map.Entry) o; + + @SuppressWarnings("unchecked") + List values = (List) entry.getValue(); + boolean accept; + + for (Information information : values) { + int _left = information.getLeft_boundary(); + int _right = information.getRight_boundary(); + + accept = true; + + for (Object obj : usrs.entrySet()) { + Map.Entry ent = (Map.Entry) obj; + + @SuppressWarnings("unchecked") + List vals = (List) ent.getValue(); + Information info = vals.get(0); + + int left = info.getLeft_boundary(); + int right = info.getRight_boundary(); + + if (Math.abs(left - _left) <= threshold && Math.abs(right - _right) <= threshold) { + accept = false; + break; + } + } + + if (accept) { + + int _length = Math.min(information.getLeft_length(), information.getRight_length()); + int _mismatch = information.getLeft_mismatch() + information.getRight_mismatch(); + + if (_length < min_length || _mismatch >= 3) { + list.add(information); + } + } + } + } + + return list; + } + + + public int get_critical_intron_length(List pos_list) { + List length_list = new ArrayList(); + + for (Information information : pos_list) { + length_list.add(information.getIntron_length()); + } + + Collections.sort(length_list, Collections.reverseOrder()); + + Set set = new HashSet(length_list); + + int critical_number = (set.size() * 95) / 100; + System.out.println("CRITICAL_NUMBER: " + critical_number); + + for (int i = 0; i < length_list.size(); i++) { + + int counter = 0; + + for (int j = i + 1; j < length_list.size(); j++) { + if (length_list.get(i) > length_list.get(j)) { + counter++; + } + } + + if (counter <= critical_number) { + return length_list.get(i); + } + + } + + int avg = 0; + + for (Integer aLength_list : length_list) { + avg += aLength_list; + } + + return avg / length_list.size(); + } + + public int get_canonical_score(Junction junction, String reference) { + int left_boundary = junction.getDonor_site(); + int right_boundary = junction.getAcceptor_site(); + + String donor = reference.substring(left_boundary - 2, left_boundary + 3); + String acceptor = reference.substring(right_boundary - 2, right_boundary + 3); + + if ((donor.contains("GT") && acceptor.contains("AG")) || (donor.contains("CT") && acceptor.contains("AC"))) { + return 800; + } else if (donor.contains("GT") || donor.contains("CT")) { + return 400; + } else if (acceptor.contains("AG") || acceptor.contains("AC")) { + return 400; + } else if ((donor.contains("AT") && acceptor.contains("AC")) || (donor.contains("GT") && acceptor.contains("AT"))) { + return 800; + } else if (donor.contains("AT") || donor.contains("GT")) { + return 200; + } else if (acceptor.contains("AC") || acceptor.contains("AT")) { + return 200; + } else if ((donor.contains("GC") && acceptor.contains("AG")) || (donor.contains("CT") && acceptor.contains("GC"))) { + return 800; + } else if (donor.contains("GC") || donor.contains("CT")) { + return 200; + } else if (acceptor.contains("AG") || acceptor.contains("GC")) { + return 200; + } else { + return 0; + } + } + + boolean isUnique(HashMap> map, int _left, int _right, int id) { + + for (Object o : map.entrySet()) { + Map.Entry entry = (Map.Entry) o; + int read_id = (Integer) entry.getKey(); + + if (read_id != id) { + @SuppressWarnings("unchecked") + List values = (List) entry.getValue(); + for (Information info : values) { + int left = info.getLeft_boundary(); + int right = info.getRight_boundary(); + + if (Math.abs(left - _left) <= threshold && Math.abs(right - _right) <= threshold) { + return false; + } + } + } + } + return true; + } + + public List get_contracted_junctions(List list) { + List contract_list = new ArrayList(); + + contract_list.add(list.get(0)); + + for (int i = 1; i < list.size(); i++) { + Information junction = list.get(i); + + if (!isExist(contract_list, junction)) { + contract_list.add(junction); + } + } + return contract_list; + } + + public boolean isExist(List contract_list, Information junction) { + int left_index = junction.getLeft_boundary(); + int right_index = junction.getRight_boundary(); + + boolean accept = false; + + for (Information _junction : contract_list) { + int _left_index = _junction.getLeft_boundary(); + int _right_index = _junction.getRight_boundary(); + + int left = Math.abs(left_index - _left_index); + int right = Math.abs(right_index - _right_index); + + if (left <= threshold && right <= threshold) { + accept = true; + break; + } + } + return accept; + } + + public static String read_sequence(String sequence_file_name) throws IOException { + FileInputStream fileInputStream = new FileInputStream(sequence_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + StringBuilder strBuf = new StringBuilder(); + while ((line = bufferedReader.readLine()) != null) { + if (!line.startsWith(">")) { + strBuf.append(line); + } + } + return strBuf.toString(); + } + + public List getExactIntrons(String file_name, String preamble) throws IOException { + List list = new ArrayList(); + + FileInputStream fileInputStream = new FileInputStream(file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + + while ((line = bufferedReader.readLine()) != null) { + if (line.startsWith(preamble)) { + line = line.split("\t")[0]; + String[] strings = line.split("\\."); + + Information information = new Information(); + if (Integer.parseInt(strings[1]) < Integer.parseInt(strings[2])) { + information.setLeft_boundary(Integer.parseInt(strings[1])); + information.setRight_boundary(Integer.parseInt(strings[2])); + } else { + information.setLeft_boundary(Integer.parseInt(strings[2])); + information.setRight_boundary(Integer.parseInt(strings[1])); + } + list.add(information); + + } + } + bufferedReader.close(); + + return list; + } + + public List getAppIntersection(List exact, HashMap> calculated) throws IOException { + + int duplicate = 0; + List list = new ArrayList(); + + FileWriter fileWriter = new FileWriter("intersection.txt", false); + BufferedWriter out = new BufferedWriter(fileWriter); + + for (Object o : junctions.entrySet()) { + Map.Entry entry = (Map.Entry) o; + Junction junction = (Junction) entry.getKey(); + + int predicted_left_boundary = junction.getDonor_site(); + int predicted_right_boundary = junction.getAcceptor_site(); + + for (Information information : exact) { + + int left_boundary = information.getLeft_boundary(); + int right_boundary = information.getRight_boundary(); + + if (Math.abs(predicted_left_boundary - left_boundary) <= 15 && Math.abs(predicted_right_boundary - right_boundary) <= 15) { + + if (list.size() == 0) { + list.add(information); + out.write(left_boundary + " (" + predicted_left_boundary + ")" + " ----> " + right_boundary + " (" + predicted_right_boundary + ")" + "\n"); + } else { + boolean accept = true; + for (Information info : list) { + if (info.getLeft_boundary() == left_boundary && info.getRight_boundary() == right_boundary) { + duplicate++; + accept = false; + break; + } + } + if (accept) { + list.add(information); + out.write(left_boundary + " (" + predicted_left_boundary + ")" + " ----> " + right_boundary + " (" + predicted_right_boundary + ")" + "\n"); + } + } + break; + } + } + } + + out.close(); + + double sensitivity = ((double) list.size() / exact.size()) * 100.0; + double specificity = ((double) list.size() / (calculated.size() - duplicate)) * 100.0; + + Accuracy accuracy = new Accuracy(); + accuracy.sensitivity = sensitivity; + accuracy.specificity = specificity; + accuracy.predicted_introns = calculated.size(); + + accuracy_list.add(accuracy); + + System.out.println("------------------------------------------------------------------------------"); + + System.out.println("EXACT INTRONS: " + exact.size()); + System.out.println("PREDICTED INTRONS: " + (calculated.size() - duplicate)); + System.out.println("SENSITIVITY: " + sensitivity); + System.out.println("SPECIFICITY: " + specificity); + System.out.println("INTERSECTION: " + list.size()); + System.out.println("DUPLICATE: " + duplicate); + + System.out.println("------------------------------------------------------------------------------"); + + return list; + } + + public List getAppIntersection(List exact, List calculated) throws IOException { + + int duplicate = 0; + List list = new ArrayList(); + + FileWriter fileWriter = new FileWriter("intersection.txt", false); + BufferedWriter out = new BufferedWriter(fileWriter); + + for (Information information_predicted : calculated) { + + int predicted_left_boundary = information_predicted.getLeft_boundary(); + int predicted_right_boundary = information_predicted.getRight_boundary(); + + for (Information information : exact) { + + int left_boundary = information.getLeft_boundary(); + int right_boundary = information.getRight_boundary(); + + if (Math.abs(predicted_left_boundary - left_boundary) <= precision && Math.abs(predicted_right_boundary - right_boundary) <= precision) { + + if (list.size() == 0) { + list.add(information); + out.write(left_boundary + " (" + predicted_left_boundary + ")" + " ----> " + right_boundary + " (" + predicted_right_boundary + ")" + "\n"); + } else { + boolean accept = true; + for (Information info : list) { + if (info.getLeft_boundary() == left_boundary && info.getRight_boundary() == right_boundary) { + duplicate++; + accept = false; + break; + } + } + if (accept) { + list.add(information); + out.write(left_boundary + " (" + predicted_left_boundary + ")" + " ----> " + right_boundary + " (" + predicted_right_boundary + ")" + "\n"); + } + } + break; + } + } + } + + out.close(); + + double sensitivity = ((double) list.size() / exact.size()) * 100.0; + double specificity = ((double) list.size() / (calculated.size() - duplicate)) * 100.0; + + Accuracy accuracy = new Accuracy(); + accuracy.sensitivity = sensitivity; + accuracy.specificity = specificity; + accuracy.predicted_introns = calculated.size() - duplicate; + accuracy.duplicates = duplicate; + accuracy.intersection = list.size(); + + accuracy_list.add(accuracy); + + /*System.out.println("------------------------------------------------------------------------------"); + + System.out.println("EXACT INTRONS: " + exact.size()); + System.out.println("PREDICTED INTRONS: " + (calculated.size() - duplicate)); + System.out.println("SENSITIVITY: " + sensitivity); + System.out.println("SPECIFICITY: " + specificity); + System.out.println("INTERSECTION: " + list.size()); + System.out.println("DUPLICATE: " + duplicate); + + System.out.println("------------------------------------------------------------------------------");*/ + + return list; + } + + public static List merge_coverage(String coverage_file_name, List cov_one) throws IOException { + + List list = new ArrayList(); + + FileInputStream fileInputStream = new FileInputStream(coverage_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + int counter = 0; + String line; + + while ((line = bufferedReader.readLine()) != null) { + String[] strings = line.split(" "); + + for (String str : strings) { + list.add(Integer.parseInt(str) + cov_one.get(counter)); + counter++; + } + } + + bufferedReader.close(); + + System.out.println("COVERAGE_LIST_SIZE: " + list.size()); + + return list; + } + + public static List retrieve_coverage(String coverage_file_name) throws IOException { + + List list = new ArrayList(); + + FileInputStream fileInputStream = new FileInputStream(coverage_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line; + + while ((line = bufferedReader.readLine()) != null) { + String[] strings = line.split(" "); + + for (String str : strings) { + list.add(Integer.parseInt(str)); + } + } + + bufferedReader.close(); + + System.out.println("COVERAGE_LIST_SIZE: " + list.size()); + + return list; + } + + public List get_merged_coverage(List list_forward, List list_backward) { + List list = new ArrayList(); + + for (int i = 0; i < list_forward.size(); i++) { + list.add(list_forward.get(i) + list_backward.get(i)); + } + return list; + } + + + public List retrieve_data(List list, int left, int right) { + List data = new ArrayList(); + + for (String str : list) { + String[] strings = str.split("\t"); + + int _left = Integer.parseInt(strings[0]); + int _right = Integer.parseInt(strings[1]); + + if (Math.abs(left - _left) <= threshold && Math.abs(right - _right) <= threshold) { + data.add(str); + } + } + + return data; + } + + public boolean is_exist(List check_list, String check) { + for (String str : check_list) { + if (str.equalsIgnoreCase(check)) { + return true; + } + } + return false; + } + + public List post_process(String r_out_file_name) throws IOException { + List list = new ArrayList(); + + FileInputStream fileInputStream = new FileInputStream(r_out_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line, text = ""; + int counter = 0; + int add = 0; + int skip = 0; + + double junction_mapping_number = 0.0; + double shorter_length = 0.0; + double mismatches = 0.0; + + + while ((line = bufferedReader.readLine()) != null) { + + if (counter % 2 == 0) { + text = line; + } else { + line = line.trim(); + text = text.trim(); + String[] texts = text.split(" +"); + String[] lines = line.split(" +"); + + for (int i = 0; i < lines.length; i++) { + if (lines[i].equals("P")) { + String junction_id = texts[i]; + String feature_string = hash_map.get(junction_id); + feature_string = feature_string.trim(); + + //System.out.println(junction_id + "\t" + feature_string); + + String[] features = feature_string.split("\t"); + + + try { + junction_mapping_number = Double.parseDouble(features[0]); + shorter_length = Double.parseDouble(features[3]); + mismatches = Double.parseDouble(features[4]); + } catch (Exception x) { + x.printStackTrace(); + System.exit(0); + } + + if (mismatches >= 4 && junction_mapping_number <= 0) { + + skip++; + continue; + } + + if (shorter_length < minimum_length && mismatches >= 4) { + skip++; + continue; + } + + String[] junction = texts[i].split("-"); + Information information = new Information(); + information.setLeft_boundary(Integer.parseInt(junction[0])); + information.setRight_boundary(Integer.parseInt(junction[1])); + list.add(information); + + } else if (lines[i].equals("N")) { + String junction_id = texts[i]; + String feature_string = hash_map.get(junction_id); + + String[] features = feature_string.split("\t"); + + junction_mapping_number = Double.parseDouble(features[0]); + shorter_length = Double.parseDouble(features[3]); + mismatches = Double.parseDouble(features[4]); + double intron_length = Double.parseDouble(features[5]); + + if (mismatches <= 0 && shorter_length >= 25 && intron_length == 0.0) { + String[] junction = texts[i].split("-"); + Information information = new Information(); + information.setLeft_boundary(Integer.parseInt(junction[0])); + information.setRight_boundary(Integer.parseInt(junction[1])); + list.add(information); + add++; + } + } + } + } + counter++; + } + + + bufferedReader.close(); + + System.out.println("SKIPPED: " + skip + "\t" + "ADDED: " + add); + return list; + } + + public List get_predicted_introns(String in_file_name) throws IOException { + List list = new ArrayList(); + + FileInputStream fileInputStream = new FileInputStream(in_file_name); + DataInputStream dataInputStream = new DataInputStream(fileInputStream); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream)); + String line, text = ""; + int counter = 0; + int ns = 0; + + while ((line = bufferedReader.readLine()) != null) { + if (counter % 2 == 0) { + text = line; + } else { + line = line.trim(); + text = text.trim(); + String[] texts = text.split(" +"); + String[] lines = line.split(" +"); + + for (int i = 0; i < lines.length; i++) { + if (lines[i].equals("P")) { + String[] junction = texts[i].split("-"); + Information information = new Information(); + information.setLeft_boundary(Integer.parseInt(junction[0])); + information.setRight_boundary(Integer.parseInt(junction[1])); + list.add(information); + } else { + ns++; + } + } + } + counter++; + } + + bufferedReader.close(); + //System.out.println("SIZE: " + list.size() + " N_SIZE: " + ns); + return list; + } + + public void create_pos_neg_file(String file_name, List pos_set, List neg_set, List list) throws IOException { + + int counter = 0; + List check_list = new ArrayList(); + + FileWriter fileWriter = new FileWriter(file_name, false); + BufferedWriter out = new BufferedWriter(fileWriter); + + String line = ""; + + for (int i = 0; i < number_of_objects; i++) { + line += String.valueOf(i) + " "; + } + + out.write(line + "class" + "\n"); + + + int count = 0; + int not_found = 0; + + Collections.shuffle(pos_set, new Random()); + + for (Information information : pos_set) { + + int left = information.getLeft_boundary(); + int right = information.getRight_boundary(); + + + List data = retrieve_data(list, left, right); + + if (data.size() <= 0) { + not_found++; + } + + for (String string : data) { + + String[] strings = string.split("\t"); + + if (is_exist(check_list, strings[2])) { + counter++; + continue; + } else { + check_list.add(strings[2]); + } + + String text = ""; + + for (int i = 2; i < strings.length; i++) { + text += strings[i] + "\t"; + } + + text += "P\n"; + out.write(text); + } + + count++; + + if (count > cut_off * neg_set.size()) { + break; + } + } + + + count = 0; + not_found = 0; + + Collections.shuffle(neg_set, new Random()); + + for (Information information : neg_set) { + int left = information.getLeft_boundary(); + int right = information.getRight_boundary(); + + List data = retrieve_data(list, left, right); + + if (data.size() <= 0) { + not_found++; + } + + for (String string : data) { + String[] strings = string.split("\t"); + + if (is_exist(check_list, strings[2])) { + counter++; + continue; + } else { + check_list.add(strings[2]); + } + + String text = ""; + + for (int i = 2; i < strings.length; i++) { + text += strings[i] + "\t"; + } + + text += "N\n"; + out.write(text); + } + + count++; + } + + out.close(); + //System.out.println("TOTAL_DUPLICATES: " + counter); + //System.out.println("NOT_FOUND -VE: " + not_found); + } + + public boolean is_exist(Junction junction, List list) { + int left_boundary = junction.getDonor_site(); + int right_boundary = junction.getAcceptor_site(); + + for (Information information : list) { + + if (Math.abs(left_boundary - information.getLeft_boundary()) <= threshold && + Math.abs(right_boundary - information.getRight_boundary()) <= threshold) { + + return true; + } + } + + return false; + } + + public static int existence_list_left(List list, String reference, int left_boundary, int traverse, int shift, int how_many) { + + String sub_sequence = reference.substring(left_boundary - traverse, left_boundary + shift).toUpperCase(); + + for (int i = 0; i < how_many; i++) { + String kMer = list.get(i).kMer; + + if (sub_sequence.contains(kMer)) { + return 1; + } + } + + return 0; + } + + public static int existence_list_right(List list, String reference, int right_boundary, int traverse, int shift, int how_many) { + + String sub_sequence = reference.substring(right_boundary - shift, right_boundary + traverse).toUpperCase(); + + for (int i = 0; i < how_many; i++) { + + String kMer = list.get(i).kMer; + + if (sub_sequence.contains(kMer)) { + return 1; + } + } + + return 0; + } + + public static int number_of_objects = 12; + + public List create_statistical_table(String out_file_name, HashMap> map, List pos_list, + List neg_list, List coverage, int read_length, int critical_length, + String reference, boolean fire, List kmer_list_left, List kmer_list_right, + List kmer_list_left_intron, List kmer_list_right_intron) throws IOException { + + int counter = 0; + List rows = new ArrayList(); + + FileWriter fileWriter = new FileWriter(out_file_name, false); + BufferedWriter out = new BufferedWriter(fileWriter); + + String text = ""; + + for (int i = 0; i < number_of_objects; i++) { + text += String.valueOf(i) + " "; + } + + out.write(text + "\n"); + + for (Object o : junctions.entrySet()) { + Map.Entry entry = (Map.Entry) o; + Junction junction = (Junction) entry.getKey(); + + if (fire) { + if (is_exist(junction, pos_list)) { + continue; + } + + if (is_exist(junction, neg_list)) { + continue; + } + } + + @SuppressWarnings("unchecked") + List list = (List) entry.getValue(); + + int junction_mapping_number = get_junction_mapping_number(list); + + double coverage_score = get_coverage_score(coverage, junction.getDonor_site(), junction.getAcceptor_site(), read_length); + + double mapping_score = get_multiple_mapping_score(list, map); + + int shorter_length = get_shorter_length_of_alignment(list); + + double mismatches = (int) get_number_of_mismatches(list); + + double intron_size = get_intron_size(junction.getDonor_site(), junction.getAcceptor_site(), critical_length); + + double intron_coverage_score = Math.abs(get_intron_coverage_score(coverage, junction.getAcceptor_site(), + junction.getDonor_site(), read_length)); + + int canonical_score = get_canonical_score(junction, reference); + + int list_of_existence_left = existence_list_left(kmer_list_left, reference, junction.getDonor_site(), + traverse, shift, number_of_kmers); + + int list_of_existence_right = existence_list_right(kmer_list_right, reference, junction.getAcceptor_site(), + traverse, shift, number_of_kmers); + + int list_of_existence_left_intron = existence_list_left(kmer_list_left_intron, reference, junction.getDonor_site(), + shift, traverse, number_of_kmers); + + int list_of_existence_right_intron = existence_list_right(kmer_list_right_intron, reference, junction.getAcceptor_site(), + shift, traverse, number_of_kmers); + + distribution_right.clear(); + distribution_left.clear(); + + String junction_id = String.valueOf(junction.getDonor_site()) + "-" + String.valueOf(junction.getAcceptor_site()); + + text = String.valueOf(junction_mapping_number) + "\t" + + String.valueOf(coverage_score) + "\t" + + String.valueOf(mapping_score) + "\t" + + String.valueOf(shorter_length) + "\t" + + String.valueOf(mismatches) + "\t" + + String.valueOf(intron_size) + "\t" + + intron_coverage_score + "\t" + + list_of_existence_left + "\t" + + list_of_existence_right + "\t" + + list_of_existence_left_intron + "\t" + + list_of_existence_right_intron + "\t" + + canonical_score; + + String features = text; + + hash_map.put(junction_id, features); + + text = junction_id + "\t" + text; + + out.write(text + "\n"); + + text = String.valueOf(junction.getDonor_site()) + "\t" + String.valueOf(junction.getAcceptor_site()) + "\t" + text; + rows.add(text); + } + + out.close(); + + //System.out.println("--------------------> SKIPPED: " + counter); + + return rows; + } + + public int get_junction_mapping_number(List list) { + int count = 0; + + for (Information information : list) { + int id = information.getContig_id(); + if (usrs.containsKey(id)) { + count++; + } + } + return count; + } + + public double get_intron_size(int left, int right, int critical_length) { + int intron_size = right - left + 1; + + if (intron_size <= critical_length) { + return 0.0; + } + + return -Math.log(intron_size - critical_length); + } + + public double get_coverage_score(List coverage, int left_boundary, int right_boundary, int read_length) { + + double coverage_donor, coverage_acceptor; + int cov_one = 0, cov_two = 0; + + int begin_index = left_boundary - 2 * read_length; + int end_index = left_boundary - read_length; + + for (int i = begin_index; i <= end_index; i++) { + cov_one += coverage.get(i); + } + + begin_index = left_boundary - read_length; + end_index = left_boundary; + + for (int i = begin_index; i <= end_index; i++) { + cov_two += coverage.get(i); + } + + coverage_donor = (double) (cov_one - cov_two) / (right_boundary - left_boundary); + + begin_index = right_boundary; + end_index = right_boundary + read_length; + cov_one = 0; + cov_two = 0; + + for (int i = begin_index; i <= end_index; i++) { + cov_one += coverage.get(i); + } + + begin_index = right_boundary - read_length; + end_index = right_boundary; + + for (int i = begin_index; i <= end_index; i++) { + cov_two += coverage.get(i); + } + + coverage_acceptor = (double) (cov_one - cov_two) / (right_boundary - left_boundary); + + return (coverage_acceptor + coverage_donor); + } + + public double get_intron_coverage_score(List coverage, int right, int left, int read_length) { + + double coverage_donor, coverage_acceptor; + int cov_one = 0; + + int begin_index = right - 6 * read_length; + int end_index = right - 2 * read_length; + + for (int i = begin_index; i <= end_index; i++) { + cov_one += coverage.get(i); + } + + coverage_donor = (double) (cov_one / (right - left)); + + begin_index = left + 2 * read_length; + end_index = left + 6 * read_length; + cov_one = 0; + + for (int i = begin_index; i <= end_index; i++) { + cov_one += coverage.get(i); + } + + coverage_acceptor = (double) (cov_one / (right - left)); + + return (coverage_acceptor + coverage_donor); + } + + public int get_shorter_length_of_alignment(List list) { + + int min = Integer.MIN_VALUE; + + for (Information information : list) { + int shorter_length = information.left_length < information.right_length ? information.left_length : information.right_length; + + if (min <= shorter_length) { + min = shorter_length; + } + } + + return min; + } + + public double get_multiple_mapping_score(List list, HashMap> map) { + + int total_reads = list.size(); + int value = 0; + + for (Information information : list) { + int id = information.getContig_id(); + + if (usrs.containsKey(id)) { + value += 1; + } else { + value += ((List) map.get(id)).size(); + } + } + + return (double) total_reads / value; + } + + public double get_number_of_mismatches(List list) { + double mismatch = 0.0; + + for (Information information : list) { + mismatch += information.getLeft_mismatch() + information.getRight_mismatch(); + } + + return mismatch / list.size(); + } + + public class Information { + private int contig_id; + private int part_no; + private int left_boundary; + private int right_boundary; + private int left_mismatch; + private int right_mismatch; + private int left_length; + private int right_length; + + private boolean is_canonical_junction; + private boolean is_semi_canonical_junction; + private boolean is_non_canonical_junction; + + private int intron_length; + private boolean coverage; + + public boolean isCoverage() { + return coverage; + } + + public void setCoverage(boolean coverage) { + this.coverage = coverage; + } + + public int getContig_id() { + return contig_id; + } + + public void setContig_id(int contig_id) { + this.contig_id = contig_id; + } + + public boolean isIs_canonical_junction() { + return is_canonical_junction; + } + + public void setIs_canonical_junction(boolean is_canonical_junction) { + this.is_canonical_junction = is_canonical_junction; + } + + public boolean isIs_semi_canonical_junction() { + return is_semi_canonical_junction; + } + + public void setIs_semi_canonical_junction(boolean is_semi_canonical_junction) { + this.is_semi_canonical_junction = is_semi_canonical_junction; + } + + public boolean isIs_non_canonical_junction() { + return is_non_canonical_junction; + } + + public void setIs_non_canonical_junction(boolean is_non_canonical_junction) { + this.is_non_canonical_junction = is_non_canonical_junction; + } + + public int getIntron_length() { + return intron_length; + } + + public void setIntron_length(int intron_length) { + this.intron_length = intron_length; + } + + public int getLeft_length() { + return left_length; + } + + public void setLeft_length(int left_length) { + this.left_length = left_length; + } + + public int getRight_length() { + return right_length; + } + + public void setRight_length(int right_length) { + this.right_length = right_length; + } + + public int getPart_no() { + return part_no; + } + + public void setPart_no(int part_no) { + this.part_no = part_no; + } + + public int getLeft_mismatch() { + return left_mismatch; + } + + public void setLeft_mismatch(int left_mismatch) { + this.left_mismatch = left_mismatch; + } + + public int getRight_mismatch() { + return right_mismatch; + } + + public void setRight_mismatch(int right_mismatch) { + this.right_mismatch = right_mismatch; + } + + public int getLeft_boundary() { + return left_boundary; + } + + public void setLeft_boundary(int left_boundary) { + this.left_boundary = left_boundary; + } + + public int getRight_boundary() { + return right_boundary; + } + + public void setRight_boundary(int right_boundary) { + this.right_boundary = right_boundary; + } + } + + public class Junction { + private int donor_site; + private int acceptor_site; + + public int getDonor_site() { + return donor_site; + } + + public void setDonor_site(int donor_site) { + this.donor_site = donor_site; + } + + public int getAcceptor_site() { + return acceptor_site; + } + + public void setAcceptor_site(int acceptor_site) { + this.acceptor_site = acceptor_site; + } + + } + + public class Accuracy { + public double sensitivity; + public double specificity; + public int predicted_introns; + public int duplicates; + public int intersection; + } + + public static void run_script(String script_file_name) throws IOException, InterruptedException { + String line; + + Process p = Runtime.getRuntime().exec("Rscript " + script_file_name); + + BufferedReader bri = new BufferedReader + (new InputStreamReader(p.getInputStream())); + + BufferedReader bre = new BufferedReader + (new InputStreamReader(p.getErrorStream())); + + while ((line = bri.readLine()) != null) { + System.out.println(line); + } + bri.close(); + + while ((line = bre.readLine()) != null) { + System.out.println(line); + } + bre.close(); + + p.waitFor(); + System.out.println("Done."); + } + + public static void main(String[] args) throws IOException, InterruptedException { + + Date d1 = new Date(); + + long startCpuTimeNano = getCpuTime(); + + String exact_file_name = "50_hg19.junc"; + String junction_file_name = "junctions_information.info"; + String statistical_file_name = "statistics.info"; + String pos_neg_file_name = "pos_neg.info"; + String script_file_name = "script.r"; + + Statistics statistics = new Statistics("properties.prop"); + + String reference = read_sequence(reference_file_name); + + List cov_two = retrieve_coverage(cov_file_name); + + HashMap> map = statistics.get_unique_spliced_reads(junction_file_name); + + System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++"); + + System.out.println("JUNCTIONS: " + junctions.size()); + + if (_switch.equalsIgnoreCase("off")) { + statistics._derive_sets(reference, map); + } else { + statistics.derive_sets(reference, map); + } + + System.out.println("USRs: " + usrs.size()); + System.out.println("MSRs: " + msrs.size()); + System.out.println("NUSRs: " + nusrs.size()); + + System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++"); + + List list = statistics.derive_positive_set(); + System.out.println("POSITIVE_LIST_SIZE: " + list.size()); + + List pos_reduced_junctions = statistics.get_contracted_junctions(list); + System.out.println("POSITIVE_REDUCED_LIST_SIZE: " + pos_reduced_junctions.size()); + + if (_switch.equalsIgnoreCase("off")) { + list = statistics._derive_negative_set(map, minimum_length); + } else { + list = statistics.derive_negative_set(map, minimum_length); + } + + System.out.println("NEGATIVE_LIST_SIZE: " + list.size()); + + List neg_reduced_junctions = statistics.get_contracted_junctions(list); + System.out.println("NEGATIVE_REDUCED_LIST_SIZE: " + neg_reduced_junctions.size()); + + System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++++"); + + /*List list_exact = statistics.getExactIntrons(exact_file_name, chr_name); + statistics.getAppIntersection(list_exact, pos_reduced_junctions); + statistics.getAppIntersection(list_exact, neg_reduced_junctions);*/ + + List distribution_list_left = statistics.get_distribution_left(reference, pos_reduced_junctions, traverse, shift, kmer_length); + + List distribution_list_right = statistics.get_distribution_right(reference, pos_reduced_junctions, traverse, shift, kmer_length); + + distribution_left.clear(); + distribution_right.clear(); + + List distribution_list_left_intron = statistics.get_distribution_left(reference, pos_reduced_junctions, shift, traverse, kmer_length); + + List distribution_list_right_intron = statistics.get_distribution_right(reference, pos_reduced_junctions, shift, traverse, kmer_length); + + distribution_left.clear(); + distribution_right.clear(); + + int critical_length = statistics.get_critical_intron_length(pos_reduced_junctions); + + List rows = statistics.create_statistical_table(statistical_file_name, map, + pos_reduced_junctions, neg_reduced_junctions, cov_two, read_length, critical_length, reference, false, + distribution_list_left, distribution_list_right, distribution_list_left_intron, distribution_list_right_intron); + + System.out.println("NUMBER OF ROWS ->: " + rows.size()); + + int max_value = Integer.MAX_VALUE; + List final_list = new ArrayList(); + + //List list_exact = statistics.getExactIntrons(exact_file_name, chromosome_name + "."); + + for (int i = 0; i < iteration; i++) { + statistics.create_pos_neg_file(pos_neg_file_name, pos_reduced_junctions, neg_reduced_junctions, rows); + Statistics.run_script(script_file_name); + List list_inexact = statistics.get_predicted_introns("predict.info"); + + if (list_inexact.size() <= max_value) { + final_list = new ArrayList(list_inexact); + max_value = list_inexact.size(); + } + + //statistics.getAppIntersection(list_exact, list_inexact); + } + + String file_name = "predicted_junctions_list_" + chromosome_name + ".txt"; + + FileWriter fileWriter = new FileWriter(file_name, false); + BufferedWriter out = new BufferedWriter(fileWriter); + + for (Information information : final_list) { + String print_line = chromosome_name + "\t" + information.left_boundary + "\t" + information.right_boundary + "\n"; + out.write(print_line); + } + + out.close(); + System.out.println("**************************************************"); + + + /* List sortedByIntronsNumbers = new ArrayList(accuracy_list); + + Collections.sort(sortedByIntronsNumbers, new Comparator() { + public int compare(Accuracy p1, Accuracy p2) { + return Integer.valueOf(p1.predicted_introns).compareTo(p2.predicted_introns); + } + }); + + System.out.println("Sensitivity" + "\t" + "Specificity" + "\t" + "Predicted" + "\t" + "Intersection" + "\t" + "Duplicates"); + + for (Accuracy accuracy : sortedByIntronsNumbers) { + System.out.println(accuracy.sensitivity + "\t" + accuracy.specificity + "\t" + accuracy.predicted_introns + + "\t" + accuracy.intersection + "\t" + accuracy.duplicates); + }*/ + + + Date d2 = new Date(); + + long diff = d2.getTime() - d1.getTime(); + long taskCpuTimeNano = getCpuTime() - startCpuTimeNano; + double time_in_seconds = (double) taskCpuTimeNano / 1000000000.0; + + System.out.println("CPU TIME: " + time_in_seconds + " SECONDS. "); + } + + public class KMer { + public String kMer; + public int count; + + public String getkMer() { + return kMer; + } + + public void setkMer(String kMer) { + this.kMer = kMer; + } + + public int getCount() { + return count; + } + + public void setCount(int count) { + this.count = count; + } + + public KMer(String kMer, int count) { + this.kMer = kMer; + this.count = count; + } + } + +}