import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; public final class BioinformaticsGlobalAlignment { public static void main(String[] aArgs) { List> testSequences = Arrays.asList( Arrays.asList( "TA", "AAG", "TA", "GAA", "TA" ), Arrays.asList( "CATTAGGG", "ATTAG", "GGG", "TA" ), Arrays.asList( "AAGAUGGA", "GGAGCGCAUC", "AUCGCAAUAAGGA" ), Arrays.asList( "ATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTAT", "GGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGT", "CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA", "TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", "AACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT", "GCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTC", "CGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCT", "TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", "CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGC", "GATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATT", "TTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", "CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA", "TCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGA" ) ); for ( List test : testSequences ) { for ( String superstring : shortestCommonSuperstrings(test) ) { printReport(superstring); } } } // Return a set containing all of the shortest common superstrings of the given list of strings. private static Set shortestCommonSuperstrings(List aList) { List deduplicated = deduplicate(aList); Set shortest = new HashSet(); shortest.add(String.join("", deduplicated)); int shortestLength = aList.stream().mapToInt( s -> s.length() ).sum(); for ( List permutation : permutations(deduplicated) ) { String candidate = permutation.stream().reduce("", (a, b) -> concatenate(a, b) ); if ( candidate.length() < shortestLength ) { shortest.clear(); shortest.add(candidate); shortestLength = candidate.length(); } else if ( candidate.length() == shortestLength ) { shortest.add(candidate); } } return shortest; } // Remove duplicate strings and strings which are substrings of other strings in the given list. private static List deduplicate(List aList) { List unique = aList.stream().distinct().collect(Collectors.toList()); List result = new ArrayList(unique); List markedForRemoval = new ArrayList(); for ( String testWord : result ) { for ( String word : unique ) { if ( ! word.equals(testWord) && word.contains(testWord) ) { markedForRemoval.add(testWord); } } } result.removeAll(markedForRemoval); return result; } // Return aBefore concatenated with aAfter, removing the longest suffix of aBefore that matches a prefix of aAfter. private static String concatenate(String aBefore, String aAfter) { for ( int i = 0; i < aBefore.length(); i++ ) { if ( aAfter.startsWith(aBefore.substring(i, aBefore.length())) ) { return aBefore.substring(0, i) + aAfter; } } return aBefore + aAfter; } // Return all permutations of the given list of strings. private static List> permutations(List aList) { int[] indexes = new int[aList.size()]; List> result = new ArrayList>(); result.add( new ArrayList(aList) ); int i = 0; while ( i < aList.size() ) { if ( indexes[i] < i ) { final int j = ( i % 2 == 0 ) ? 0 : indexes[i]; String temp = aList.get(j); aList.set(j, aList.get(i)); aList.set(i, temp); result.add( new ArrayList(aList) ); indexes[i]++; i = 0; } else { indexes[i] = 0; i += 1; } } return result; } // Print a report of the given string to the standard output device. private static void printReport(String aText) { char[] nucleotides = new char[] {'A', 'C', 'G', 'T' }; Map bases = new HashMap(); for ( char base : nucleotides ) { bases.put(base, 0); } for ( char ch : aText.toCharArray() ) { bases.merge(ch, 1, Integer::sum); } final int total = bases.values().stream().reduce(0, Integer::sum); System.out.print("Nucleotide counts for: " + ( ( aText.length() > 50 ) ? System.lineSeparator() : "") ); System.out.println(aText); System.out.print(String.format("%s%d%s%d%s%d%s%d", "Bases: A: ", bases.get('A'), ", C: ", bases.get('C'), ", G: ", bases.get('G'), ", T: ", bases.get('T'))); System.out.println(", total: " + total + System.lineSeparator()); } }