' RosettaCodeData/Task/Bioinformatics-Global-align.../FreeBASIC/bioinformatics-global-align...
'
' 171 lines
' 5.5 KiB
' Plaintext

' Nucleotide tallies. They are module-level (Shared) and are overwritten by
' every call to PrintReport, so they always describe the last reported text.
Dim Shared As Integer A, C, G, T, Total

' Prints `text` followed by a count of each base it contains.
'   text - the sequence to report on; letters are matched case-insensitively.
' Only A, C, G and T are tallied; any other character contributes to the
' total (which is the string length) but to none of the per-base counters.
Sub PrintReport(text As String)
    A = 0 : C = 0 : G = 0 : T = 0
    Total = Len(text)
    For i As Integer = 1 To Total
        Select Case Ucase(Mid(text, i, 1))
            Case "A": A += 1
            Case "C": C += 1
            Case "G": G += 1
            Case "T": T += 1
        End Select
    Next
    ' Long sequences start on their own line so the heading stays readable.
    Print "Nucleotide counts for: "; Iif(Total > 50, Chr(10), "");
    Print text
    ' Every label carries a colon so the report line is formatted uniformly.
    Print "Bases: A:"; A; ", C:"; C; ", G:"; G; ", T:"; T; ", total:"; Total
    Print
End Sub
' Joins `before` and `after`, merging the longest suffix of `before` that is
' also a prefix of `after` so the shared part appears only once.
'   before - left-hand string
'   after  - right-hand string
' Returns the merged string; when there is no overlap this is simply
' before + after.
Function Concatenate(before As String, after As String) As String
    Dim As Integer beforeLen = Len(before)
    ' An overlap can never be longer than the shorter of the two strings, so
    ' longer suffixes of `before` are not worth building or comparing.
    Dim As Integer maxOverlap = Iif(beforeLen < Len(after), beforeLen, Len(after))
    ' Try the longest overlap first so the result is as short as possible.
    For overlap As Integer = maxOverlap To 1 Step -1
        If Right(before, overlap) = Left(after, overlap) Then
            Return Left(before, beforeLen - overlap) + after
        End If
    Next
    Return before + after
End Function
' Removes duplicates, and strings contained within a larger string, from a
' list of strings.
'   list()   - input strings; indexed from 0, as everywhere else in this file.
'   result() - dynamic array that receives the surviving strings in their
'              original relative order. It is left unallocated (Ubound = -1)
'              when `list` is empty.
Sub Deduplicate(list() As String, result() As String)
    Dim As Integer i, j
    Dim As Integer last = Ubound(list)

    ' An empty list would otherwise ask for arrays with bounds (0 To -1).
    If last < 0 Then
        Erase result
        Exit Sub
    End If

    ' Remove duplicates, keeping the first occurrence of each string
    Dim As String unique(0 To last)
    Dim As Integer uniqueCount = 0
    For i = 0 To last
        Dim As Boolean exists = False
        For j = 0 To uniqueCount - 1
            If list(i) = unique(j) Then exists = True : Exit For
        Next
        If Not exists Then
            unique(uniqueCount) = list(i)
            uniqueCount += 1
        End If
    Next

    ' Remove substrings: drop every string that occurs inside another one.
    ' The entries are distinct by now, so a longest entry always survives and
    ' keptCount is at least 1.
    Dim As String kept(0 To uniqueCount - 1)
    Dim As Integer keptCount = 0
    For i = 0 To uniqueCount - 1
        Dim As Boolean isSubstring = False
        For j = 0 To uniqueCount - 1
            ' AndAlso skips the Instr search when comparing a string with itself.
            If i <> j AndAlso Instr(unique(j), unique(i)) > 0 Then
                isSubstring = True
                Exit For
            End If
        Next
        If Not isSubstring Then
            kept(keptCount) = unique(i)
            keptCount += 1
        End If
    Next

    ' Only now touch the output array, sized exactly to the survivors.
    Redim result(0 To keptCount - 1)
    For i = 0 To keptCount - 1
        result(i) = kept(i)
    Next
End Sub
' Evaluates one ordering of the strings: merges them left to right with
' Concatenate and records the merged string if it is among the shortest so far.
'   perm()    - the strings in the order to be merged
'   result()  - the shortest merged strings found so far; replaced when a
'               strictly shorter one turns up, extended on a tie
'   minLength - shortest length found so far; lowered when a shorter merged
'               string is found
Sub ProcessPermutation(perm() As String, result() As String, Byref minLength As Integer)
    ' Fold the whole ordering into a single merged string.
    Dim As String merged = perm(0)
    Dim As Integer k = 1
    While k <= Ubound(perm)
        merged = Concatenate(merged, perm(k))
        k += 1
    Wend

    Dim As Integer mergedLen = Len(merged)

    ' Longer than the best known length: nothing to record.
    If mergedLen > minLength Then Exit Sub

    ' Strictly shorter: discard everything collected so far and start over.
    If mergedLen < minLength Then
        Erase result
        Redim result(0)
        result(0) = merged
        minLength = mergedLen
        Exit Sub
    End If

    ' Same length as the best: append it unless it is already stored.
    For idx As Integer = 0 To Ubound(result)
        If result(idx) = merged Then Exit Sub
    Next
    Redim Preserve result(0 To Ubound(result) + 1)
    result(Ubound(result)) = merged
End Sub
' Returns the shortest common superstrings of a list of strings.
'   list()   - the input strings
'   result() - dynamic array that receives every distinct shortest string
'              found; any previous contents are discarded. It is left
'              unallocated (Ubound = -1) when `list` is empty.
' Every ordering of the deduplicated strings is merged with
' ProcessPermutation, and the shortest merged strings are kept.
Sub ShortestCommonSuperstrings(list() As String, result() As String)
    ' Nothing to combine: report an empty result instead of reducing an empty list.
    If Ubound(list) < 0 Then
        Erase result
        Exit Sub
    End If

    Dim As String deduplicated()
    Deduplicate(list(), deduplicated())

    ' Start from a clean slate so entries left in the caller's array by an
    ' earlier use cannot leak into this answer.
    Erase result
    If Ubound(deduplicated) < 0 Then Exit Sub

    Dim As Integer n = Ubound(deduplicated) + 1
    Dim As Integer i, j, minLength = 0
    Dim As Integer indexes(0 To n - 1)
    Dim As String currentPerm(0 To n - 1)
    For i = 0 To n - 1
        indexes(i) = 0
        currentPerm(i) = deduplicated(i)
        ' Plain concatenation with no overlap is the longest any merge can be,
        ' so the summed length is a safe initial upper bound.
        minLength += Len(deduplicated(i))
    Next

    ' Evaluate the initial ordering, then every other one. Orderings are
    ' generated in place by swapping two entries at a time; indexes() holds
    ' the per-position counters that drive the swaps.
    ProcessPermutation(currentPerm(), result(), minLength)
    i = 0
    While i < n
        If indexes(i) < i Then
            ' Even positions swap with slot 0, odd positions with the slot
            ' named by the counter.
            j = Iif(i Mod 2 = 0, 0, indexes(i))
            Swap currentPerm(j), currentPerm(i)
            ProcessPermutation(currentPerm(), result(), minLength)
            indexes(i) += 1
            i = 0
        Else
            indexes(i) = 0
            i += 1
        End If
    Wend
End Sub
' Test cases
' One row per test case (4 rows); each row lists that case's fragments.
' Rows are padded with empty strings up to 13 columns because the table is
' rectangular; the driver loop below skips the empty entries.
' The third row uses the letter U, which PrintReport does not tally.
' The last row repeats some fragments verbatim, which exercises Deduplicate.
Dim As String test_sequences(0 To 3, 0 To 12) = { _
{"TA", "AAG", "TA", "GAA", "TA", "", "", "", "", "", "", "", ""}, _
{"CATTAGGG", "ATTAG", "GGG", "TA", "", "", "", "", "", "", "", "", ""}, _
{"AAGAUGGA", "GGAGCGCAUC", "AUCGCAAUAAGGA", "", "", "", "", "", "", "", "", "", ""}, _
{ "ATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTAT", _
"GGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGT", _
"CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA", _
"TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", _
"AACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT", _
"GCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTC", _
"CGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCT", _
"TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", _
"CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGC", _
"GATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATT", _
"TTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", _
"CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA", _
"TCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGA" } _
}
' Driver: for each test case, gather its non-empty fragments, compute the
' shortest common superstrings and print a nucleotide report for each one.
For caseIdx As Integer = 0 To 3
    ' Declared inside the loop so each test case starts with an empty array.
    Dim As String fragments()
    For col As Integer = 0 To 12
        Dim As String fragment = test_sequences(caseIdx, col)
        ' Empty entries are only padding in the rectangular table.
        If Len(fragment) = 0 Then Continue For
        Redim Preserve fragments(Ubound(fragments) + 1)
        fragments(Ubound(fragments)) = fragment
    Next

    Dim As String shortest()
    ShortestCommonSuperstrings(fragments(), shortest())

    Dim As Integer k = 0
    While k <= Ubound(shortest)
        PrintReport(shortest(k))
        k += 1
    Wend
Next
' Keep the console window open until a key is pressed.
Sleep