171 lines
5.5 KiB
Plaintext
171 lines
5.5 KiB
Plaintext
' Nucleotide tallies shared by the whole program.
' PrintReport resets A, C, G and T to zero and recounts them on every call,
' and stores the length of the reported text in Total.
Dim Shared As Integer A
Dim Shared As Integer C
Dim Shared As Integer G
Dim Shared As Integer T
Dim Shared As Integer Total
|
|
|
|
' Counts the nucleotides in text and prints a short report.
'   text - the sequence to report on; letters are matched case-insensitively.
' Side effects: overwrites the shared counters A, C, G, T and Total.
' Total is the full length of text, so it also includes any characters that
' are not A, C, G or T (those are not tallied individually).
Sub PrintReport(text As String)
    A = 0 : C = 0 : G = 0 : T = 0

    For i As Integer = 1 To Len(text)
        Select Case Ucase(Mid(text, i, 1))
            Case "A"
                A += 1
            Case "C"
                C += 1
            Case "G"
                G += 1
            Case "T"
                T += 1
        End Select
    Next
    Total = Len(text)

    ' Long sequences start on their own line so the header stays readable
    Print "Nucleotide counts for: "; Iif(Len(text) > 50, Chr(10), "");
    Print text
    ' Every label, including the first one, ends with a colon
    Print "Bases: A:"; A; ", C:"; C; ", G:"; G; ", T:"; T; ", total:"; Total
    Print
End Sub
|
|
|
|
' Joins two strings, overlapping them as much as possible.
' Finds the longest suffix of before that is also a prefix of after and
' writes that shared part only once. With no overlap the result is simply
' before followed by after.
Function Concatenate(before As String, after As String) As String
    Dim As Integer startPos = 1

    ' Try suffixes from longest to shortest; the first match is the best one
    Do While startPos <= Len(before)
        Dim As String tail = Mid(before, startPos)
        If Left(after, Len(tail)) = tail Then
            Return Left(before, startPos - 1) + after
        End If
        startPos += 1
    Loop

    Return before + after
End Function
|
|
|
|
' Removes duplicates and strings contained within a larger string from a list of strings.
'   list()   - input strings, indexed from 0
'   result() - receives the surviving strings, in order of first occurrence;
'              it is emptied when list() holds no elements
Sub Deduplicate(list() As String, result() As String)
    ' Empty input: return an empty result. Bail out before sizing the work
    ' arrays, whose bounds would otherwise be the invalid range (0 To -1).
    If Ubound(list) < 0 Then
        Erase result
        Exit Sub
    End If

    Dim As Integer i, j

    ' Pass 1: drop exact duplicates, keeping the first occurrence of each
    Dim As String unique(0 To Ubound(list))
    Dim As Integer uniqueCount = 0
    For i = 0 To Ubound(list)
        Dim As Boolean exists = False
        For j = 0 To uniqueCount - 1
            If list(i) = unique(j) Then
                exists = True
                Exit For
            End If
        Next
        If Not exists Then
            unique(uniqueCount) = list(i)
            uniqueCount += 1
        End If
    Next

    ' Pass 2: drop every string that occurs inside another (distinct) string
    Dim As String tempRes(0 To uniqueCount - 1)
    Dim As Integer resCount = 0
    For i = 0 To uniqueCount - 1
        Dim As Boolean isSubstring = False
        For j = 0 To uniqueCount - 1
            If i <> j And Instr(unique(j), unique(i)) > 0 Then
                isSubstring = True
                Exit For
            End If
        Next
        If Not isSubstring Then
            tempRes(resCount) = unique(i)
            resCount += 1
        End If
    Next

    ' Copy the survivors into an exactly-sized result array
    Redim result(0 To resCount - 1)
    For i = 0 To resCount - 1
        result(i) = tempRes(i)
    Next
End Sub
|
|
|
|
' Evaluates one permutation of the strings.
' Merges the strings of perm() from left to right with Concatenate and
' compares the merged length against the best length found so far.
'   perm()    - one ordering of the strings to merge
'   result()  - the distinct shortest candidates found so far (updated in place)
'   minLength - length of the shortest candidate so far (updated in place)
Sub ProcessPermutation(perm() As String, result() As String, Byref minLength As Integer)
    ' Fold the permutation into a single string, overlapping neighbours
    Dim As String merged = perm(0)
    Dim As Integer k = 1
    While k <= Ubound(perm)
        merged = Concatenate(merged, perm(k))
        k += 1
    Wend

    Dim As Integer mergedLen = Len(merged)

    ' Longer than the current best: nothing to record
    If mergedLen > minLength Then Exit Sub

    If mergedLen < minLength Then
        ' Strictly shorter: throw away the old candidates and start over
        Erase result
        Redim result(0)
        result(0) = merged
        minLength = mergedLen
        Exit Sub
    End If

    ' Same length as the best: record it unless it is already listed
    For idx As Integer = 0 To Ubound(result)
        If result(idx) = merged Then Exit Sub
    Next

    Redim Preserve result(0 To Ubound(result) + 1)
    result(Ubound(result)) = merged
End Sub
|
|
|
|
' Finds the shortest common superstrings of a list of strings.
' The list is first reduced with Deduplicate; every permutation of the
' remaining strings is then merged and scored by ProcessPermutation.
'   list()   - the input strings
'   result() - receives the distinct shortest merged strings found; it is
'              always cleared first and stays empty when nothing remains
'              after deduplication
Sub ShortestCommonSuperstrings(list() As String, result() As String)
    ' Start from a clean slate: entries left in result() by the caller or by
    ' an earlier call must never be mixed with this call's answers.
    Erase result

    Dim As String deduplicated()
    Deduplicate(list(), deduplicated())
    If Ubound(deduplicated) < 0 Then Exit Sub

    ' Upper bound for any candidate: all strings joined without any overlap
    Dim As Integer i, j, minLength = 0
    For i = 0 To Ubound(list)
        minLength += Len(list(i))
    Next

    Dim As Integer n = Ubound(deduplicated) + 1
    Dim As Integer indexes(n - 1)
    Dim As String currentPerm(n - 1)

    For i = 0 To n - 1
        indexes(i) = 0
        currentPerm(i) = deduplicated(i)
    Next

    ' Score the initial ordering before generating the others
    ProcessPermutation(currentPerm(), result(), minLength)

    ' Generate the remaining orderings by swapping elements in place;
    ' indexes() holds the per-position counters that drive the swaps.
    i = 0
    While i < n
        If indexes(i) < i Then
            ' Even position swaps with element 0, odd with element indexes(i)
            j = Iif(i Mod 2 = 0, 0, indexes(i))
            Swap currentPerm(j), currentPerm(i)

            ProcessPermutation(currentPerm(), result(), minLength)

            indexes(i) += 1
            i = 0
        Else
            indexes(i) = 0
            i += 1
        End If
    Wend
End Sub
|
|
|
|
' Test cases
' Each row of test_sequences is one test case; rows are padded with empty
' strings up to 13 entries, and the padding is skipped when a case is built.
Dim As String test_sequences(0 To 3, 0 To 12) = { _
    {"TA", "AAG", "TA", "GAA", "TA", "", "", "", "", "", "", "", ""}, _
    {"CATTAGGG", "ATTAG", "GGG", "TA", "", "", "", "", "", "", "", "", ""}, _
    {"AAGAUGGA", "GGAGCGCAUC", "AUCGCAAUAAGGA", "", "", "", "", "", "", "", "", "", ""}, _
    { "ATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTAT", _
      "GGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGT", _
      "CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA", _
      "TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", _
      "AACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT", _
      "GCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTC", _
      "CGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCT", _
      "TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", _
      "CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGC", _
      "GATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATT", _
      "TTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC", _
      "CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA", _
      "TCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGA" } _
}

For caseIdx As Integer = 0 To 3
    ' Gather the real (non-empty) fragments of this row
    Dim As String test_case()
    For col As Integer = 0 To 12
        If Len(test_sequences(caseIdx, col)) = 0 Then Continue For
        Redim Preserve test_case(Ubound(test_case) + 1)
        test_case(Ubound(test_case)) = test_sequences(caseIdx, col)
    Next

    ' Solve the case and report every shortest superstring found
    Dim As String superstrings()
    ShortestCommonSuperstrings(test_case(), superstrings())

    For answerIdx As Integer = 0 To Ubound(superstrings)
        PrintReport(superstrings(answerIdx))
    Next
Next

' Wait for a key press before the program ends
Sleep
|