154 lines
4.0 KiB
Python
154 lines
4.0 KiB
Python
'''Bioinformatics – base count'''
|
||
|
||
from itertools import count
|
||
from functools import reduce
|
||
|
||
|
||
# genBankFormatWithBaseCounts :: String -> String
|
||
def genBankFormatWithBaseCounts(sequence):
|
||
'''DNA Sequence displayed in a subset of the GenBank format.
|
||
See example at foot of:
|
||
https://www.genomatix.de/online_help/help/sequence_formats.html
|
||
'''
|
||
ks, totals = zip(*baseCounts(sequence))
|
||
ns = list(map(str, totals))
|
||
w = 2 + max(map(len, ns))
|
||
|
||
return '\n'.join([
|
||
'DEFINITION len=' + str(sum(totals)),
|
||
'BASE COUNT ' + ''.join(
|
||
n.rjust(w) + ' ' + k.lower() for (k, n)
|
||
in zip(ks, ns)
|
||
),
|
||
'ORIGIN'
|
||
] + [
|
||
str(i).rjust(9) + ' ' + k for i, k
|
||
in zip(
|
||
count(1, 60),
|
||
[
|
||
' '.join(row) for row in
|
||
chunksOf(6)(chunksOf(10)(sequence))
|
||
]
|
||
)
|
||
] + ['//'])
|
||
|
||
|
||
# baseCounts :: String -> Zip [(String, Int)]
|
||
def baseCounts(baseString):
|
||
'''Sums for each base type in the given sequence string, with
|
||
a fifth sum for any characters not drawn from {A, C, G, T}.'''
|
||
bases = {
|
||
'A': 0,
|
||
'C': 1,
|
||
'G': 2,
|
||
'T': 3
|
||
}
|
||
return zip(
|
||
list(bases.keys()) + ['Other'],
|
||
foldl(
|
||
lambda a: compose(
|
||
nthArrow(succ)(a),
|
||
flip(curry(bases.get))(4)
|
||
)
|
||
)((0, 0, 0, 0, 0))(baseString)
|
||
)
|
||
|
||
|
||
# -------------------------- TEST --------------------------
|
||
# main :: IO ()
|
||
def main():
|
||
'''Base counts and sequence displayed in GenBank format
|
||
'''
|
||
print(
|
||
genBankFormatWithBaseCounts('''\
|
||
CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATG\
|
||
CTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTG\
|
||
AGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGAT\
|
||
GGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT\
|
||
CGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGG\
|
||
TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA\
|
||
TTTAATTTTTCTATATAGCGATCTGTATTTAAGCAATTCATTTAGGTTAT\
|
||
CGCCGCGATGCTCGGTTCGGACCGCCAAGCATCTGGCTCCACTGCTAGTG\
|
||
TCCTAAATTTGAATGGCAAACACAAATAAGATTTAGCAATTCGTGTAGAC\
|
||
GACCGGGGACTTGCATGATGGGAGCAGCTTTGTTAAACTACGAACGTAAT''')
|
||
)
|
||
|
||
|
||
# ------------------------ GENERIC -------------------------
|
||
|
||
# chunksOf :: Int -> [a] -> [[a]]
|
||
def chunksOf(n):
|
||
'''A series of lists of length n, subdividing the
|
||
contents of xs. Where the length of xs is not evenly
|
||
divible, the final list will be shorter than n.
|
||
'''
|
||
return lambda xs: reduce(
|
||
lambda a, i: a + [xs[i:n + i]],
|
||
range(0, len(xs), n), []
|
||
) if 0 < n else []
|
||
|
||
|
||
# compose :: ((a -> a), ...) -> (a -> a)
|
||
def compose(*fs):
|
||
'''Composition, from right to left,
|
||
of a series of functions.
|
||
'''
|
||
def go(f, g):
|
||
def fg(x):
|
||
return f(g(x))
|
||
return fg
|
||
return reduce(go, fs, lambda x: x)
|
||
|
||
|
||
# curry :: ((a, b) -> c) -> a -> b -> c
|
||
def curry(f):
|
||
'''A curried function derived
|
||
from an uncurried function.
|
||
'''
|
||
return lambda x: lambda y: f(x, y)
|
||
|
||
|
||
# flip :: (a -> b -> c) -> b -> a -> c
|
||
def flip(f):
|
||
'''The (curried or uncurried) function f with its
|
||
arguments reversed.
|
||
'''
|
||
return lambda a: lambda b: f(b)(a)
|
||
|
||
|
||
# foldl :: (a -> b -> a) -> a -> [b] -> a
|
||
def foldl(f):
|
||
'''Left to right reduction of a list,
|
||
using the binary operator f, and
|
||
starting with an initial value a.
|
||
'''
|
||
def go(acc, xs):
|
||
return reduce(lambda a, x: f(a)(x), xs, acc)
|
||
return lambda acc: lambda xs: go(acc, xs)
|
||
|
||
|
||
# nthArrow :: (a -> b) -> Tuple -> Int -> Tuple
|
||
def nthArrow(f):
|
||
'''A simple function lifted to one which applies
|
||
to a tuple, transforming only its nth value.
|
||
'''
|
||
def go(v, n):
|
||
return v if n > len(v) else [
|
||
x if n != i else f(x)
|
||
for i, x in enumerate(v)
|
||
]
|
||
return lambda tpl: lambda n: tuple(go(tpl, n))
|
||
|
||
|
||
# succ :: Enum a => a -> a
|
||
def succ(x):
|
||
'''The successor of a value.
|
||
For numeric types, (1 +).
|
||
'''
|
||
return 1 + x
|
||
|
||
|
||
# MAIN ---
|
||
if __name__ == '__main__':
|
||
main()
|