RosettaCodeData/Task/Burrows-Wheeler-transform/Haskell/burrows-wheeler-transform.hs

46 lines
1.4 KiB
Haskell
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- A straightforward, inefficient implementation of the BurrowsWheeler
-- transform, based on the description in the Wikipedia article.
--
-- Special characters are *not* used to indicate the start or end of sequences,
-- so all strings can be represented.
import Data.List ((!!), find, sort, tails, transpose)
import Data.Maybe (fromJust)
import Text.Printf (printf)
newtype BWT a = BWT [Val a]
bwt :: Ord a => [a] -> BWT a
bwt xs = let n = length xs + 2
ys = transpose $ sort $ take n $ tails $ cycle $ pos xs
in BWT $ ys !! (n-1)
invBwt :: Ord a => BWT a -> [a]
invBwt (BWT xs) = let ys = iterate step (map (const []) xs) !! length xs
in unpos $ fromJust $ find ((== Post) . last) ys
where step = sort . zipWith (:) xs
data Val a = In a | Pre | Post deriving (Eq, Ord)
pos :: [a] -> [Val a]
pos xs = Pre : map In xs ++ [Post]
unpos :: [Val a] -> [a]
unpos xs = [x | In x <- xs]
main :: IO ()
main = mapM_ testBWT [ "", "a", "BANANA", "dogwood",
"TO BE OR NOT TO BE OR WANT TO BE OR NOT?",
"SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES" ]
testBWT :: String -> IO ()
testBWT xs = let fwd = bwt xs
inv = invBwt fwd
in printf "%s\n\t%s\n\t%s\n" xs (pretty fwd) inv
where pretty (BWT ps) = map prettyVal ps
prettyVal (In c) = c
prettyVal Pre = '^'
prettyVal Post = '|'