63 lines
1.2 KiB
Plaintext
63 lines
1.2 KiB
Plaintext
define LOOKAHEAD_LEN = 128
|
|
|
|
func bwt_sort (String s) { # O(n * LOOKAHEAD_LEN) space (fast)
|
|
^s.len -> map {|i|
|
|
var t = s.slice(i, LOOKAHEAD_LEN)
|
|
|
|
if (t.len < LOOKAHEAD_LEN) {
|
|
t += s.slice(0, min(i, LOOKAHEAD_LEN - t.len))
|
|
}
|
|
|
|
[t, i]
|
|
}.sort {|a,b|
|
|
(a[0] <=> b[0]) || (s.rotate(a[1]) <=> s.rotate(b[1]))
|
|
}.map { .[1] }
|
|
}
|
|
|
|
func bwt_encode(String s) {
|
|
|
|
var bwt = bwt_sort(s)
|
|
var ret = bwt.map {|i| s.slice(i-1, 1) }.join
|
|
var idx = bwt.first_index_by { .is_zero }
|
|
|
|
return (ret, idx)
|
|
}
|
|
|
|
func bwt_decode(String bwt, Number idx) { # fast inversion
|
|
|
|
var tail = bwt.chars
|
|
var head = tail.sort
|
|
|
|
var indices = Hash()
|
|
tail.each_kv {|i,v|
|
|
indices{v} := [] << i
|
|
}
|
|
|
|
var table = []
|
|
head.each_kv {|i,v|
|
|
table[i] = indices{v}.shift
|
|
}
|
|
|
|
var dec = ''
|
|
var i = idx
|
|
|
|
head.len.times {
|
|
dec += head[i]
|
|
i = table[i]
|
|
}
|
|
|
|
return dec
|
|
}
|
|
|
|
var tests = [
|
|
"banana", "appellee", "dogwood", "TOBEORNOTTOBEORTOBEORNOT"
|
|
"SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES",
|
|
]
|
|
|
|
tests.each { |str|
|
|
var (enc, idx) = bwt_encode(str)
|
|
var dec = bwt_decode(enc, idx)
|
|
say "BWT(#{dec.dump}) = (#{enc.dump}, #{idx})"
|
|
assert_eq(str, dec)
|
|
}
|