RosettaCodeData/Task/Huffman-coding/Perl/huffman-coding.pl

58 lines
1.2 KiB
Perl

use 5.10.0;
use strict;
# produce encode and decode dictionary from a tree
sub walk {
my ($node, $code, $h, $rev_h) = @_;
my $c = $node->[0];
if (ref $c) { walk($c->[$_], $code.$_, $h, $rev_h) for 0,1 }
else { $h->{$c} = $code; $rev_h->{$code} = $c }
$h, $rev_h
}
# make a tree, and return resulting dictionaries
sub mktree {
my (%freq, @nodes);
$freq{$_}++ for split '', shift;
@nodes = map([$_, $freq{$_}], keys %freq);
do { # poor man's priority queue
@nodes = sort {$a->[1] <=> $b->[1]} @nodes;
my ($x, $y) = splice @nodes, 0, 2;
push @nodes, [[$x, $y], $x->[1] + $y->[1]]
} while (@nodes > 1);
walk($nodes[0], '', {}, {})
}
sub encode {
my ($str, $dict) = @_;
join '', map $dict->{$_}//die("bad char $_"), split '', $str
}
sub decode {
my ($str, $dict) = @_;
my ($seg, @out) = ("");
# append to current segment until it's in the dictionary
for (split '', $str) {
$seg .= $_;
my $x = $dict->{$seg} // next;
push @out, $x;
$seg = '';
}
die "bad code" if length($seg);
join '', @out
}
my $txt = 'this is an example for huffman encoding';
my ($h, $rev_h) = mktree($txt);
for (keys %$h) { print "'$_': $h->{$_}\n" }
my $enc = encode($txt, $h);
print "$enc\n";
print decode($enc, $rev_h), "\n";