RosettaCodeData/Task/Huffman-coding/OCaml/huffman-coding.ml

51 lines
1.6 KiB
OCaml

type 'a huffman_tree =
| Leaf of 'a
| Node of 'a huffman_tree * 'a huffman_tree
module HSet = Set.Make
(struct
type t = int * char huffman_tree (* pair of frequency and the tree *)
let compare = compare
(* We can use the built-in compare function to order this: it will order
first by the first element (frequency) and then by the second (the tree),
the latter of which we don't care about but which helps prevent elements
from being equal, since Set does not allow duplicate elements *)
end);;
let build_tree charFreqs =
let leaves = HSet.of_list (List.map (fun (c,f) -> (f, Leaf c)) charFreqs) in
let rec aux trees =
let f1, a = HSet.min_elt trees in
let trees' = HSet.remove (f1,a) trees in
if HSet.is_empty trees' then
a
else
let f2, b = HSet.min_elt trees' in
let trees'' = HSet.remove (f2,b) trees' in
let trees''' = HSet.add (f1 + f2, Node (a, b)) trees'' in
aux trees'''
in
aux leaves
let rec print_tree code = function
| Leaf c ->
Printf.printf "%c\t%s\n" c (String.concat "" (List.rev code));
| Node (l, r) ->
print_tree ("0"::code) l;
print_tree ("1"::code) r
let () =
let str = "this is an example for huffman encoding" in
let charFreqs = Hashtbl.create 42 in
String.iter (fun c ->
let old =
try Hashtbl.find charFreqs c
with Not_found -> 0 in
Hashtbl.replace charFreqs c (old+1)
) str;
let charFreqs = Hashtbl.fold (fun c f acc -> (c,f)::acc) charFreqs [] in
let tree = build_tree charFreqs in
print_string "Symbol\tHuffman code\n";
print_tree [] tree