RosettaCodeData/Task/Entropy/Prolog/entropy.pro

132 lines
3.4 KiB
Prolog

:-module(shannon_entropy, [shannon_entropy/2]).
%! shannon_entropy(+String, -Entropy) is det.
%
% Calculate the Shannon Entropy of String.
%
% Example query:
% ==
% ?- shannon_entropy(1223334444, H).
% H = 1.8464393446710154.
% ==
%
shannon_entropy(String, Entropy):-
atom_chars(String, Cs)
,relative_frequencies(Cs, Frequencies)
,findall(CI
,(member(_C-F, Frequencies)
,log2(F, L)
,CI is F * L
)
,CIs)
,foldl(sum, CIs, 0, E)
,Entropy is -E.
%! frequencies(+Characters,-Frequencies) is det.
%
% Calculates the relative frequencies of elements in the list of
% Characters.
%
% Frequencies is a key-value list with elements of the form:
% C-F, where C a character in the list and F its relative
% frequency in the list.
%
% Example query:
% ==
% ?- relative_frequencies([a,a,a,b,b,b,b,b,b,c,c,c,a,a,f], Fs).
% Fs = [a-0.3333333333333333, b-0.4, c-0.2,f-0.06666666666666667].
% ==
%
relative_frequencies(List, Frequencies):-
run_length_encoding(List, Rle)
% Sort Run-length encoded list and aggregate lengths by element
,keysort(Rle, Sorted_Rle)
,group_pairs_by_key(Sorted_Rle, Elements_Run_lengths)
,length(List, Elements_in_list)
,findall(E-Frequency_of_E
,(member(E-RLs, Elements_Run_lengths)
% Sum the list of lengths of runs of E
,foldl(plus, RLs, 0, Occurences_of_E)
,Frequency_of_E is Occurences_of_E / Elements_in_list
)
,Frequencies).
%! run_length_encoding(+List, -Run_length_encoding) is det.
%
% Converts a list to its run-length encoded form where each "run"
% of contiguous repeats of the same element is replaced by that
% element and the length of the run.
%
% Run_length_encoding is a key-value list, where each element is a
% term:
%
% Element:term-Repetitions:number.
%
% Example query:
% ==
% ?- run_length_encoding([a,a,a,b,b,b,b,b,b,c,c,c,a,a,f], RLE).
% RLE = [a-3, b-6, c-3, a-2, f-1].
% ==
%
run_length_encoding([], []-0):-
!. % No more results needed.
run_length_encoding([Head|List], Run_length_encoded_list):-
run_length_encoding(List, [Head-1], Reversed_list)
% The resulting list is in reverse order due to the head-to-tail processing
,reverse(Reversed_list, Run_length_encoded_list).
%! run_length_encoding(+List,+Initialiser,-Accumulator) is det.
%
% Business end of run_length_encoding/3. Calculates the run-length
% encoded form of a list and binds the result to the Accumulator.
% Initialiser is a list [H-1] where H is the first element of the
% input list.
%
run_length_encoding([], Fs, Fs).
% Run of F consecutive occurrences of C
run_length_encoding([C|Cs],[C-F|Fs], Acc):-
% Backtracking would produce successive counts
% of runs of C at different indices in the list.
!
,F_ is F + 1
,run_length_encoding(Cs, [C-F_| Fs], Acc).
% End of a run of consecutive identical elements.
run_length_encoding([C|Cs], Fs, Acc):-
run_length_encoding(Cs,[C-1|Fs], Acc).
/* Arithmetic helper predicates */
%! log2(N, L2_N) is det.
%
% L2_N is the logarithm with base 2 of N.
%
log2(N, L2_N):-
L_10 is log10(N)
,L_2 is log10(2)
,L2_N is L_10 / L_2.
%! sum(+A,+B,?Sum) is det.
%
% True when Sum is the sum of numbers A and B.
%
% Helper predicate to allow foldl/4 to do addition. The following
% call will raise an error (because there is no predicate +/3):
% ==
% foldl(+, [1,2,3], 0, Result).
% ==
%
% This will not raise an error:
% ==
% foldl(sum, [1,2,3], 0, Result).
% ==
%
sum(A, B, Sum):-
must_be(number, A)
,must_be(number, B)
,Sum is A + B.