76 lines
3.0 KiB
Prolog
76 lines
3.0 KiB
Prolog
%____________________________________________________________________
|
|
% Implements the American soundex algorithm
|
|
% as described at https://en.wikipedia.org/wiki/Soundex
|
|
% In SWI Prolog, a 'string' is specified in 'single quotes',
|
|
% while a "list of codes" may be specified in "double quotes".
|
|
% So, "abc" is equivalent to [97, 98, 99], while
|
|
% 'abc' = abc (an atom), and 'Abc' is also an atom. There are
|
|
% conversion methods that can produce lists of characters:
|
|
% ?- atom_chars('Abc', X).
|
|
% X = ['A', b, c].
|
|
% or lists of codes (mapping to unicode code points):
|
|
% ?- atom_codes('Abc', X).
|
|
% X = [65, 98, 99].
|
|
% and the conversion predicates are bidirectional.
|
|
% ?- atom_codes(A, [65, 98, 99]).
|
|
% A = 'Abc'.
|
|
% A single character code may be specified as 0'C, where C is the
|
|
% character you want to convert to a code.
|
|
%~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
% Relates groups of consonants to representative digits
|
|
creplace(Ch, 0'1) :- member(Ch, "bfpv").
|
|
creplace(Ch, 0'2) :- member(Ch, "cgjkqsxz").
|
|
creplace(Ch, 0'3) :- member(Ch, "dt").
|
|
creplace(0'l, 0'4).
|
|
creplace(Ch, 0'5) :- member(Ch, "mn").
|
|
creplace(0'r, 0'6).
|
|
|
|
% strips elements contained in <Set> from a string
|
|
strip(Set, [H|T], Tr) :- memberchk(H, Set), !, strip(Set, T, Tr).
|
|
strip(Set, [H|T], [H|Tr]) :- !, strip(Set, T, Tr).
|
|
strip(_, [], []).
|
|
|
|
% Replace consonants with appropriate digits
|
|
consonants([H|T], [Ch|Tr]) :- creplace(H, Ch), !, consonants(T, Tr).
|
|
consonants([H|T], [H|Tr]) :- !, consonants(T, Tr).
|
|
consonants([], []).
|
|
|
|
% Replace adjacent digits with single digit
|
|
adjacent([Ch, Ch|T], [Ch|Tr]) :- between(0'0, 0'9, Ch), !, adjacent(T, Tr).
|
|
adjacent([H|T], [H|Tr]) :- !, adjacent(T, Tr).
|
|
adjacent([], []).
|
|
|
|
% Replace first character with original one if its a digit
|
|
chk_digit([H,D|T], [H|T]) :- between(0'0, 0'9, D), !.
|
|
chk_digit([_,H|T], [H|T]).
|
|
|
|
% Faithul representation of soundex rules:
|
|
% 1: Save 1st letter, strip "hw"
|
|
% 2: Replace consonants with appropriate digits
|
|
% 3: Replace adjacent digits with single occurrence
|
|
% 4: Remove vowels except 1st letter
|
|
% 5: If 1st symbol is a digit, replace it with saved 1st letter
|
|
% 6: Ensure trailing zeroes
|
|
do_soundex([H|T], Res) :-
|
|
strip("hw", T, Ts), consonants([H|Ts], Tc),
|
|
adjacent(Tc, [C|Ta]), strip("aeiouy", Ta, Tv),
|
|
chk_digit([H,C|Tv], Td), append(Td, "0000", Tr),
|
|
atom_codes(Tf, Tr), sub_string(Tf, 0, 4, _, Res).
|
|
|
|
% Prepare string, convert to lower case and do the soundex alogorithm
|
|
soundex(Text, Res) :-
|
|
downcase_atom(Text, Lower), atom_codes(Lower, T),
|
|
do_soundex(T, Res).
|
|
|
|
% Perform tests to check that the right values are produced
|
|
test(S,V) :- not(soundex(S,V)), writef('%w failed\n', [S]).
|
|
test :- test('Robert', 'r163'), !, fail.
|
|
test :- test('Rupert', 'r163'), !, fail.
|
|
test :- test('Rubin', 'r150'), !, fail.
|
|
test :- test('Ashcroft', 'a261'), !, fail.
|
|
test :- test('Ashcraft', 'a261'), !, fail.
|
|
test :- test('Tymczak', 't522'), !, fail.
|
|
test :- test('Pfister', 'p236'), !, fail.
|
|
test. % Succeeds only if all the tests succeed
|