RosettaCodeData/Task/Soundex/AWK/soundex.awk

60 lines
1.3 KiB
Awk

#!/usr/bin/awk -f
# Start-up: begin with an empty table of Soundex codes and choose the text
# placed between words that are stored under the same code.
BEGIN {
    delete homs
    subsep = ", "
}

# Each input line whose first character matches [a-zA-Z] is treated as one
# word: compute its Soundex code and record the whole line under that code.
$0 ~ /^[a-zA-Z]/ {
    sdx = strToSoundex($0)
    addHom(sdx, $0)
}

# Once all input has been read, list every code with up to three of its words.
END {
    showHoms(3)
}
# strToSoundex(s) - return the four-character Soundex code of s.
#
# The result is the upper-cased first character of s followed by up to three
# digits, right-padded with "0" to exactly four characters. An empty s yields
# the empty string.
#
# Coding rules applied to the characters after the first:
#   - a character coded 7 ("h" or "w") is skipped and does not break a run of
#     equal digits;
#   - a character coded 0 (anything without a digit, e.g. a vowel) emits
#     nothing but does break a run, so the same digit may follow again;
#   - any other digit is appended unless it equals the previous code.
#
# code, pos, letter, digit and prev are locals.
function strToSoundex(s, code, pos, letter, digit, prev) {
    if (!length(s))
        return ""

    s = tolower(s)

    # The first character is kept verbatim (upper-cased); its digit only
    # serves to suppress an immediately repeated code.
    letter = substr(s, 1, 1)
    code = toupper(letter)
    prev = charToSoundex(letter)

    pos = 1
    while (++pos <= length(s)) {
        digit = charToSoundex(substr(s, pos, 1))
        if (digit != 7) {
            if (digit != 0 && digit != prev)
                code = code digit
            prev = digit
        }
    }

    # Pad with zeros, then cut to the fixed width of four.
    return substr(code "0000", 1, 4)
}
# charToSoundex(ch) - map a lower-case letter to its Soundex digit.
#
# Returns 1-6 for the consonant groups below, 7 for "h" and "w", and 0 for
# everything else (vowels, upper-case letters, non-letters, empty string).
# The second parameter is kept only so the signature stays unchanged; it is
# not used.
function charToSoundex(ch, cd) {
    if (ch ~ /[bfpv]/)     return 1
    if (ch ~ /[cgjkqsxz]/) return 2
    if (ch ~ /[dt]/)       return 3
    if (ch == "l")         return 4
    if (ch ~ /[mn]/)       return 5
    if (ch == "r")         return 6
    if (ch ~ /[hw]/)       return 7
    return 0
}
# addHom(sdx, word) - record word under the Soundex code sdx.
#
# The global array homs maps each code to a single string holding its words
# in insertion order, joined by the global separator subsep. The first word
# for a code is stored on its own; later words are appended after subsep.
function addHom(sdx, word) {
    if (homs[sdx]) {
        homs[sdx] = homs[sdx] subsep word
    } else {
        # Concatenate with "" so the stored value is always a string.
        homs[sdx] = "" word
    }
}
# showHoms(toShow) - print every Soundex code together with its words.
#
# For each code in the global array homs, writes one line: the code, then at
# most toShow of the words stored under it (the stored string is split on the
# global separator subsep), each followed by a single space, then "..." if
# the code has more than toShow words. Codes appear in whatever order awk's
# for-in loop yields.
#
# i, n, wl and j are locals.
function showHoms(toShow, i, n, wl, j) {
    for (i in homs) {
        # Always print through an explicit "%s" format: the code and the
        # words come from input, and using them as the format string would
        # misprint or abort on any "%" they contain.
        printf "%s ", i
        n = split(homs[i], wl, subsep)
        for (j = 1; j <= toShow && j <= n; j++) {
            printf "%s ", wl[j]
        }
        print (n > toShow ? "..." : "")
    }
}