60 lines
1.3 KiB
Awk
60 lines
1.3 KiB
Awk
#!/usr/bin/awk -f
|
|
# Start-up: make sure `homs` is an empty array and choose the text that is
# placed between words sharing one Soundex code (the same text is later used
# to split them apart again).
BEGIN {
    split("", homs)      # clears the array; same effect as `delete homs`
    subsep = ", "
}
|
|
|
|
# For every input line that starts with an ASCII letter: compute the Soundex
# code of the whole line (kept in the global `sdx`) and file the line under
# that code.
/^[a-zA-Z]/ {
    addHom((sdx = strToSoundex($0)), $0)
}
|
|
|
|
# After all input is consumed, print every Soundex group, listing at most
# three of its words.
END { showHoms(3) }
|
|
|
|
# strToSoundex(s) -- return the 4-character Soundex code of string s.
#
#   s       the text to encode; it is lower-cased before processing
#   returns "" for an empty string, otherwise the upper-cased first character
#           followed by code digits, zero-padded / truncated to 4 characters
#
# Rules implemented (all driven by charToSoundex):
#   * code 7 (h, w) is skipped entirely and does not break a run of equal codes
#   * code 0 (everything not listed in charToSoundex) emits nothing but does
#     break a run
#   * a digit is emitted only when it differs from the previous remembered code
#
# The names after the extra spaces are locals, per the usual awk convention.
function strToSoundex(s,    code, pos, len, digit, prev) {
    if (length(s) == 0) return ""

    s = tolower(s)
    len = length(s)

    # The first character is kept verbatim (upper-cased); its own code is
    # remembered so that an immediately following same-coded letter is dropped.
    code = toupper(substr(s, 1, 1))
    prev = charToSoundex(substr(s, 1, 1))

    for (pos = 2; pos <= len; pos++) {
        digit = charToSoundex(substr(s, pos, 1))
        if (digit != 7) {
            if (digit && digit != prev) code = code digit
            prev = digit
        }
    }

    return substr(code "0000", 1, 4)
}
|
|
|
|
# charToSoundex(ch) -- map a (lower-case) character to its Soundex class.
#
#   returns 1 for b f p v
#           2 for c g j k q s x z
#           3 for d t
#           4 for l
#           5 for m n
#           6 for r
#           7 for h w   (callers treat this as "ignore")
#           0 for anything else
#
# The tests are tried top to bottom and the first match wins.
# `cd` is a local variable (awk's extra-parameter idiom).
function charToSoundex(ch, cd) {
    cd = (ch ~ /[bfpv]/)     ? 1 \
       : (ch ~ /[cgjkqsxz]/) ? 2 \
       : (ch ~ /[dt]/)       ? 3 \
       : (ch == "l")         ? 4 \
       : (ch ~ /[mn]/)       ? 5 \
       : (ch == "r")         ? 6 \
       : (ch ~ /[hw]/)       ? 7 \
       : 0
    return cd
}
|
|
|
|
# addHom(sdx, word) -- append word to the list stored under code sdx.
#
# Lists live in the global array `homs` as a single string whose items are
# joined with the global `subsep`; the first word of a list is stored without
# a leading separator.
function addHom(sdx, word) {
    # Concatenating with "" keeps the stored value a string in every case.
    homs[sdx] = (homs[sdx] ? homs[sdx] subsep : "") word
}
|
|
|
|
# showHoms(toShow) -- print one line per Soundex code held in `homs`.
#
#   toShow  maximum number of words to list for each code
#
# Each output line is the code, a space, then up to `toShow` words each
# followed by a space; "..." is appended when the group holds more words
# than were shown.  Groups are visited in awk's `for (i in homs)` order,
# which is unspecified.
#
# i, n, wl and j are locals (awk's extra-parameter idiom).
#
# The code and the words are printed through a constant "%s " format.
# Passing them as the format string itself would make any "%" inside an
# input line be interpreted as a conversion specification, corrupting the
# output or aborting the script.
function showHoms(toShow, i, n, wl, j) {
    for (i in homs) {
        printf "%s ", i

        # Words were joined with `subsep` when stored, so split on it again.
        n = split(homs[i], wl, subsep)
        for (j = 1; j <= toShow && j <= n; j++) {
            printf "%s ", wl[j]
        }

        print (n > toShow ? "..." : "")
    }
}
|