RosettaCodeData/Task/Stream-merge/AWK/stream-merge.awk

66 lines
1.8 KiB
Awk

# syntax: GAWK -f STREAM_MERGE.AWK filename(s) >output
# handles 1 .. N files
#
# variable    purpose
# ----------  -------
# data_arr    holds the last record read from each file
# fn_arr      filenames given on the command line
# fnr_arr     record count for each file
# status_arr  file status: 1=more data, 0=EOF, -1=error
#
# Merge the pre-sorted input files named on the command line into one
# sorted stream on stdout.
#
# All work happens inside BEGIN: every file is read explicitly with
# "getline <file", and the script exits before awk's main input loop starts.
#
# Diagnostics and the end-of-job summary go to "/dev/stderr", which gawk
# handles internally on every platform. (The name "con" is the console only
# on Windows; elsewhere it would create a regular file called "con".)
BEGIN {
    files = ARGC - 1
    nr = 0          # total records read across all files
    errors = 0      # number of problems reported through error()

    # Prime every stream: remember its name, its status and its first record.
    for (i = 1; i <= files; i++) {
        fn_arr[i] = ARGV[i]
        fnr_arr[i] = 0
        status_arr[i] = (getline <fn_arr[i])
        if (status_arr[i] == 1) {
            nr++
            fnr_arr[i]++
            data_arr[i] = $0
        }
        else if (status_arr[i] < 0) {
            error(sprintf("FILENAME=%s, status=%d, file not found",fn_arr[i],status_arr[i]))
        }
    }

    while (1) { # until EOF in all files
        # Pick the file whose pending record is lowest. Only files that still
        # have data (status 1) take part; on a tie the lowest-numbered file
        # wins because the comparison is strict.
        # NOTE: awk comparison rules apply - records that look numeric are
        # compared as numbers, all others as strings.
        fno = 0
        for (i = 1; i <= files; i++) {
            if (status_arr[i] == 1 && (fno == 0 || data_arr[i] < data_arr[fno])) {
                fno = i
            }
        }
        if (fno == 0) { # nothing left to read in any file
            break
        }

        # Emit the winning record, then refill from the same file.
        printf("%s\n",data_arr[fno])
        status_arr[fno] = (getline <fn_arr[fno])
        if (status_arr[fno] == 1) {
            nr++
            fnr_arr[fno]++
            # The record just written must not sort after its successor;
            # if it does, the input file was not sorted. The record is still
            # merged, the problem is only reported.
            if (data_arr[fno] > $0) {
                error(sprintf("FILENAME=%s, FNR=%d, out of sequence",fn_arr[fno],fnr_arr[fno]))
            }
            data_arr[fno] = $0
        }
    }

    # End of job: summary on stderr so it never mixes with the merged output.
    # The exit status stays 0 even when errors were counted.
    printf("input: %d files, %d records, %d errors\n",files,nr,errors) >"/dev/stderr"
    exit(0)
}

# Report a problem on stderr and count it for the end-of-job summary.
#   message - text printed after the "error: " prefix
function error(message) {
    printf("error: %s\n",message) >"/dev/stderr"
    errors++
}