52 lines
1.2 KiB
Awk
52 lines
1.2 KiB
Awk
# syntax: GAWK -f FASTA_FORMAT.AWK filename
# stop processing each file when an error is encountered
# Per-record processing, written as a chain of pattern-action rules.
# Every rule either falls through to the next one or leaves the record
# via next/nextfile, so the rules behave like one sequential body.
# header_found is 1 while a header has been printed and its sequence
# line is still open (no terminating newline written yet), else 0.

# First record of each file: reset state and require a comment or header.
FNR == 1 {
    header_found = 0
    if ($0 !~ /^[;>]/) {
        error("record is not valid")
        nextfile
    }
}

# A comment begins with a ";" and is skipped.
/^;/ {
    next
}

# Header line: close the previous sequence line if one is still open,
# then print the header text (without the leading ">") followed by ": ".
/^>/ {
    if (header_found) {
        printf("\n") # EOL for previous sequence
    }
    printf("%s: ", substr($0, 2))
    header_found = 1
    next
}

# Records containing a space or tab are ignored.
/[ \t]/ {
    next
}

# A sequence may end with an "*": print the record without the "*",
# terminate the output line, and mark the sequence as closed.
/\*$/ {
    if (!header_found) {
        error("end of sequence found but header is missing")
        nextfile
    }
    printf("%s\n", substr($0, 1, length($0) - 1))
    header_found = 0
    next
}

# Any other record is sequence data and is appended to the current
# output line; it is an error if no header is open.
{
    if (!header_found) {
        error("header not found")
        nextfile
    }
    printf("%s", $0)
}
|
|
# At the end of each input file, terminate the output line of a sequence
# that is still open (one that did not end with an "*").
ENDFILE {
    if (header_found > 0) {
        printf("\n")
        # Clear the flag here: gawk also runs ENDFILE for empty files,
        # for which no record rule fires to reset it, so a stale flag
        # would print a spurious blank line for a following empty file.
        header_found = 0
    }
}
|
|
# Set the exit status: 0 when no errors were counted, 1 otherwise.
END {
    if (errors == 0) {
        exit 0
    }
    else {
        exit 1
    }
}
|
|
# error(message): report a problem with the current record and count it.
#   message - short description of what is wrong
# The diagnostic includes FILENAME, FNR and the offending record ($0).
# Increments the global "errors" counter, which the END rule reads to
# choose the exit status.
function error(message) {
    # Write to gawk's portable "/dev/stderr" rather than the DOS-only
    # "con" device, which on other systems would create a file named "con".
    printf("error: FILENAME=%s, FNR=%d, %s, %s\n",FILENAME,FNR,message,$0) >"/dev/stderr"
    errors++
}
|