# syntax: GAWK -f FUNCTION_FREQUENCY.AWK filename(s).AWK # # sorting: # PROCINFO["sorted_in"] is used by GAWK # SORTTYPE is used by Thompson Automation's TAWK # BEGIN { # create array of keywords to be ignored by lexer asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" \ "do:else:exit:exp:for:getline:gsub:if:in:index:int:" \ "length:log:match:next:print:printf:rand:return:sin:" \ "split:sprintf:sqrt:srand:strftime:sub:substr:system:tolower:toupper:while", keywords,":") # build the symbol-state table split("00:00:00:00:00:00:00:00:00:00:" \ "20:10:10:12:12:11:07:00:00:00:" \ "08:08:08:08:08:33:08:00:00:00:" \ "08:44:08:36:08:08:08:00:00:00:" \ "08:44:45:42:42:41:08",machine,":") # parse the input state = 1 for (;;) { symb = lex() # get next symbol nextstate = substr(machine[state symb],1,1) act = substr(machine[state symb],2,1) # perform required action if (act == "0") { # do nothing } else if (act == "1") { # found a function call if (!(inarray(tok,names))) { names[++nnames] = tok } ++xnames[tok] } else if (act == "2") { # found a variable or array if (tok in Local) { tok = tok "(" funcname ")" if (!(inarray(tok,names))) { names[++nnames] = tok } ++xnames[tok] } else { tok = tok "()" if (!(inarray(tok,names))) { names[++nnames] = tok } ++xnames[tok] } } else if (act == "3") { # found a function definition funcname = tok } else if (act == "4") { # found a left brace braces++ } else if (act == "5") { # found a right brace braces-- if (braces == 0) { delete Local funcname = "" nextstate = 1 } } else if (act == "6") { # found a local variable declaration Local[tok] = 1 } else if (act == "7") { # found end of file break } else if (act == "8") { # found an error printf("error: FILENAME=%s, FNR=%d\n",FILENAME,FNR) exit(1) } state = nextstate # finished with current token } # format function names for (i=1; i<=nnames; i++) { if (index(names[i],"(") == 0) { tmp_arr[xnames[names[i]]][names[i]] = "" } } # print function names PROCINFO["sorted_in"] = "@ind_num_desc" ; SORTTYPE = 9 for (i in tmp_arr) { PROCINFO["sorted_in"] = "@ind_str_asc" ; SORTTYPE = 1 for (j in tmp_arr[i]) { if (++shown <= 10) { printf("%d %s\n",i,j) } } } exit(0) } function asplit(str,arr,fs, i,n,temp_asplit) { n = split(str,temp_asplit,fs) for (i=1; i<=n; i++) { arr[temp_asplit[i]]++ } } function inarray(val,arr, j) { for (j in arr) { if (arr[j] == val) { return(j) } } return("") } function lex() { for (;;) { if (tok == "(eof)") { return(7) } while (length(line) == 0) { if (getline line == 0) { tok = "(eof)" return(7) } } sub(/^[ \t]+/,"",line) # remove white space, sub(/^"([^"]|\\")*"/,"",line) # quoted strings, sub(/^\/([^\/]|\\\/)+\//,"",line) # regular expressions, sub(/^#.*/,"",line) # and comments if (line ~ /^function /) { tok = "function" line = substr(line,10) return(1) } else if (line ~ /^{/) { tok = "{" line = substr(line,2) return(2) } else if (line ~ /^}/) { tok = "}" line = substr(line,2) return(3) } else if (match(line,/^[A-Za-z_][A-Za-z_0-9]*\[/)) { tok = substr(line,1,RLENGTH-1) line = substr(line,RLENGTH+1) return(5) } else if (match(line,/^[A-Za-z_][A-Za-z_0-9]*\(/)) { tok = substr(line,1,RLENGTH-1) line = substr(line,RLENGTH+1) if (!(tok in keywords)) { return(6) } } else if (match(line,/^[A-Za-z_][A-Za-z_0-9]*/)) { tok = substr(line,1,RLENGTH) line = substr(line,RLENGTH+1) if (!(tok in keywords)) { return(4) } } else { match(line,/^[^A-Za-z_{}]/) tok = substr(line,1,RLENGTH) line = substr(line,RLENGTH+1) } } }