/* REXX ****************************************** Version 11.12.2015 ** * Rexx Tokenizer to find function invocations *----------------------------------------------------------------------- * Tokenization remembers the following for each token * t.i text of token * t.i.0t type of token: Cx/V/K/N/O/S/L * comment/variable/keyword/constant/operator/string/label * t.i.0il line of token in the input * t.i.0ic col of token in the input * t.i.0prev index of token starting previous instruction * t.i.0ol line of token in the output * t.i.0oc col of token in the output *---------------------------------------------------------------------*/ Call time 'R' Parse Upper Arg fid '(' options If fid='?' Then Do Say 'Tokenike a REXX proram and list the function invocations found' Say ' which are of the form symbol(... or ''string''(...' Say ' (the left parenthesis must immediately follow the symbol' Say ' or literal string.)' Say 'Syntax:' Say ' TKZ pgm < ( >' Exit End g.=0 Call init /* Initialize constants etc. */ g.0cont='01'x g.0breakc='02'x cnt.=0 Call readin /* Read input file into l.* */ Call tokenize /* Tokenize the input */ tk='' Call process_tokens g.0fun_list=wordsort(g.0fun_list) Do While g.0fun_list>'' Parse Var g.0fun_list fun g.0fun_list Say right(cnt.fun,3) fun End Say time('E') 'seconds elapsed for' t.0 'tokens in' g.0lines 'lines.' Exit init: /*********************************************************************** * Initialize constants etc. ***********************************************************************/ g.='' g.0debug=0 /* set debug off by default */ fid=strip(fid) If fid='' Then /* no file specified */ Exit exit(12 'no input file specified') Parse Var fid fn '.' os=options /* options specified on command */ g.0debug=0 /* turn off debug output */ g.0tokens=0 /* No token file */ Do While os<>'' /* process them individually */ Parse Upper Var os o os /* pick one */ Select When abbrev('DEBUG',o,1) Then /* Debug specified */ g.0debug=1 /* turn on debug output */ When abbrev('TOKENS',o,1) Then /* Write a file with tokens */ g.0tokens=1 Otherwise /* anything else */ Say 'Unknown option:' o /* tell the user and ignore it */ End End If g.0debug Then Do g.0dbg=fn'.dbg'; '@erase' g.0dbg End If g.0tokens Then Do g.0tkf=fn'.tok'; '@erase' g.0tkf End /*********************************************************************** * Language specifics ***********************************************************************/ g.0special='+-*/%''";:<>^\=|,()& '/* special characters */ /* chars that may start a var */ g.0a='abcdefghijklmnopqrstuvwxyz'||, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ@#$!?_' g.0n='1234567890' /* numeric characters */ g.0vc=g.0a||g.0n||'.' /* var-character */ /* multi-character operators */ g.0opx='&& ** // << <<= <= <> == >< >= >> >>=', '^< ^<< ^= ^== ^> ^>> \< \<< \= \== \> \>> ||' t.='' /* token list */ Return readin: /*********************************************************************** * Read the file to be formatted ***********************************************************************/ lc='' i=0 g.0lines=0 Do While lines(fid)<>0 li=linein(fid) g.0lines=g.0lines+1 If i>0 Then lc=strip(l.i,'T') If right(lc,1)=',' Then Do l.i=left(lc,length(lc)-1) li End Else Do i=i+1 l.i=li End End l.0=i Call lineout fid t=l.0+1 l.t=g.0eof /* add a stopper at program end */ l.0=t /* adjust number of lines */ g.0il=t /* remember end of program */ Return tokenize: /*********************************************************************** * First perform tokenization * Input: l.* Program text * Output: t.* Token list * t.0t.i token type CA CB CC C comment begin/middle/end * S string * O operator (special character) * V variable symbol * N constant * X end of text * Note: special characters are treated as separate tokens ***********************************************************************/ li=0 /* line index */ ti=0 /* token index */ Do While li'' /* work through the line */ nbc=verify(l,' ') /* first non-blank column */ g.0cc=g.0cc+nbc /* advance to this */ If g.0newline='' Then Do If t.ti.0ic='' Then t.ti.0ic=0 If g.0cc=t.ti.0ic+length(t.ti) Then Do tj=ti+1 t.tj.0ad=1 End End l=substr(l,nbc) /* and continue with rest of line */ Parse Var l c +1 l 1 c2 +2 /* get character(s) */ g.0tb=g.0cc /* remember where token starts */ Select /* take a decision */ When c2='/*' Then /* comment starts here */ Call comment /* process comment */ When pos(c,'''"')>0 Then /* literal string starts here */ Call string c /* process literal string */ Otherwise /* neither comment nor literal */ Call token /* get other token */ End /* cmt, string, or token done */ End /* end of loop over line */ End /* end of loop over program */ t.0=ti /* store number of tokens */ Call dsp ti 'tokens' l.0 'lines' Return comment: /*********************************************************************** * Parse a comment * Nested comments are supported ***********************************************************************/ cbeg=t.ti.0il l=substr(l,2) /* continue after slash-asterisk */ g.0cc=g.0cc+1 /* update current char position */ t='/*' /* token so far */ incmt=1 /* indicate "within a comment" */ Do Until incmt=0 /* loop until done */ bc=pos('/*',l) /* next begin comment, if any */ ec=pos('*/',l) /* next end comment, if any */ Select /* decide */ When bc>0 &, /* begin-comment found */ (ec=0 | bc0 Then Do /* end-comment found */ t=t||left(l,ec+1) /* add all to token */ incmt=incmt-1 /* decrement nesting */ l=substr(l,ec+2) /* continue after asterisk-slash */ g.0cc=g.0cc+ec+1 /* update current char position */ End Otherwise Do /* no further comment bracket */ Call addtoken t||l,ct() /* rest of line to token */ li=li+1 /* proceed to next line */ l=l.li /* contents of next line */ g.0newline=1 If l=g.0eof Then Do Say 'Comment started in line' cbeg 'is not closed before EOF' Exit err(58) End g.0cc=0 /* current char (none) */ g.0tb=1 /* token (comment) starts here */ End End End Call addtoken t,ct() /* last (or only) comment token */ If pos('*debug*',t)>0 Then g.0debug=1 Return ct: /*********************************************************************** * Comment type ***********************************************************************/ If incmt>0 Then Do /* within a comment */ If t.ti.0t='CA' |, /* prev. token was start or cont */ t.ti.0t='CB' Then Return 'CB' /* this is continuation */ Else Return 'CA' /* this is start */ End Else Do /* comment is over */ If t.ti.0t='CA' |, /* prev. token was start or cont */ t.ti.0t='CB' Then Return 'CC' /* this is final part */ Else Return 'C' /* this is just a comment */ End string: /*********************************************************************** * Parse a string * take care of '111'B and '123'X ***********************************************************************/ Parse Arg delim /* string delimiter found */ t=delim /* star building the token */ instr=1 /* note we are within a string */ g.0ss=li Do Until instr=0 /* continue until it is over */ se=pos(delim,l) /* ending delimiter */ If se>0 Then Do /* found */ If substr(l,se+1,1)=delim Then Do /* but it is doubled */ t=t||left(l,se+1) /* so add all so far to token */ l=substr(l,se+2) /* and take rest of line */ g.0cc=g.0cc+se+1 /* and set current character pos */ End Else Do /* not another one */ instr=0 /* string is done */ t=t||left(l,se) /* add the string data to token */ l=substr(l,se+1) /* take the rest of the line */ g.0cc=g.0cc+se /* and set current character pos */ If pos(translate(left(l,1)),'BX')>0 Then If pos(substr(l,2,1),g.0vc)=0 Then Do t=t||left(l,1) /* add the char to the token */ l=substr(l,2) /* take the rest of the line */ g.0cc=g.0cc+1 /* and set current character pos */ End End End Else Do /* not found */ Call addtoken t||l,'S' /* store the token */ g.0lasttoken='' /* reset this switch */ li=li+1 /* go on to the next line */ If li>l.0 Then /* there is no next line */ Exit err(60,'string starting in line' g.0ss, 'does not end before end of file') Else Say 'string starting at line' g.0ss 'extended over line boundary' l=l.li /* take contents of the next line */ g.0cc=1 /* current char position */ g.0tb=1 /* ?? */ End End Call addtoken t,'S' /* store the token */ Return token: /*********************************************************************** * Parse a token ***********************************************************************/ IF c=g.0comma & l='' Then Do t=g.0cont type='O' /* O (for operator - not quite...)*/ End Else Do If pos(c,g.0special)>0 Then Do /* a special character */ t=c /* take it as is */ type='O' /* O (for operator - not quite...)*/ End Else Do /* some other character */ nsp=verify(l,g.0special,'M') /* find delimiting character */ If nsp>0 Then Do /* some character found */ t=c||left(l,nsp-1) /* take all up to this character */ l=substr(l,nsp) /* and continue from there */ End Else Do /* none found */ t=c||l /* add rest of line to token */ l='' /* and all is used up */ End g.0cc=g.0cc+length(t)-1 /* adjust current char position */ If pos(right(t,1),'eE')>0 &, /* consider nxxxE+nn case */ pos(left(l,1),'+-')>0 Then Do If pos(left(t,1),'.1234567890')>0 Then /* start . or digit */ If pos(substr(l,2,1),'1234567890')>0 Then Do /* dig after+- */ nsp=verify(substr(l,2),g.0special,'M')+1 /* find end */ If nsp>1 Then /* delimiting character found */ exp=substr(l,2,nsp-2) /* exponent (if numeric) */ Else exp=substr(l,2) If verify(exp,'0123456789')=0 Then Do t=t||left(l,1)||exp l=substr(l,length(exp)+2) g.0cc=g.0cc+length(exp)+2 End End End Select When isvar(t) Then /* token qualifies as variable */ type='V' When isconst(t) Then /* token is a constant symbol */ type='N' When t=g.0eof Then /* token is end of file indication*/ type='X' Otherwise Do /* anything else is an error */ Say 'li='li Say l Say 'token error' Trace ?R Exit err(62,'token' t 'is neither variable nor constant') End End If left(l,1)='(' Then type=type||'F' End End Call addtoken t,type /* store the token */ Return addtoken: /*********************************************************************** * Add a token to the token list ***********************************************************************/ Parse Arg t,type /* token and its type */ If type='O' Then Do /* operator (special character) */ If pos(t,'><=&|/*')>0 Then Do /* char for composite operator */ If wordpos(t.ti||t,g.0opx)>0 Then Do /* composite operator */ t.ti=t.ti||t /* use concatenation */ /* does not handle =/**/= */ t='' /* we are done */ Return End End End If type='CC' & t='*/' Then Do /* The special case for SPA */ Return End ti=ti+1 /* increment index */ t.ti=t /* store token's value */ t.ti.0t=left(type,1) /* and its type */ t.ti.0nl=g.0newline /* token starts a new line */ g.0newline='' /* reset new line switch */ If t.ti.0t='C' Then Do t.ti.0t=type If left(t.ti,3)='/* ' &, right(t.ti,3)=' */' Then t.ti='/*' strip(substr(t.ti,4,length(t.ti)-6)) '*/' End t.ti.0f=substr(type,2,1) /* 'F' if possibly a function */ Call setpos ti li g.0tb /* and its position */ If left(type,1)='C' Then /* ??? */ If left(t.ti,2)<>'/*' Then Do ts=strip(t.ti,'L') t.ti.0oc=t.ti.0oc+length(t.ti)-length(ts) t.ti=ts End If t.ti.0ol='' Then t.ti.0ol=li If t.ti.0oc='' Then t.ti.0oc=0 t.ti.0il=t.ti.0ol /* and its position */ t.ti.0ic=t.ti.0oc /* and its position */ Call dsp ti t.ti t.ti.0il'/'t.ti.0ic '->' t.ti.0ol'/'t.ti.0oc t='' /* reset token variable */ Return lookback: /*********************************************************************** * Look back if... ***********************************************************************/ Do i_=ti To 1 By -1 Select When left(t.i_.0t,1)='C' Then Nop When t.i_.0used<>1 &, (t.i_=g.0comma |, t.i_=g.0cont) Then Do t.i_.0used=1 t.i_=g.0cont Return '0' End Otherwise Return '1' End End Return '1' isvar: /*********************************************************************** * Determine if a string qualifies as variable name ***********************************************************************/ Parse Arg a_ +1 b_ res=(pos(a_,g.0a)>0) &, (verify(b_,g.0a||g.0n||'.')=0) Return res isconst: /*********************************************************************** * Determine if a string qualifies as constant ***********************************************************************/ Parse Arg a_ res=(verify(a_,g.0a||g.0n||'.+-')=0) /* ??? */ Return res setpos: Parse Arg seti sol soc setz='setpos:' t.seti t.seti.0ol'/'t.seti.0oc '-->', sol'/'soc '('sigl')' Call dsp setz t.seti.0ol=sol t.seti.0oc=soc Return process_tokens: /*********************************************************************** * Process the token list ***********************************************************************/ Do i=1 To t.0 If g.0tokens Then Call lineout g.0tkf,right(i,4) right(t.i.0il,3)'.'left(t.i.0ic,3), right(t.i.0ol,3)'.'left(t.i.0oc,3), left(t.i.0t,2) left(t.i,25) If t.i='(' Then Do j=i-1 If t.j.0ol=t.i.0il & , t.j.0oc+length(t.j)=t.i.0ic &, pos(t.j.0t,'VS')>0 Then Call store_f t.j End End If g.0tokens Then Call lineout g.0tkf Return store_f: Parse Arg funct If wordpos(funct,g.0fun_list)=0 then g.0fun_list=g.0fun_list funct cnt.funct=cnt.funct+1 Return dsp: /*********************************************************************** * Record (and display) a debug line ***********************************************************************/ Parse Arg ol_.1 If g.0debug>0 Then Call lineout g.0dbg,ol_.1 If g.0debug>1 Then Say ol_.1 Return wordsort: Procedure /********************************************************************** * Sort the list of words supplied as argument. Return the sorted list **********************************************************************/ Parse Arg wl wa.='' wa.0=0 Do While wl<>'' Parse Var wl w wl Do i=1 To wa.0 If wa.i>w Then Leave End If i<=wa.0 Then Do Do j=wa.0 To i By -1 ii=j+1 wa.ii=wa.j End End wa.i=w wa.0=wa.0+1 End swl='' Do i=1 To wa.0 swl=swl wa.i End Return strip(swl) err: /*********************************************************************** * Diagnostic error exit ***********************************************************************/ Parse Arg errnum, errtxt Say 'err:' errnum errtxt If t.ti.0il>g.0il Then Say 'Error' arg(1) 'at end of file' Else Do Say 'Error' arg(1) 'around line' t.ti.0il', column' t.ti.0ic _=t.ti.0il Say l._ Say copies(' ',t.ti.0ic-1)'|' End If errtxt<>'' Then Say ' 'errtxt Exit 12