511 lines
21 KiB
Rexx
511 lines
21 KiB
Rexx
/* REXX ****************************************** Version 11.12.2015 **
|
|
* Rexx Tokenizer to find function invocations
|
|
*-----------------------------------------------------------------------
|
|
* Tokenization remembers the following for each token
|
|
* t.i text of token
|
|
* t.i.0t type of token: Cx/V/K/N/O/S/L
|
|
* comment/variable/keyword/constant/operator/string/label
|
|
* t.i.0il line of token in the input
|
|
* t.i.0ic col of token in the input
|
|
* t.i.0prev index of token starting previous instruction
|
|
* t.i.0ol line of token in the output
|
|
* t.i.0oc col of token in the output
|
|
*---------------------------------------------------------------------*/
|
|
Call time 'R'
|
|
Parse Upper Arg fid '(' options
|
|
If fid='?' Then Do
|
|
Say 'Tokenike a REXX proram and list the function invocations found'
|
|
Say ' which are of the form symbol(... or ''string''(...'
|
|
Say ' (the left parenthesis must immediately follow the symbol'
|
|
Say ' or literal string.)'
|
|
Say 'Syntax:'
|
|
Say ' TKZ pgm < ( <Debug> <Tokens> >'
|
|
Exit
|
|
End
|
|
g.=0
|
|
Call init /* Initialize constants etc. */
|
|
g.0cont='01'x
|
|
g.0breakc='02'x
|
|
cnt.=0
|
|
Call readin /* Read input file into l.* */
|
|
Call tokenize /* Tokenize the input */
|
|
tk=''
|
|
Call process_tokens
|
|
g.0fun_list=wordsort(g.0fun_list)
|
|
Do While g.0fun_list>''
|
|
Parse Var g.0fun_list fun g.0fun_list
|
|
Say right(cnt.fun,3) fun
|
|
End
|
|
Say time('E') 'seconds elapsed for' t.0 'tokens in' g.0lines 'lines.'
|
|
Exit
|
|
|
|
init:
|
|
/***********************************************************************
|
|
* Initialize constants etc.
|
|
***********************************************************************/
|
|
g.=''
|
|
g.0debug=0 /* set debug off by default */
|
|
|
|
fid=strip(fid)
|
|
If fid='' Then /* no file specified */
|
|
Exit exit(12 'no input file specified')
|
|
Parse Var fid fn '.'
|
|
|
|
os=options /* options specified on command */
|
|
g.0debug=0 /* turn off debug output */
|
|
g.0tokens=0 /* No token file */
|
|
Do While os<>'' /* process them individually */
|
|
Parse Upper Var os o os /* pick one */
|
|
Select
|
|
When abbrev('DEBUG',o,1) Then /* Debug specified */
|
|
g.0debug=1 /* turn on debug output */
|
|
When abbrev('TOKENS',o,1) Then /* Write a file with tokens */
|
|
g.0tokens=1
|
|
Otherwise /* anything else */
|
|
Say 'Unknown option:' o /* tell the user and ignore it */
|
|
End
|
|
End
|
|
|
|
If g.0debug Then Do
|
|
g.0dbg=fn'.dbg'; '@erase' g.0dbg
|
|
End
|
|
If g.0tokens Then Do
|
|
g.0tkf=fn'.tok'; '@erase' g.0tkf
|
|
End
|
|
|
|
/***********************************************************************
|
|
* Language specifics
|
|
***********************************************************************/
|
|
g.0special='+-*/%''";:<>^\=|,()& '/* special characters */
|
|
/* chars that may start a var */
|
|
g.0a='abcdefghijklmnopqrstuvwxyz'||,
|
|
'ABCDEFGHIJKLMNOPQRSTUVWXYZ@#$!?_'
|
|
g.0n='1234567890' /* numeric characters */
|
|
g.0vc=g.0a||g.0n||'.' /* var-character */
|
|
/* multi-character operators */
|
|
g.0opx='&& ** // << <<= <= <> == >< >= >> >>=',
|
|
'^< ^<< ^= ^== ^> ^>> \< \<< \= \== \> \>> ||'
|
|
|
|
t.='' /* token list */
|
|
Return
|
|
|
|
readin:
|
|
/***********************************************************************
|
|
* Read the file to be formatted
|
|
***********************************************************************/
|
|
lc=''
|
|
i=0
|
|
g.0lines=0
|
|
Do While lines(fid)<>0
|
|
li=linein(fid)
|
|
g.0lines=g.0lines+1
|
|
If i>0 Then
|
|
lc=strip(l.i,'T')
|
|
If right(lc,1)=',' Then Do
|
|
l.i=left(lc,length(lc)-1) li
|
|
End
|
|
Else Do
|
|
i=i+1
|
|
l.i=li
|
|
End
|
|
End
|
|
l.0=i
|
|
Call lineout fid
|
|
t=l.0+1
|
|
l.t=g.0eof /* add a stopper at program end */
|
|
l.0=t /* adjust number of lines */
|
|
g.0il=t /* remember end of program */
|
|
Return
|
|
|
|
tokenize:
|
|
/***********************************************************************
|
|
* First perform tokenization
|
|
* Input: l.* Program text
|
|
* Output: t.* Token list
|
|
* t.0t.i token type CA CB CC C comment begin/middle/end
|
|
* S string
|
|
* O operator (special character)
|
|
* V variable symbol
|
|
* N constant
|
|
* X end of text
|
|
* Note: special characters are treated as separate tokens
|
|
***********************************************************************/
|
|
li=0 /* line index */
|
|
ti=0 /* token index */
|
|
Do While li<l.0 /* as long as there is more input */
|
|
li=li+1 /* index of next line */
|
|
l=l.li /* next line to be processed */
|
|
g.0newline=1
|
|
g.0cc=0 /* current column */
|
|
Call dsp l.li /* debug output */
|
|
If l='' Then /* empty line */
|
|
Call addtoken '/*--*/','C' /* preserve with special token */
|
|
Do While l<>'' /* work through the line */
|
|
nbc=verify(l,' ') /* first non-blank column */
|
|
g.0cc=g.0cc+nbc /* advance to this */
|
|
If g.0newline='' Then Do
|
|
If t.ti.0ic='' Then
|
|
t.ti.0ic=0
|
|
If g.0cc=t.ti.0ic+length(t.ti) Then Do
|
|
tj=ti+1
|
|
t.tj.0ad=1
|
|
End
|
|
End
|
|
l=substr(l,nbc) /* and continue with rest of line */
|
|
Parse Var l c +1 l 1 c2 +2 /* get character(s) */
|
|
g.0tb=g.0cc /* remember where token starts */
|
|
Select /* take a decision */
|
|
When c2='/*' Then /* comment starts here */
|
|
Call comment /* process comment */
|
|
When pos(c,'''"')>0 Then /* literal string starts here */
|
|
Call string c /* process literal string */
|
|
Otherwise /* neither comment nor literal */
|
|
Call token /* get other token */
|
|
End /* cmt, string, or token done */
|
|
End /* end of loop over line */
|
|
End /* end of loop over program */
|
|
t.0=ti /* store number of tokens */
|
|
Call dsp ti 'tokens' l.0 'lines'
|
|
Return
|
|
comment:
|
|
/***********************************************************************
|
|
* Parse a comment
|
|
* Nested comments are supported
|
|
***********************************************************************/
|
|
cbeg=t.ti.0il
|
|
l=substr(l,2) /* continue after slash-asterisk */
|
|
g.0cc=g.0cc+1 /* update current char position */
|
|
t='/*' /* token so far */
|
|
incmt=1 /* indicate "within a comment" */
|
|
Do Until incmt=0 /* loop until done */
|
|
bc=pos('/*',l) /* next begin comment, if any */
|
|
ec=pos('*/',l) /* next end comment, if any */
|
|
Select /* decide */
|
|
When bc>0 &, /* begin-comment found */
|
|
(ec=0 | bc<ec) Then Do /* and no end-comment or later */
|
|
t=t||left(l,bc+1) /* add this all to token */
|
|
incmt=incmt+1 /* increment comment nest-depth */
|
|
l=substr(l,bc+2) /* continue after slash-asterisk */
|
|
g.0cc=g.0cc+bc+1 /* update current char position */
|
|
End
|
|
When ec>0 Then Do /* end-comment found */
|
|
t=t||left(l,ec+1) /* add all to token */
|
|
incmt=incmt-1 /* decrement nesting */
|
|
l=substr(l,ec+2) /* continue after asterisk-slash */
|
|
g.0cc=g.0cc+ec+1 /* update current char position */
|
|
End
|
|
Otherwise Do /* no further comment bracket */
|
|
Call addtoken t||l,ct() /* rest of line to token */
|
|
li=li+1 /* proceed to next line */
|
|
l=l.li /* contents of next line */
|
|
g.0newline=1
|
|
If l=g.0eof Then Do
|
|
Say 'Comment started in line' cbeg 'is not closed before EOF'
|
|
Exit err(58)
|
|
End
|
|
g.0cc=0 /* current char (none) */
|
|
g.0tb=1 /* token (comment) starts here */
|
|
End
|
|
End
|
|
End
|
|
Call addtoken t,ct() /* last (or only) comment token */
|
|
If pos('*debug*',t)>0 Then g.0debug=1
|
|
Return
|
|
|
|
ct:
|
|
/***********************************************************************
|
|
* Comment type
|
|
***********************************************************************/
|
|
If incmt>0 Then Do /* within a comment */
|
|
If t.ti.0t='CA' |, /* prev. token was start or cont */
|
|
t.ti.0t='CB' Then Return 'CB' /* this is continuation */
|
|
Else Return 'CA' /* this is start */
|
|
End
|
|
Else Do /* comment is over */
|
|
If t.ti.0t='CA' |, /* prev. token was start or cont */
|
|
t.ti.0t='CB' Then Return 'CC' /* this is final part */
|
|
Else Return 'C' /* this is just a comment */
|
|
End
|
|
string:
|
|
/***********************************************************************
|
|
* Parse a string
|
|
* take care of '111'B and '123'X
|
|
***********************************************************************/
|
|
Parse Arg delim /* string delimiter found */
|
|
t=delim /* star building the token */
|
|
instr=1 /* note we are within a string */
|
|
g.0ss=li
|
|
Do Until instr=0 /* continue until it is over */
|
|
se=pos(delim,l) /* ending delimiter */
|
|
If se>0 Then Do /* found */
|
|
If substr(l,se+1,1)=delim Then Do /* but it is doubled */
|
|
t=t||left(l,se+1) /* so add all so far to token */
|
|
l=substr(l,se+2) /* and take rest of line */
|
|
g.0cc=g.0cc+se+1 /* and set current character pos */
|
|
End
|
|
Else Do /* not another one */
|
|
instr=0 /* string is done */
|
|
t=t||left(l,se) /* add the string data to token */
|
|
l=substr(l,se+1) /* take the rest of the line */
|
|
g.0cc=g.0cc+se /* and set current character pos */
|
|
If pos(translate(left(l,1)),'BX')>0 Then
|
|
If pos(substr(l,2,1),g.0vc)=0 Then Do
|
|
t=t||left(l,1) /* add the char to the token */
|
|
l=substr(l,2) /* take the rest of the line */
|
|
g.0cc=g.0cc+1 /* and set current character pos */
|
|
End
|
|
End
|
|
End
|
|
Else Do /* not found */
|
|
Call addtoken t||l,'S' /* store the token */
|
|
g.0lasttoken='' /* reset this switch */
|
|
li=li+1 /* go on to the next line */
|
|
If li>l.0 Then /* there is no next line */
|
|
Exit err(60,'string starting in line' g.0ss,
|
|
'does not end before end of file')
|
|
Else
|
|
Say 'string starting at line' g.0ss 'extended over line boundary'
|
|
l=l.li /* take contents of the next line */
|
|
g.0cc=1 /* current char position */
|
|
g.0tb=1 /* ?? */
|
|
End
|
|
End
|
|
Call addtoken t,'S' /* store the token */
|
|
Return
|
|
token:
|
|
/***********************************************************************
|
|
* Parse a token
|
|
***********************************************************************/
|
|
IF c=g.0comma & l='' Then Do
|
|
t=g.0cont
|
|
type='O' /* O (for operator - not quite...)*/
|
|
End
|
|
Else Do
|
|
If pos(c,g.0special)>0 Then Do /* a special character */
|
|
t=c /* take it as is */
|
|
type='O' /* O (for operator - not quite...)*/
|
|
End
|
|
Else Do /* some other character */
|
|
nsp=verify(l,g.0special,'M') /* find delimiting character */
|
|
If nsp>0 Then Do /* some character found */
|
|
t=c||left(l,nsp-1) /* take all up to this character */
|
|
l=substr(l,nsp) /* and continue from there */
|
|
End
|
|
Else Do /* none found */
|
|
t=c||l /* add rest of line to token */
|
|
l='' /* and all is used up */
|
|
End
|
|
g.0cc=g.0cc+length(t)-1 /* adjust current char position */
|
|
If pos(right(t,1),'eE')>0 &, /* consider nxxxE+nn case */
|
|
pos(left(l,1),'+-')>0 Then Do
|
|
If pos(left(t,1),'.1234567890')>0 Then /* start . or digit */
|
|
If pos(substr(l,2,1),'1234567890')>0 Then Do /* dig after+- */
|
|
nsp=verify(substr(l,2),g.0special,'M')+1 /* find end */
|
|
If nsp>1 Then /* delimiting character found */
|
|
exp=substr(l,2,nsp-2) /* exponent (if numeric) */
|
|
Else
|
|
exp=substr(l,2)
|
|
If verify(exp,'0123456789')=0 Then Do
|
|
t=t||left(l,1)||exp
|
|
l=substr(l,length(exp)+2)
|
|
g.0cc=g.0cc+length(exp)+2
|
|
End
|
|
End
|
|
End
|
|
Select
|
|
When isvar(t) Then /* token qualifies as variable */
|
|
type='V'
|
|
When isconst(t) Then /* token is a constant symbol */
|
|
type='N'
|
|
When t=g.0eof Then /* token is end of file indication*/
|
|
type='X'
|
|
Otherwise Do /* anything else is an error */
|
|
Say 'li='li
|
|
Say l
|
|
Say 'token error'
|
|
Trace ?R
|
|
Exit err(62,'token' t 'is neither variable nor constant')
|
|
End
|
|
End
|
|
If left(l,1)='(' Then
|
|
type=type||'F'
|
|
End
|
|
End
|
|
Call addtoken t,type /* store the token */
|
|
Return
|
|
addtoken:
|
|
/***********************************************************************
|
|
* Add a token to the token list
|
|
***********************************************************************/
|
|
Parse Arg t,type /* token and its type */
|
|
If type='O' Then Do /* operator (special character) */
|
|
If pos(t,'><=&|/*')>0 Then Do /* char for composite operator */
|
|
If wordpos(t.ti||t,g.0opx)>0 Then Do /* composite operator */
|
|
t.ti=t.ti||t /* use concatenation */
|
|
/* does not handle =/**/= */
|
|
t='' /* we are done */
|
|
Return
|
|
End
|
|
End
|
|
End
|
|
|
|
If type='CC' & t='*/' Then Do /* The special case for SPA */
|
|
Return
|
|
End
|
|
|
|
ti=ti+1 /* increment index */
|
|
t.ti=t /* store token's value */
|
|
t.ti.0t=left(type,1) /* and its type */
|
|
t.ti.0nl=g.0newline /* token starts a new line */
|
|
g.0newline='' /* reset new line switch */
|
|
If t.ti.0t='C' Then Do
|
|
t.ti.0t=type
|
|
If left(t.ti,3)='/* ' &,
|
|
right(t.ti,3)=' */' Then
|
|
t.ti='/*' strip(substr(t.ti,4,length(t.ti)-6)) '*/'
|
|
End
|
|
t.ti.0f=substr(type,2,1) /* 'F' if possibly a function */
|
|
Call setpos ti li g.0tb /* and its position */
|
|
If left(type,1)='C' Then /* ??? */
|
|
If left(t.ti,2)<>'/*' Then Do
|
|
ts=strip(t.ti,'L')
|
|
t.ti.0oc=t.ti.0oc+length(t.ti)-length(ts)
|
|
t.ti=ts
|
|
End
|
|
If t.ti.0ol='' Then t.ti.0ol=li
|
|
If t.ti.0oc='' Then t.ti.0oc=0
|
|
t.ti.0il=t.ti.0ol /* and its position */
|
|
t.ti.0ic=t.ti.0oc /* and its position */
|
|
Call dsp ti t.ti t.ti.0il'/'t.ti.0ic '->' t.ti.0ol'/'t.ti.0oc
|
|
t='' /* reset token variable */
|
|
Return
|
|
|
|
lookback:
|
|
/***********************************************************************
|
|
* Look back if...
|
|
***********************************************************************/
|
|
Do i_=ti To 1 By -1
|
|
Select
|
|
When left(t.i_.0t,1)='C' Then Nop
|
|
When t.i_.0used<>1 &,
|
|
(t.i_=g.0comma |,
|
|
t.i_=g.0cont) Then Do
|
|
t.i_.0used=1
|
|
t.i_=g.0cont
|
|
Return '0'
|
|
End
|
|
Otherwise
|
|
Return '1'
|
|
End
|
|
End
|
|
Return '1'
|
|
|
|
isvar:
|
|
/***********************************************************************
|
|
* Determine if a string qualifies as variable name
|
|
***********************************************************************/
|
|
Parse Arg a_ +1 b_
|
|
res=(pos(a_,g.0a)>0) &,
|
|
(verify(b_,g.0a||g.0n||'.')=0)
|
|
Return res
|
|
|
|
isconst:
|
|
/***********************************************************************
|
|
* Determine if a string qualifies as constant
|
|
***********************************************************************/
|
|
Parse Arg a_
|
|
res=(verify(a_,g.0a||g.0n||'.+-')=0) /* ??? */
|
|
Return res
|
|
|
|
setpos:
|
|
Parse Arg seti sol soc
|
|
setz='setpos:' t.seti t.seti.0ol'/'t.seti.0oc '-->',
|
|
sol'/'soc '('sigl')'
|
|
Call dsp setz
|
|
t.seti.0ol=sol
|
|
t.seti.0oc=soc
|
|
Return
|
|
|
|
process_tokens:
|
|
/***********************************************************************
|
|
* Process the token list
|
|
***********************************************************************/
|
|
Do i=1 To t.0
|
|
If g.0tokens Then
|
|
Call lineout g.0tkf,right(i,4) right(t.i.0il,3)'.'left(t.i.0ic,3),
|
|
right(t.i.0ol,3)'.'left(t.i.0oc,3),
|
|
left(t.i.0t,2) left(t.i,25)
|
|
If t.i='(' Then Do
|
|
j=i-1
|
|
If t.j.0ol=t.i.0il & ,
|
|
t.j.0oc+length(t.j)=t.i.0ic &,
|
|
pos(t.j.0t,'VS')>0 Then
|
|
Call store_f t.j
|
|
End
|
|
End
|
|
If g.0tokens Then
|
|
Call lineout g.0tkf
|
|
Return
|
|
|
|
store_f:
|
|
Parse Arg funct
|
|
If wordpos(funct,g.0fun_list)=0 then
|
|
g.0fun_list=g.0fun_list funct
|
|
cnt.funct=cnt.funct+1
|
|
Return
|
|
|
|
dsp:
|
|
/***********************************************************************
|
|
* Record (and display) a debug line
|
|
***********************************************************************/
|
|
Parse Arg ol_.1
|
|
If g.0debug>0 Then
|
|
Call lineout g.0dbg,ol_.1
|
|
If g.0debug>1 Then
|
|
Say ol_.1
|
|
Return
|
|
|
|
wordsort: Procedure
|
|
/**********************************************************************
|
|
* Sort the list of words supplied as argument. Return the sorted list
|
|
**********************************************************************/
|
|
Parse Arg wl
|
|
wa.=''
|
|
wa.0=0
|
|
Do While wl<>''
|
|
Parse Var wl w wl
|
|
Do i=1 To wa.0
|
|
If wa.i>w Then Leave
|
|
End
|
|
If i<=wa.0 Then Do
|
|
Do j=wa.0 To i By -1
|
|
ii=j+1
|
|
wa.ii=wa.j
|
|
End
|
|
End
|
|
wa.i=w
|
|
wa.0=wa.0+1
|
|
End
|
|
swl=''
|
|
Do i=1 To wa.0
|
|
swl=swl wa.i
|
|
End
|
|
Return strip(swl)
|
|
|
|
err:
|
|
/***********************************************************************
|
|
* Diagnostic error exit
|
|
***********************************************************************/
|
|
Parse Arg errnum, errtxt
|
|
Say 'err:' errnum errtxt
|
|
If t.ti.0il>g.0il Then
|
|
Say 'Error' arg(1) 'at end of file'
|
|
Else Do
|
|
Say 'Error' arg(1) 'around line' t.ti.0il', column' t.ti.0ic
|
|
_=t.ti.0il
|
|
Say l._
|
|
Say copies(' ',t.ti.0ic-1)'|'
|
|
End
|
|
If errtxt<>'' Then Say ' 'errtxt
|
|
Exit 12
|