85 lines
2.7 KiB
Lua
85 lines
2.7 KiB
Lua
-- module lpeg_token_finder
|
|
local M = {} -- only items added to M will be public (via 'return M' at end)
|
|
local table, concat = table, table.concat
|
|
local error, tonumber = error, tonumber
|
|
|
|
local lpeg = require 'lpeg' -- see http://www.inf.puc-rio.br/~roberto/lpeg/
|
|
local token_name = require 'token_name'
|
|
-- Swap in an empty private environment: from this point on, every
-- unqualified name written below is a field of this table rather than a
-- real global, and only the locals declared above remain reachable.
_ENV = {}

-- Copy the LPeg constructors used by the patterns below into the private
-- environment so they can be called unqualified (P'x', not lpeg.P'x').
local imports = 'P R S C Carg Cb Cc Cf Cg Cp Cs Ct Cmt V'
for name in imports:gmatch('%a+') do
  _ENV[name] = lpeg[name]
end
|
|
|
|
------------------- Define patterns to match tokens -----------------------
|
|
|
|
-- Character classes. 'alpha' also accepts the underscore, so it is the set
-- of characters that may start an identifier.
alpha = R('az', 'AZ') + P'_'
digit = R'09'
alnum = alpha + digit
space = S' \t\r\n'
|
|
|
|
--- Build the token table for a fixed-text token (operator or symbol).
-- @param text  the matched source text, used as a key into token_name
-- @return a table whose 'name' field is token_name[text]
function ptok(text)
  local token = {name = token_name[text]}
  return token
end
|
|
-- Operators and punctuation. Each pattern captures the matched text and
-- hands it to ptok, which turns it into a token table.

-- Two-character operators.
op2c = C(P'<=' + '>=' + '==' + '!=' + '&&' + '||') / ptok

-- One-character operators.
op1c = C(S'*/%+-<>!=') / ptok

-- Parentheses, braces and separators.
symbol = C(S'(){};,') / ptok
|
|
|
|
-- A word: an 'alpha' character followed by any number of 'alnum' characters.
-- If token_name has an entry for the word it becomes a token of that name;
-- otherwise it is an Identifier token carrying the word as its value.
keyword_or_identifier = C(alpha * alnum^0) / function(word)
  local keyword = token_name[word]
  if keyword then
    return {name=keyword}
  end
  return {name='Identifier', value=word}
end
|
|
|
|
-- An integer literal: one or more digits that are not immediately followed
-- by an 'alpha' character (so input such as '12ab' does not match here).
-- Produces an Integer token whose value is the numeric value of the digits.
integer = (C(digit^1) * -alpha) / function(digits)
  local value = tonumber(digits)
  return {name='Integer', value=value}
end
|
|
|
|
-- Captures the first extra argument passed to 'match'. find_token passes the
-- source line number in that position, so this capture yields the number of
-- the line being scanned.
Cline = Carg(1) -- call to 'match' sets the first extra argument to source line number
|
|
|
|
-- Match-time capture that never returns: reaching it raises an error table
-- describing an invalid escape sequence. It is tried right after the
-- backslash has been consumed, so 'pos - 1' is the backslash's position.
bad_escseq_err = Cmt(Cline, function (_subject, pos, line)
  local report = {err='bad_escseq', line=line, column=pos-1}
  error(report)
end)
|
|
|
|
-- Maps the character that follows a backslash to the character it denotes.
esc_subst = {['\\'] = '\\', n = '\n'}

-- An escape sequence: a backslash followed by '\' or 'n'. Any other
-- character triggers bad_escseq_err. The captured character is replaced by
-- its entry in esc_subst.
escseq = (P'\\' * C(S'\\n' + bad_escseq_err)) / esc_subst
|
|
|
|
-- A character literal: a single quote, then either one ordinary character
-- (anything except a quote, newline or backslash) or an escape sequence,
-- then a closing single quote. It yields an Integer token whose value is
-- the byte code of that character.
qchar = (P"'" * (C(1 - S"'\n\\") + escseq) * P"'") / function (ch)
  local code = ch:byte()
  return {name='Integer', value=code}
end
|
|
|
|
-- A string literal: a double quote, any mix of runs of ordinary characters
-- (anything except a double quote, newline or backslash) and escape
-- sequences, then a closing double quote. It yields a String token whose
-- value is the concatenation of the pieces with escapes already substituted.
--
-- The pieces are gathered with Ct so the function always receives exactly
-- one table. Without it, the empty literal "" makes no capture at all, and
-- LPeg's function capture then passes the whole match (both quote
-- characters) instead, so the token value would be '""', not ''.
qstr = P'"' * Ct((C((P(1) - S'"\n\\')^1) + escseq)^0) * P'"' / function (parts)
  return {name='String', value=concat(parts)}
end
|
|
|
|
-- Any single token. The alternatives are tried in the order written, so the
-- two-character operators (op2c) have to come before the one-character ones
-- (op1c); otherwise '<=' would be read as '<' followed by '='.
Ctoken = symbol + op2c + op1c + keyword_or_identifier + integer + qstr + qchar
|
|
|
|
-- Match-time capture that never returns: reaching it raises an error table
-- for a comment with no closing '*/'. The reported column is the position
-- stored in the 'SOC' (start of comment) named group.
unfinished_comment_err = Cmt(Cline * Cb('SOC'), function (_subject, _pos, line, socpos)
  local report = {err='unfinished_comment', line=line, column=socpos}
  error(report)
end)
|
|
-- Opening '/*' of a comment; its position is saved in the named group 'SOC'
-- so that unfinished_comment_err can report it.
commentstart = Cg(Cp() * '/*', 'SOC')

-- Everything up to and including the closing '*/'. If the subject ends
-- first, unfinished_comment_err raises an error.
commentrest = (1 - P'*/')^0 * (P'*/' + unfinished_comment_err)

-- A complete comment that starts at the current position.
comment = commentstart * commentrest

-- The remainder of a comment that was opened earlier: there is no '/*' to
-- match, so the current position is recorded as 'SOC' instead.
morecomment = Cg(Cp(), 'SOC') * commentrest

-- Optional run of whitespace and comments between tokens.
ws = (space^1 + comment)^0
|
|
|
|
-- Match-time capture that never returns: it is the last alternative in
-- tokenpat, so reaching it means no token pattern matched at the current
-- position. It raises an error table carrying that position.
bad_token_err = Cmt(Cline, function (_subject, pos, line)
  local report = {err='invalid_token', line=line, column=pos}
  error(report)
end)
|
|
|
|
-- Skips leading whitespace and comments, then matches one token.
-- The function receives the line number, the position where the token
-- starts, the token itself and the position just after it. It returns the
-- token (annotated with its line and column) together with the following
-- position, or nil when only the end of the subject was left.
tokenpat = ws * Cline * Cp() * (C(-1) + Ctoken + bad_token_err) * Cp() /
  function (line, start, token, finish)
    if start ~= finish then
      token.line = line
      token.column = start
      return token, finish
    end
    -- Nothing was consumed, so C(-1) matched: end of line, no token.
    return nil
  end
|
|
|
|
-- Variant of tokenpat for a scan that starts inside a comment: it first
-- consumes the rest of that comment (through its closing '*/') and then
-- looks for the next token as usual.
closecomment_tokenpat = morecomment * tokenpat
|
|
|
|
--- Find the next token in a source line.
-- @param line         the text to scan
-- @param line_pos     position in 'line' at which scanning starts
-- @param line_number  passed to the pattern as its first extra argument; it
--                     ends up in the 'line' field of tokens and error tables
-- @param in_comment   truthy when scanning starts inside a comment, in which
--                     case the rest of that comment is consumed first
-- @return the token table and the position just after the token, or nil
--         when only whitespace and comments remain before the end of 'line'
-- Raises an error table with an 'err' field ('invalid_token', 'bad_escseq'
-- or 'unfinished_comment') plus 'line' and 'column' on malformed input.
function M.find_token(line, line_pos, line_number, in_comment)
  -- 'local' keeps the choice private to this call; without it the value is
  -- written into the module's shared environment table on every call.
  local pattern = in_comment and closecomment_tokenpat or tokenpat
  return lpeg.match(pattern, line, line_pos, line_number)
end
|
|
|
|
return M
|