RosettaCodeData/Task/Compiler-syntax-analyzer/Common-Lisp/compiler-syntax-analyzer.lisp

413 lines
12 KiB
Common Lisp

#!/bin/sh
#|-*- mode:lisp -*-|#
#|
exec ros -Q -- $0 "$@"
|#
(progn ;;init forms
(ros:ensure-asdf)
#+quicklisp(ql:quickload '() :silent t))
(defpackage :ros.script.parse.3859374047
(:use :cl))
(in-package :ros.script.parse.3859374047)
;;;
;;; The Rosetta Code Tiny-Language Parser, in Common Lisp.
;;;
(require "cl-ppcre")
(require "trivia")
(defstruct tokstruc line-no column-no tok tokval)
(defconstant re-blank-line
(ppcre:create-scanner "^\\s*$"))
(defconstant re-token-1
(ppcre:create-scanner
"^\\s*(\\d+)\\s+(\\d+)\\s+(\\S+)\\s*$"))
(defconstant re-token-2
(ppcre:create-scanner
"^\\s*(\\d+)\\s+(\\d+)\\s+(\\S+)\\s+(\\S(.*\\S)?)\\s*$"))
(defun string-to-tok (s)
(trivia:match s
("Keyword_else" 'TOK-ELSE)
("Keyword_if" 'TOK-IF)
("Keyword_print" 'TOK-PRINT)
("Keyword_putc" 'TOK-PUTC)
("Keyword_while" 'TOK-WHILE)
("Op_multiply" 'TOK-MULTIPLY)
("Op_divide" 'TOK-DIVIDE)
("Op_mod" 'TOK-MOD)
("Op_add" 'TOK-ADD)
("Op_subtract" 'TOK-SUBTRACT)
("Op_negate" 'TOK-NEGATE)
("Op_less" 'TOK-LESS)
("Op_lessequal" 'TOK-LESSEQUAL)
("Op_greater" 'TOK-GREATER)
("Op_greaterequal" 'TOK-GREATEREQUAL)
("Op_equal" 'TOK-EQUAL)
("Op_notequal" 'TOK-NOTEQUAL)
("Op_not" 'TOK-NOT)
("Op_assign" 'TOK-ASSIGN)
("Op_and" 'TOK-AND)
("Op_or" 'TOK-OR)
("LeftParen" 'TOK-LEFTPAREN)
("RightParen" 'TOK-RIGHTPAREN)
("LeftBrace" 'TOK-LEFTBRACE)
("RightBrace" 'TOK-RIGHTBRACE)
("Semicolon" 'TOK-SEMICOLON)
("Comma" 'TOK-COMMA)
("Identifier" 'TOK-IDENTIFIER)
("Integer" 'TOK-INTEGER)
("String" 'TOK-STRING)
("End_of_input" 'TOK-END-OF-INPUT)
(_ (warn "unparseable token line")
(uiop:quit 1))))
(defun precedence (tok)
(case tok
(TOK-MULTIPLY 13)
(TOK-DIVIDE 13)
(TOK-MOD 13)
(TOK-ADD 12)
(TOK-SUBTRACT 12)
(TOK-NEGATE 14)
(TOK-NOT 14)
(TOK-LESS 10)
(TOK-LESSEQUAL 10)
(TOK-GREATER 10)
(TOK-GREATEREQUAL 10)
(TOK-EQUAL 9)
(TOK-NOTEQUAL 9)
(TOK-AND 5)
(TOK-OR 4)
(otherwise -1)))
(defun binary-p (tok)
(case tok
(TOK-ADD t)
(TOK-SUBTRACT t)
(TOK-MULTIPLY t)
(TOK-DIVIDE t)
(TOK-MOD t)
(TOK-LESS t)
(TOK-LESSEQUAL t)
(TOK-GREATER t)
(TOK-GREATEREQUAL t)
(TOK-EQUAL t)
(TOK-NOTEQUAL t)
(TOK-AND t)
(TOK-OR t)
(otherwise nil)))
(defun right-associative-p (tok)
(declare (ignorable tok))
nil) ; None of the current operators is right associative.
(defun tok-text (tok)
(ecase tok
(TOK-ELSE "else")
(TOK-IF "if")
(TOK-PRINT "print")
(TOK-PUTC "putc")
(TOK-WHILE "while")
(TOK-MULTIPLY "*")
(TOK-DIVIDE "/")
(TOK-MOD "%")
(TOK-ADD "+")
(TOK-SUBTRACT "-")
(TOK-NEGATE "-")
(TOK-LESS "<")
(TOK-LESSEQUAL "<=")
(TOK-GREATER ">")
(TOK-GREATEREQUAL ">=")
(TOK-EQUAL "==")
(TOK-NOTEQUAL "!=")
(TOK-NOT "!")
(TOK-ASSIGN "=")
(TOK-AND "&&")
(TOK-OR "((")
(TOK-LEFTPAREN "(")
(TOK-RIGHTPAREN ")")
(TOK-LEFTBRACE "{")
(TOK-RIGHTBRACE "}")
(TOK-SEMICOLON ";")
(TOK-COMMA ",")
(TOK-IDENTIFIER "Ident")
(TOK-INTEGER "Integer literal")
(TOK-STRING "String literal")
(TOK-END_OF_INPUT "EOI")))
(defun operator (tok)
(ecase tok
(TOK-MULTIPLY "Multiply")
(TOK-DIVIDE "Divide")
(TOK-MOD "Mod")
(TOK-ADD "Add")
(TOK-SUBTRACT "Subtract")
(TOK-NEGATE "Negate")
(TOK-NOT "Not")
(TOK-LESS "Less")
(TOK-LESSEQUAL "LessEqual")
(TOK-GREATER "Greater")
(TOK-GREATEREQUAL "GreaterEqual")
(TOK-EQUAL "Equal")
(TOK-NOTEQUAL "NotEqual")
(TOK-AND "And")
(TOK-OR "Or")))
(defun join (&rest args)
(apply #'concatenate 'string args))
(defun nxt (gettok)
(funcall gettok :nxt))
(defun curr (gettok)
(funcall gettok :curr))
(defun err (token msg)
(format t "(~A, ~A) error: ~A~%"
(tokstruc-line-no token)
(tokstruc-column-no token)
msg)
(uiop:quit 1))
(defun prt-ast (outf ast)
;;
;; For fun, let us do prt-ast *non*-recursively, with a stack and a
;; loop.
;;
(let ((stack `(,ast)))
(loop while stack
do (let ((x (car stack)))
(setf stack (cdr stack))
(cond ((not x) (format outf ";~%"))
((or (string= (car x) "Identifier")
(string= (car x) "Integer")
(string= (car x) "String"))
(format outf "~A ~A~%" (car x) (cadr x)))
(t (format outf "~A~%" (car x))
(setf stack (cons (caddr x) stack))
(setf stack (cons (cadr x) stack))))))))
(defun accept (gettok tok)
(if (eq (tokstruc-tok (curr gettok)) tok)
(nxt gettok)
nil))
(defun expect (gettok msg tok)
(let ((curr-tok (tokstruc-tok (curr gettok))))
(if (eq curr-tok tok)
(nxt gettok)
(err (curr gettok)
(join msg ": Expecting '"
(tok-text tok) "', found '"
(tok-text curr-tok) "'")))))
(defun parse (gettok)
(defun paren-expr (gettok)
(expect gettok "paren_expr" 'TOK-LEFTPAREN)
(let ((x (expr gettok 0)))
(expect gettok "paren_expr" 'TOK-RIGHTPAREN)
x))
(defun expr (gettok p)
(let* ((tok (curr gettok))
(x (case (tokstruc-tok tok)
(TOK-LEFTPAREN (paren-expr gettok))
(TOK-SUBTRACT
(nxt gettok)
(let ((y (expr gettok (precedence 'TOK-NEGATE))))
`("Negate" ,y ())))
(TOK-ADD
(nxt gettok)
(expr gettok (precedence 'TOK-NEGATE)))
(TOK-NOT
(nxt gettok)
(let ((y (expr gettok (precedence 'TOK-NOT))))
`("Not" ,y ())))
(TOK-IDENTIFIER
(let ((y `("Identifier" ,(tokstruc-tokval tok))))
(nxt gettok)
y))
(TOK-INTEGER
(let ((y `("Integer" ,(tokstruc-tokval tok))))
(nxt gettok)
y))
(otherwise
(err tok (join "Expecting a primary, found: "
(tok-text (tokstruc-tok tok))))))))
;;
;; Precedence climbing for binary operators.
;;
(loop for tok = (curr gettok)
for toktok = (tokstruc-tok tok)
while (and (binary-p toktok) (<= p (precedence toktok)))
do (progn (nxt gettok)
(let ((q (if (right-associative-p toktok)
(precedence toktok)
(1+ (precedence toktok)))))
(setf x `(,(operator toktok) ,x
,(expr gettok q))))))
x))
(defun stmt (gettok)
(cond ((accept gettok 'TOK-IF)
(let* ((e (paren-expr gettok))
(s (stmt gettok))
(x (if (accept gettok 'TOK-ELSE)
`("If" ,s ,(stmt gettok))
`("If" ,s ()))))
`("If" ,e ,x)))
((accept gettok 'TOK-PUTC)
(let ((x `("Prtc" ,(paren-expr gettok) ())))
(expect gettok "Putc" 'TOK-SEMICOLON)
x))
((accept gettok 'TOK-PRINT)
(expect gettok "Print" 'TOK-LEFTPAREN)
(let ((x '()))
(loop for tok = (curr gettok)
for toktok = (tokstruc-tok tok)
for e = (if (eq toktok 'TOK-STRING)
(let* ((tokval (tokstruc-tokval tok))
(leaf `("String" ,tokval))
(e `("Prts" ,leaf ())))
(nxt gettok)
e)
`("Prti" ,(expr gettok 0) ()))
do (setf x `("Sequence" ,x ,e))
while (accept gettok 'TOK-COMMA))
(expect gettok "Print" 'TOK-RIGHTPAREN)
(expect gettok "Print" 'TOK-SEMICOLON)
x))
((eq (tokstruc-tok (curr gettok)) 'TOK-SEMICOLON)
(nxt gettok))
((eq (tokstruc-tok (curr gettok)) 'TOK-IDENTIFIER)
(let ((v `("Identifier" ,(tokstruc-tokval (curr gettok)))))
(nxt gettok)
(expect gettok "assign" 'TOK-ASSIGN)
(let ((x `("Assign" ,v ,(expr gettok 0))))
(expect gettok "assign" 'TOK-SEMICOLON)
x)))
((accept gettok 'TOK-WHILE)
(let ((e (paren-expr gettok)))
`("While" ,e ,(stmt gettok))))
((accept gettok 'TOK-LEFTBRACE)
(let ((x '()))
(loop for tok = (curr gettok)
for toktok = (tokstruc-tok tok)
until (or (eq toktok 'TOK-RIGHTBRACE)
(eq toktok 'TOK-END-OF-INPUT))
do (setf x `("Sequence" ,x ,(stmt gettok))))
(expect gettok "Lbrace" 'TOK-RIGHTBRACE)
x))
((eq (tokstruc-tok (curr gettok)) 'TOK-END-OF-INPUT)
'())
(t (let* ((tok (curr gettok))
(toktok (tokstruc-tok tok)))
(err tok (join "expecting start of statement, found '"
(tok-text toktok) "'"))))))
;;
;; Parsing of the top-level statement sequence.
;;
(let ((x '()))
(nxt gettok)
(loop do (setf x `("Sequence" ,x ,(stmt gettok)))
until (eq (tokstruc-tok (curr gettok)) 'TOK-END-OF-INPUT))
x))
(defun string-to-tokstruc (s)
(let ((strings
(nth-value 1 (ppcre:scan-to-strings re-token-1 s))))
(if strings
(make-tokstruc :line-no (elt strings 0)
:column-no (elt strings 1)
:tok (string-to-tok (elt strings 2))
:tokval nil)
(let ((strings
(nth-value 1 (ppcre:scan-to-strings re-token-2 s))))
(if strings
(make-tokstruc :line-no (elt strings 0)
:column-no (elt strings 1)
:tok (string-to-tok (elt strings 2))
:tokval (elt strings 3))
(progn
(warn "unparseable token line")
(uiop:quit 1)))))))
(defun read-token-line (inpf)
(loop for line = (read-line inpf nil "End_of_input")
while (ppcre:scan re-blank-line line)
finally (return line)))
(defun open-inpf (inpf-filename)
(if (string= inpf-filename "-")
*standard-input*
(open inpf-filename :direction :input)))
(defun open-outf (outf-filename)
(if (string= outf-filename "-")
*standard-output*
(open outf-filename :direction :output
:if-exists :overwrite
:if-does-not-exist :create)))
(defun usage-error ()
(princ "Usage: parse [INPUTFILE [OUTPUTFILE]]" *standard-output*)
(terpri *standard-output*)
(princ "If either INPUTFILE or OUTPUTFILE is \"-\", the respective"
*standard-output*)
(princ " standard I/O is used." *standard-output*)
(terpri *standard-output*)
(uiop:quit 1))
(defun get-filenames (argv)
(trivia:match argv
((list) '("-" "-"))
((list inpf-filename) `(,inpf-filename "-"))
((list inpf-filename outf-filename) `(,inpf-filename
,outf-filename))
(_ (usage-error))))
(defun main (&rest argv)
(let* ((filenames (get-filenames argv))
(inpf-filename (car filenames))
(inpf (open-inpf inpf-filename))
(outf-filename (cadr filenames))
(outf (open-outf outf-filename)))
(let* ((current-token (list nil))
(gettok-curr (lambda () (elt current-token 0)))
(gettok-nxt (lambda ()
(let* ((s (read-token-line inpf))
(tok (string-to-tokstruc s)))
(setf (elt current-token 0) tok)
tok)))
(gettok (lambda (instruction)
(trivia:match instruction
(:curr (funcall gettok-curr))
(:nxt (funcall gettok-nxt)))))
(ast (parse gettok)))
(prt-ast outf ast))
(unless (string= inpf-filename "-")
(close inpf))
(unless (string= outf-filename "-")
(close outf))
(uiop:quit 0)))
;;; vim: set ft=lisp lisp: