RosettaCodeData/Task/Compiler-syntax-analyzer/Perl/compiler-syntax-analyzer.pl

75 lines
3.1 KiB
Perl

#!/usr/bin/perl
use strict; # parse.pl - inputs lex, outputs flattened ast
use warnings; # http://www.rosettacode.org/wiki/Compiler/syntax_analyzer
my $h = qr/\G\s*\d+\s+\d+\s+/; # header of each line
sub error { die "*** Expected @_ at " . (/\G(.*\n)/ ?
$1 =~ s/^\s*(\d+)\s+(\d+)\s+/line $1 character $2 got /r : "EOF\n") }
sub want { /$h \Q$_[1]\E.*\n/gcx ? shift : error "'$_[1]'" }
local $_ = join '', <>;
print want stmtlist(), 'End_of_input';
sub stmtlist
{
/(?=$h (RightBrace|End_of_input))/gcx and return ";\n";
my ($stmt, $stmtlist) = (stmt(), stmtlist());
$stmtlist eq ";\n" ? $stmt : "Sequence\n$stmt$stmtlist";
}
sub stmt
{
/$h Semicolon\n/gcx ? ";\n" :
/$h Identifier \s+ (\w+) \n/gcx ? want("Assign\nIdentifier\t$1\n",
'Op_assign') . want expr(0), 'Semicolon' :
/$h Keyword_while \n/gcx ? "While\n" . parenexp() . stmt() :
/$h Keyword_if \n/gcx ? "If\n" . parenexp() . "If\n" . stmt() .
(/$h Keyword_else \n/gcx ? stmt() : ";\n") :
/$h Keyword_print \n/gcx ? want('', 'LeftParen') .
want want(printlist(), 'RightParen'), 'Semicolon' :
/$h Keyword_putc \n/gcx ? want "Prtc\n" . parenexp() . ";\n", 'Semicolon' :
/$h LeftBrace \n/gcx ? want stmtlist(), 'RightBrace' :
error 'A STMT';
}
sub parenexp { want('', 'LeftParen') . want expr(0), 'RightParen' } # (expr)
sub printlist
{
my $ast = /$h String \s+ (".*") \n/gcx ?
"Prts\nString\t\t$1\n;\n" : "Prti\n" . expr(0) . ";\n";
/$h Comma \n/gcx ? "Sequence\n$ast" . printlist() : $ast;
}
sub expr # (sort of EBNF) expr = operand { operator expr }
{
my $ast = # operand
/$h Integer \s+ (\d+) \n/gcx ? "Integer\t\t$1\n" :
/$h Identifier \s+ (\w+) \n/gcx ? "Identifier\t$1\n" :
/$h LeftParen \n/gcx ? want expr(0), 'RightParen' :
/$h Op_(negate|subtract) \n/gcx ? "Negate\n" . expr(8) . ";\n" :
/$h Op_not \n/gcx ? "Not\n" . expr(8) . ";\n" :
/$h Op_add \n/gcx ? expr(8) :
error "A PRIMARY";
$ast = # { operator expr }
$_[0] <= 7 && /$h Op_multiply \n/gcx ? "Multiply\n$ast" . expr(8) :
$_[0] <= 7 && /$h Op_divide \n/gcx ? "Divide\n$ast" . expr(8) :
$_[0] <= 7 && /$h Op_mod \n/gcx ? "Mod\n$ast" . expr(8) :
$_[0] <= 6 && /$h Op_add \n/gcx ? "Add\n$ast" . expr(7) :
$_[0] <= 6 && /$h Op_subtract \n/gcx ? "Subtract\n$ast" . expr(7) :
$_[0] == 5 && /(?=$h Op_(less|greater)(equal)? \n)/gcx ? error 'NO ASSOC' :
$_[0] <= 5 && /$h Op_lessequal \n/gcx ? "LessEqual\n$ast" . expr(5) :
$_[0] <= 5 && /$h Op_less \n/gcx ? "Less\n$ast" . expr(5) :
$_[0] <= 5 && /$h Op_greater \n/gcx ? "Greater\n$ast" . expr(5) :
$_[0] <= 5 && /$h Op_greaterequal \n/gcx ? "GreaterEqual\n$ast" . expr(5) :
$_[0] == 3 && /(?=$h Op_(not)?equal \n)/gcx ? error 'NO ASSOC' :
$_[0] <= 3 && /$h Op_equal \n/gcx ? "Equal\n$ast" . expr(3) :
$_[0] <= 3 && /$h Op_notequal \n/gcx ? "NotEqual\n$ast" . expr(3) :
$_[0] <= 1 && /$h Op_and \n/gcx ? "And\n$ast" . expr(2) :
$_[0] <= 0 && /$h Op_or \n/gcx ? "Or\n$ast" . expr(1) :
return $ast while 1;
}