RosettaCodeData/Task/Compiler-syntax-analyzer/ALGOL-W/compiler-syntax-analyzer.alg

396 lines
21 KiB
Plaintext

begin % syntax analyser %
% parse tree nodes %
record node( integer type
; reference(node) left, right
; integer iValue % nString/nIndentifier number or nInteger value %
);
integer nIdentifier, nString, nInteger, nSequence, nIf, nPrtc, nPrts
, nPrti, nWhile, nAssign, nNegate, nNot, nMultiply
, nDivide, nMod, nAdd, nSubtract, nLess, nLessEqual
, nGreater, nGreaterEqual, nEqual, nNotEqual, nAnd, nOr
;
string(14) array ndName ( 1 :: 25 );
% tokens - names must match those output by the lexical analyser %
integer tkType, tkLine, tkColumn, tkLength, tkIntegerValue;
integer tOp_multiply , tOp_divide , tOp_mod , tOp_add
, tOp_subtract , tOp_negate , tOp_less , tOp_lessequal
, tOp_greater , tOp_greaterequal , tOp_equal , tOp_notequal
, tOp_not , tOp_assign , tOp_and , tOp_or
, tLeftParen , tRightParen , tLeftBrace , tRightBrace
, tSemicolon , tComma , tKeyword_if , tKeyword_else
, tKeyword_while , tKeyword_print , tKeyword_putc , tIdentifier
, tInteger , tString , tEnd_of_input
, MAX_TOKEN_TYPE, PRIMARY_PREC
;
string(16) array tkName ( 1 :: 31 );
integer array tkPrec, tkNode ( 1 :: 31 );
% string literals and identifiers - uses a linked list - a hash table might be better... %
string(1) array text ( 0 :: 4095 );
integer textNext, TEXT_MAX;
record textElement ( integer start, length; reference(textElement) next );
reference(textElement) idList, stList;
% returns a new node with left and right branches %
reference(node) procedure opNode ( integer value opType; reference(node) value opLeft, opRight ) ; begin
node( opType, opLeft, opRight, 0 )
end opNode ;
% returns a new operand node %
reference(node) procedure operandNode ( integer value opType, opValue ) ; begin
node( opType, null, null, opValue )
end operandNode ;
% reports an error %
procedure synError( integer value line, column; string(80) value message ); begin
integer errorPos;
write( i_w := 1, s_w := 0, "**** Error at(", line, ",", column, "): " );
errorPos := 0;
while errorPos < 80 and message( errorPos // 1 ) not = "." do begin
writeon( s_w := 0, message( errorPos // 1 ) );
errorPos := errorPos + 1
end while_not_at_end_of_message ;
writeon( s_w := 0, "." )
end synError ;
% reports an error and stops %
procedure fatalError( integer value line, column; string(80) value message ); begin
synError( line, column, message );
assert( false )
end fatalError ;
% prints a node and its sub-nodes %
procedure writeNode( reference(node) value n ) ; begin
% prints an identifier or string from text %
procedure writeOnText( reference(textElement) value txHead; integer value txNumber ) ;
begin
reference(textElement) txPos;
integer count;
txPos := txHead;
count := 1;
while count < txNumber and txPos not = null do begin
txPos := next(txPos);
count := count + 1
end while_text_element_not_found ;
if txPos = null then fatalError( 0, txNumber, "INTERNAL ERROR: text not found." )
else for cPos := 0 until length(txPos) - 1 do writeon( text( start(txPos) + cPos ) );
if text( start(txPos) ) = """" then writeon( """" );
end writeOnText ;
if n = null then write( ";" )
else begin
write( ndName( type(n) ) );
if type(n) = nInteger then writeon( iValue(n) )
else if type(n) = nIdentifier then writeOnText( idList, iValue(n) )
else if type(n) = nString then writeOnText( stList, iValue(n) )
else begin
writeNode( left(n) );
writeNode( right(n) )
end
end
end writeNode ;
% reads a token from standard input %
procedure readToken ; begin
% parses a string from line and stores it in a string in the text array %
% - if it is not already present in the specified textElement list. %
% returns the position of the string in the text array %
integer procedure readString ( reference(textElement) value result txList; string(1) value terminator ) ; begin
string(256) str;
integer sLen, sPos, ePos;
logical found;
reference(textElement) txPos, txLastPos;
% get the text of the string %
str := " ";
sLen := 0;
str( sLen // 1 ) := line( lPos // 1 );
sLen := sLen + 1;
lPos := lPos + 1;
while lPos <= 255 and line( lPos // 1 ) not = terminator do begin
str( sLen // 1 ) := line( lPos // 1 );
sLen := sLen + 1;
lPos := lPos + 1
end while_more_string ;
if lPos > 255 then fatalError( tkLine, tkColumn, "Unterminated String in token file." );
% attempt to find the text in the list of strings/identifiers %
txLastPos := txPos := txList;
found := false;
ePos := 0;
while not found and txPos not = null do begin
ePos := ePos + 1;
found := ( length(txPos) = sLen );
sPos := 0;
while found and sPos < sLen do begin
found := str( sPos // 1 ) = text( start(txPos) + sPos );
sPos := sPos + 1
end while_not_found ;
txLastPos := txPos;
if not found then txPos := next(txPos)
end while_string_not_found ;
if not found then begin
% the string/identifier is not in the list - add it %
ePos := ePos + 1;
if txList = null then txList := textElement( textNext, sLen, null )
else next(txLastPos) := textElement( textNext, sLen, null );
if textNext + sLen > TEXT_MAX then fatalError( tkLine, tkColumn, "Text space exhausted." )
else begin
for cPos := 0 until sLen - 1 do begin
text( textNext ) := str( cPos // 1 );
textNext := textNext + 1
end for_cPos
end
end if_not_found ;
ePos
end readString ;
% gets an integer from the line - no checks for valid digits %
integer procedure readInteger ; begin
integer n;
while line( lPos // 1 ) = " " do lPos := lPos + 1;
n := 0;
while line( lPos // 1 ) not = " " do begin
n := ( n * 10 ) + ( decode( line( lPos // 1 ) ) - decode( "0" ) );
lPos := lPos + 1
end while_not_end_of_integer ;
n
end readInteger ;
string(256) line;
string(16) name;
integer lPos, tPos;
tPos := lPos := 0;
readcard( line );
% get the line and column numbers %
tkLine := readInteger;
tkColumn := readInteger;
% get the token name %
while line( lPos // 1 ) = " " do lPos := lPos + 1;
name := "";
while lPos < 256 and line( lPos // 1 ) not = " " do begin
name( tPos // 1 ) := line( lPos // 1 );
lPos := lPos + 1;
tPos := tPos + 1
end while_more_name ;
% determine the token type %
tkType := 1;
tkIntegerValue := 0;
while tkType <= MAX_TOKEN_TYPE and name not = tkName( tkType ) do tkType := tkType + 1;
if tkType > MAX_TOKEN_TYPE then fatalError( tkLine, tkColumn, "Malformed token" );
% handle the additional parameter for identifier/string/integer %
if tkType = tInteger or tkType = tIdentifier or tkType = tString then begin
while line( lPos // 1 ) = " " do lPos := lPos + 1;
if tkType = tInteger then tkIntegerValue := readInteger
else if tkType = tIdentifier then tkIntegerValue := readString( idList, " " )
else % tkType = tString % tkIntegerValue := readString( stList, """" )
end if_token_with_additional_parameter ;
end readToken ;
% parses a statement %
reference(node) procedure parseStatement ; begin
reference(node) stmtNode, stmtExpr;
% skips the current token if it is expectedToken, %
% returns true if the token was expectedToken, false otherwise %
logical procedure have ( integer value expectedToken ) ; begin
logical haveExpectedToken;
haveExpectedToken := ( tkType = expectedToken );
if haveExpectedToken and tkType not = tEnd_of_input then readToken;
haveExpectedToken
end have ;
% issues an error message and skips past the next semi-colon or to end of input %
procedure skipStatement ( string(80) value message ) ; begin
synError( tkLine, tkColumn, message );
while tkType not = tEnd_of_input and not have( tSemicolon ) do readToken
end skipStatement ;
% checks we have a semicolon, issues an error and skips the statement if not %
procedure mustBeEndOfStatement ; begin
if not have( tSemicolon ) then skipStatement( """;"" expected." )
end mustBeEndOfStatement ;
% skips the current token if it is "(" and issues an error if it isn't %
procedure mustBeLeftParen ; begin
if not have( tLeftParen ) then synError( tkLine, tkColumn, """("" expected." )
end % mustBeLeftParen % ;
% skips the current token if it is ")" and issues an error if it isn't %
procedure mustBeRightParen ; begin
if not have( tRightParen ) then synError( tkLine, tkColumn, """)"" expected." )
end % mustBeRightParen % ;
% gets the next token and parses an expression with the specified precedence %
reference(node) procedure nextAndparseExpr ( integer value precedence ) ; begin
readToken;
parseExpr( precedence )
end nextAndParseExpr ;
% parses an expression with the specified precedence %
% all operators are assumed to be left-associative %
reference(node) procedure parseExpr ( integer value precedence ) ; begin
% handles a single token primary %
reference(node) procedure simplePrimary ( integer value primaryNodeType ) ; begin
reference(node) primaryNode;
primaryNode := operandNode( primaryNodeType, tkIntegerValue );
readToken;
primaryNode
end simplePrimary ;
reference(node) exprNode;
if precedence < PRIMARY_PREC then begin
exprNode := parseExpr( precedence + 1 );
while tkPrec( tkType ) = precedence do begin
integer op;
op := tkNode( tkType );
exprNode := opNode( op, exprNode, nextAndParseExpr( precedence + 1 ) )
end while_op_at_this_precedence_level
end
else if tkType = tIdentifier then exprNode := simplePrimary( nIdentifier )
else if tkType = tInteger then exprNode := simplePrimary( nInteger )
else if tkType = nString then begin
synError( tkLine, tkColumn, "Unexpected string literal." );
exprNode := simplePrimary( nInteger )
end
else if tkType = tLeftParen then exprNode := parseParenExpr
else if tkType = tOp_add then exprNode := nextAndParseExpr( precedence )
else if tkType = tOp_subtract then exprNode := opNode( nNegate, nextAndParseExpr( precedence ), null )
else if tkType = tOp_not then exprNode := opNode( nNot, nextAndParseExpr( precedence ), null )
else begin
synError( tkLine, tkColumn, "Syntax error in expression." );
exprNode := simplePrimary( nInteger )
end;
exprNode
end parseExpr ;
% parses a preenthesised expression %
reference(node) procedure parseParenExpr ; begin
reference(node) exprNode;
mustBeLeftParen;
exprNode := parseExpr( 0 );
mustBeRightParen;
exprNode
end parseParenExpr ;
% parse statement depending on it's first token %
if tkType = tIdentifier then begin % assignment statement %
stmtExpr := operandNode( nIdentifier, tkIntegerValue );
% skip the identifier and check for "=" %
readToken;
if not have( tOp_Assign ) then synError( tkLine, tkColumn, "Expected ""="" in assignment statement." );
stmtNode := opNode( nAssign, stmtExpr, parseExpr( 0 ) );
mustBeEndOfStatement
end
else if have( tKeyword_while ) then begin
stmtExpr := parseParenExpr;
stmtNode := opNode( nWhile, stmtExpr, parseStatement )
end
else if have( tkeyword_if ) then begin
stmtExpr := parseParenExpr;
stmtNode := opNode( nIf, stmtExpr, opNode( nIf, parseStatement, null ) );
if have( tKeyword_else ) then % have an "else" part % right(right(stmtNode)) := parseStatement
end
else if have( tKeyword_Print ) then begin
mustBeLeftParen;
stmtNode := null;
while begin
if tkType = tString then begin
stmtNode := opNode( nSequence
, stmtNode
, opNode( nPrts, operandNode( nString, tkIntegerValue ), null )
);
readToken
end
else stmtNode := opNode( nSequence, stmtNode, opNode( nPrti, parseExpr( 0 ), null ) );
have( tComma )
end do begin end;
mustBeRightparen;
mustBeEndOfStatement;
end
else if have( tKeyword_Putc ) then begin
stmtNode := opNode( nPrtc, parseParenExpr, null );
mustBeEndOfStatement
end
else if have( tLeftBrace ) then begin % block %
stmtNode := parseStatementList( tRightBrace );
if not have( tRightBrace ) then synError( tkLine, tkColumn, "Expected ""}""." );
end
else if have( tSemicolon ) then stmtNode := null
else begin % unrecognised statement %
skipStatement( "Unrecognised statement." );
stmtNode := null
end if_various_tokens ;
stmtNode
end parseStatement ;
% parses a statement list ending with the specified terminator %
reference(node) procedure parseStatementList ( integer value terminator ) ; begin
reference(node) listNode;
listNode := null;
while tkType not = terminator
and tkType not = tEnd_of_input do listNode := opNode( nSequence, listNode, parseStatement );
listNode
end parseStatementList ;
% sets a node code and name %
procedure setNode ( integer result nd; integer value ndCode; string(14) value name ) ;
begin nd := ndCode; ndName( ndCode ) := name end;
setNode( nIdentifier, 1, "Identifier" ); setNode( nString, 2, "String" );
setNode( nInteger, 3, "Integer" ); setNode( nSequence, 4, "Sequence" ); setNode( nIf, 5, "If" );
setNode( nPrtc, 6, "Prtc" ); setNode( nPrts, 7, "Prts" );
setNode( nPrti, 8, "Prti" ); setNode( nWhile, 9, "While" );
setNode( nAssign, 10, "Assign" ); setNode( nNegate, 11, "Negate" ); setNode( nNot, 12, "Not" );
setNode( nMultiply, 13, "Multiply" ); setNode( nDivide, 14, "Divide" ); setNode( nMod, 15, "Mod" );
setNode( nAdd, 16, "Add" ); setNode( nSubtract, 17, "Subtract" );
setNode( nLess, 18, "Less" ); setNode( nLessEqual, 19, "LessEqual" );
setNode( nGreater, 20, "Greater" );
setNode( nGreaterEqual, 21, "GreaterEqual" ); setNode( nEqual, 22, "Equal" );
setNode( nNotEqual, 23, "NotEqual" ); setNode( nAnd, 24, "And" ); setNode( nOr, 25, "Or" );
tOp_multiply := 1; tkName( tOp_multiply ) := "Op_multiply"; tkPrec( tOp_multiply ) := 5;
tOp_divide := 2; tkName( tOp_divide ) := "Op_divide"; tkPrec( tOp_divide ) := 5;
tOp_mod := 3; tkName( tOp_mod ) := "Op_mod"; tkPrec( tOp_mod ) := 5;
tOp_add := 4; tkName( tOp_add ) := "Op_add"; tkPrec( tOp_add ) := 4;
tOp_subtract := 5; tkName( tOp_subtract ) := "Op_subtract"; tkPrec( tOp_subtract ) := 4;
tOp_negate := 6; tkName( tOp_negate ) := "Op_negate"; tkPrec( tOp_negate ) := -1;
tOp_less := 7; tkName( tOp_less ) := "Op_less"; tkPrec( tOp_less ) := 3;
tOp_lessequal := 8; tkName( tOp_lessequal ) := "Op_lessequal"; tkPrec( tOp_lessequal ) := 3;
tOp_greater := 9; tkName( tOp_greater ) := "Op_greater"; tkPrec( tOp_greater ) := 3;
tOp_greaterequal := 10; tkName( tOp_greaterequal ) := "Op_greaterequal"; tkPrec( tOp_greaterequal ) := 3;
tOp_equal := 11; tkName( tOp_equal ) := "Op_equal"; tkPrec( tOp_equal ) := 2;
tOp_notequal := 12; tkName( tOp_notequal ) := "Op_notequal"; tkPrec( tOp_notequal ) := 2;
tOp_not := 13; tkName( tOp_not ) := "Op_not"; tkPrec( tOp_not ) := -1;
tOp_assign := 14; tkName( tOp_assign ) := "Op_assign"; tkPrec( tOp_assign ) := -1;
tOp_and := 15; tkName( tOp_and ) := "Op_and"; tkPrec( tOp_and ) := 1;
tOp_or := 16; tkName( tOp_or ) := "Op_or"; tkPrec( tOp_or ) := 0;
tLeftParen := 17; tkName( tLeftParen ) := "LeftParen"; tkPrec( tLeftParen ) := -1;
tRightParen := 18; tkName( tRightParen ) := "RightParen"; tkPrec( tRightParen ) := -1;
tLeftBrace := 19; tkName( tLeftBrace ) := "LeftBrace"; tkPrec( tLeftBrace ) := -1;
tRightBrace := 20; tkName( tRightBrace ) := "RightBrace"; tkPrec( tRightBrace ) := -1;
tSemicolon := 21; tkName( tSemicolon ) := "Semicolon"; tkPrec( tSemicolon ) := -1;
tComma := 22; tkName( tComma ) := "Comma"; tkPrec( tComma ) := -1;
tKeyword_if := 23; tkName( tKeyword_if ) := "Keyword_if"; tkPrec( tKeyword_if ) := -1;
tKeyword_else := 24; tkName( tKeyword_else ) := "Keyword_else"; tkPrec( tKeyword_else ) := -1;
tKeyword_while := 25; tkName( tKeyword_while ) := "Keyword_while"; tkPrec( tKeyword_while ) := -1;
tKeyword_print := 26; tkName( tKeyword_print ) := "Keyword_print"; tkPrec( tKeyword_print ) := -1;
tKeyword_putc := 27; tkName( tKeyword_putc ) := "Keyword_putc"; tkPrec( tKeyword_putc ) := -1;
tIdentifier := 28; tkName( tIdentifier ) := "Identifier"; tkPrec( tIdentifier ) := -1;
tInteger := 29; tkName( tInteger ) := "Integer"; tkPrec( tInteger ) := -1;
tString := 30; tkName( tString ) := "String"; tkPrec( tString ) := -1;
tEnd_of_input := 31; tkName( tEnd_of_input ) := "End_of_input"; tkPrec( tEnd_of_input ) := -1;
MAX_TOKEN_TYPE := 31; TEXT_MAX := 4095; textNext := 0; PRIMARY_PREC := 6;
for tkPos := 1 until MAX_TOKEN_TYPE do tkNode( tkPos ) := - tkPos;
tkNode( tOp_multiply ) := nMultiply; tkNode( tOp_divide ) := nDivide; tkNode( tOp_mod ) := nMod;
tkNode( tOp_add ) := nAdd; tkNode( tOp_subtract ) := nSubtract; tkNode( tOp_less ) := nLess;
tkNode( tOp_lessequal ) := nLessEqual; tkNode( tOp_greater ) := nGreater;
tkNode( tOp_greaterequal ) := nGreaterEqual;
tkNode( tOp_equal ) := nEqual; tkNode( tOp_notequal ) := nNotEqual; tkNode( tOp_not ) := nNot;
tkNode( tOp_and ) := nAnd; tkNode( tOp_or ) := nOr;
stList := idList := null;
% parse the output from the lexical analyser and output the linearised parse tree %
readToken;
writeNode( parseStatementList( tEnd_of_input ) )
end.