begin % syntax analyser % % parse tree nodes % record node( integer type ; reference(node) left, right ; integer iValue % nString/nIndentifier number or nInteger value % ); integer nIdentifier, nString, nInteger, nSequence, nIf, nPrtc, nPrts , nPrti, nWhile, nAssign, nNegate, nNot, nMultiply , nDivide, nMod, nAdd, nSubtract, nLess, nLessEqual , nGreater, nGreaterEqual, nEqual, nNotEqual, nAnd, nOr ; string(14) array ndName ( 1 :: 25 ); % tokens - names must match those output by the lexical analyser % integer tkType, tkLine, tkColumn, tkLength, tkIntegerValue; integer tOp_multiply , tOp_divide , tOp_mod , tOp_add , tOp_subtract , tOp_negate , tOp_less , tOp_lessequal , tOp_greater , tOp_greaterequal , tOp_equal , tOp_notequal , tOp_not , tOp_assign , tOp_and , tOp_or , tLeftParen , tRightParen , tLeftBrace , tRightBrace , tSemicolon , tComma , tKeyword_if , tKeyword_else , tKeyword_while , tKeyword_print , tKeyword_putc , tIdentifier , tInteger , tString , tEnd_of_input , MAX_TOKEN_TYPE, PRIMARY_PREC ; string(16) array tkName ( 1 :: 31 ); integer array tkPrec, tkNode ( 1 :: 31 ); % string literals and identifiers - uses a linked list - a hash table might be better... % string(1) array text ( 0 :: 4095 ); integer textNext, TEXT_MAX; record textElement ( integer start, length; reference(textElement) next ); reference(textElement) idList, stList; % returns a new node with left and right branches % reference(node) procedure opNode ( integer value opType; reference(node) value opLeft, opRight ) ; begin node( opType, opLeft, opRight, 0 ) end opNode ; % returns a new operand node % reference(node) procedure operandNode ( integer value opType, opValue ) ; begin node( opType, null, null, opValue ) end operandNode ; % reports an error % procedure synError( integer value line, column; string(80) value message ); begin integer errorPos; write( i_w := 1, s_w := 0, "**** Error at(", line, ",", column, "): " ); errorPos := 0; while errorPos < 80 and message( errorPos // 1 ) not = "." do begin writeon( s_w := 0, message( errorPos // 1 ) ); errorPos := errorPos + 1 end while_not_at_end_of_message ; writeon( s_w := 0, "." ) end synError ; % reports an error and stops % procedure fatalError( integer value line, column; string(80) value message ); begin synError( line, column, message ); assert( false ) end fatalError ; % prints a node and its sub-nodes % procedure writeNode( reference(node) value n ) ; begin % prints an identifier or string from text % procedure writeOnText( reference(textElement) value txHead; integer value txNumber ) ; begin reference(textElement) txPos; integer count; txPos := txHead; count := 1; while count < txNumber and txPos not = null do begin txPos := next(txPos); count := count + 1 end while_text_element_not_found ; if txPos = null then fatalError( 0, txNumber, "INTERNAL ERROR: text not found." ) else for cPos := 0 until length(txPos) - 1 do writeon( text( start(txPos) + cPos ) ); if text( start(txPos) ) = """" then writeon( """" ); end writeOnText ; if n = null then write( ";" ) else begin write( ndName( type(n) ) ); if type(n) = nInteger then writeon( iValue(n) ) else if type(n) = nIdentifier then writeOnText( idList, iValue(n) ) else if type(n) = nString then writeOnText( stList, iValue(n) ) else begin writeNode( left(n) ); writeNode( right(n) ) end end end writeNode ; % reads a token from standard input % procedure readToken ; begin % parses a string from line and stores it in a string in the text array % % - if it is not already present in the specified textElement list. % % returns the position of the string in the text array % integer procedure readString ( reference(textElement) value result txList; string(1) value terminator ) ; begin string(256) str; integer sLen, sPos, ePos; logical found; reference(textElement) txPos, txLastPos; % get the text of the string % str := " "; sLen := 0; str( sLen // 1 ) := line( lPos // 1 ); sLen := sLen + 1; lPos := lPos + 1; while lPos <= 255 and line( lPos // 1 ) not = terminator do begin str( sLen // 1 ) := line( lPos // 1 ); sLen := sLen + 1; lPos := lPos + 1 end while_more_string ; if lPos > 255 then fatalError( tkLine, tkColumn, "Unterminated String in token file." ); % attempt to find the text in the list of strings/identifiers % txLastPos := txPos := txList; found := false; ePos := 0; while not found and txPos not = null do begin ePos := ePos + 1; found := ( length(txPos) = sLen ); sPos := 0; while found and sPos < sLen do begin found := str( sPos // 1 ) = text( start(txPos) + sPos ); sPos := sPos + 1 end while_not_found ; txLastPos := txPos; if not found then txPos := next(txPos) end while_string_not_found ; if not found then begin % the string/identifier is not in the list - add it % ePos := ePos + 1; if txList = null then txList := textElement( textNext, sLen, null ) else next(txLastPos) := textElement( textNext, sLen, null ); if textNext + sLen > TEXT_MAX then fatalError( tkLine, tkColumn, "Text space exhausted." ) else begin for cPos := 0 until sLen - 1 do begin text( textNext ) := str( cPos // 1 ); textNext := textNext + 1 end for_cPos end end if_not_found ; ePos end readString ; % gets an integer from the line - no checks for valid digits % integer procedure readInteger ; begin integer n; while line( lPos // 1 ) = " " do lPos := lPos + 1; n := 0; while line( lPos // 1 ) not = " " do begin n := ( n * 10 ) + ( decode( line( lPos // 1 ) ) - decode( "0" ) ); lPos := lPos + 1 end while_not_end_of_integer ; n end readInteger ; string(256) line; string(16) name; integer lPos, tPos; tPos := lPos := 0; readcard( line ); % get the line and column numbers % tkLine := readInteger; tkColumn := readInteger; % get the token name % while line( lPos // 1 ) = " " do lPos := lPos + 1; name := ""; while lPos < 256 and line( lPos // 1 ) not = " " do begin name( tPos // 1 ) := line( lPos // 1 ); lPos := lPos + 1; tPos := tPos + 1 end while_more_name ; % determine the token type % tkType := 1; tkIntegerValue := 0; while tkType <= MAX_TOKEN_TYPE and name not = tkName( tkType ) do tkType := tkType + 1; if tkType > MAX_TOKEN_TYPE then fatalError( tkLine, tkColumn, "Malformed token" ); % handle the additional parameter for identifier/string/integer % if tkType = tInteger or tkType = tIdentifier or tkType = tString then begin while line( lPos // 1 ) = " " do lPos := lPos + 1; if tkType = tInteger then tkIntegerValue := readInteger else if tkType = tIdentifier then tkIntegerValue := readString( idList, " " ) else % tkType = tString % tkIntegerValue := readString( stList, """" ) end if_token_with_additional_parameter ; end readToken ; % parses a statement % reference(node) procedure parseStatement ; begin reference(node) stmtNode, stmtExpr; % skips the current token if it is expectedToken, % % returns true if the token was expectedToken, false otherwise % logical procedure have ( integer value expectedToken ) ; begin logical haveExpectedToken; haveExpectedToken := ( tkType = expectedToken ); if haveExpectedToken and tkType not = tEnd_of_input then readToken; haveExpectedToken end have ; % issues an error message and skips past the next semi-colon or to end of input % procedure skipStatement ( string(80) value message ) ; begin synError( tkLine, tkColumn, message ); while tkType not = tEnd_of_input and not have( tSemicolon ) do readToken end skipStatement ; % checks we have a semicolon, issues an error and skips the statement if not % procedure mustBeEndOfStatement ; begin if not have( tSemicolon ) then skipStatement( """;"" expected." ) end mustBeEndOfStatement ; % skips the current token if it is "(" and issues an error if it isn't % procedure mustBeLeftParen ; begin if not have( tLeftParen ) then synError( tkLine, tkColumn, """("" expected." ) end % mustBeLeftParen % ; % skips the current token if it is ")" and issues an error if it isn't % procedure mustBeRightParen ; begin if not have( tRightParen ) then synError( tkLine, tkColumn, """)"" expected." ) end % mustBeRightParen % ; % gets the next token and parses an expression with the specified precedence % reference(node) procedure nextAndparseExpr ( integer value precedence ) ; begin readToken; parseExpr( precedence ) end nextAndParseExpr ; % parses an expression with the specified precedence % % all operators are assumed to be left-associative % reference(node) procedure parseExpr ( integer value precedence ) ; begin % handles a single token primary % reference(node) procedure simplePrimary ( integer value primaryNodeType ) ; begin reference(node) primaryNode; primaryNode := operandNode( primaryNodeType, tkIntegerValue ); readToken; primaryNode end simplePrimary ; reference(node) exprNode; if precedence < PRIMARY_PREC then begin exprNode := parseExpr( precedence + 1 ); while tkPrec( tkType ) = precedence do begin integer op; op := tkNode( tkType ); exprNode := opNode( op, exprNode, nextAndParseExpr( precedence + 1 ) ) end while_op_at_this_precedence_level end else if tkType = tIdentifier then exprNode := simplePrimary( nIdentifier ) else if tkType = tInteger then exprNode := simplePrimary( nInteger ) else if tkType = nString then begin synError( tkLine, tkColumn, "Unexpected string literal." ); exprNode := simplePrimary( nInteger ) end else if tkType = tLeftParen then exprNode := parseParenExpr else if tkType = tOp_add then exprNode := nextAndParseExpr( precedence ) else if tkType = tOp_subtract then exprNode := opNode( nNegate, nextAndParseExpr( precedence ), null ) else if tkType = tOp_not then exprNode := opNode( nNot, nextAndParseExpr( precedence ), null ) else begin synError( tkLine, tkColumn, "Syntax error in expression." ); exprNode := simplePrimary( nInteger ) end; exprNode end parseExpr ; % parses a preenthesised expression % reference(node) procedure parseParenExpr ; begin reference(node) exprNode; mustBeLeftParen; exprNode := parseExpr( 0 ); mustBeRightParen; exprNode end parseParenExpr ; % parse statement depending on it's first token % if tkType = tIdentifier then begin % assignment statement % stmtExpr := operandNode( nIdentifier, tkIntegerValue ); % skip the identifier and check for "=" % readToken; if not have( tOp_Assign ) then synError( tkLine, tkColumn, "Expected ""="" in assignment statement." ); stmtNode := opNode( nAssign, stmtExpr, parseExpr( 0 ) ); mustBeEndOfStatement end else if have( tKeyword_while ) then begin stmtExpr := parseParenExpr; stmtNode := opNode( nWhile, stmtExpr, parseStatement ) end else if have( tkeyword_if ) then begin stmtExpr := parseParenExpr; stmtNode := opNode( nIf, stmtExpr, opNode( nIf, parseStatement, null ) ); if have( tKeyword_else ) then % have an "else" part % right(right(stmtNode)) := parseStatement end else if have( tKeyword_Print ) then begin mustBeLeftParen; stmtNode := null; while begin if tkType = tString then begin stmtNode := opNode( nSequence , stmtNode , opNode( nPrts, operandNode( nString, tkIntegerValue ), null ) ); readToken end else stmtNode := opNode( nSequence, stmtNode, opNode( nPrti, parseExpr( 0 ), null ) ); have( tComma ) end do begin end; mustBeRightparen; mustBeEndOfStatement; end else if have( tKeyword_Putc ) then begin stmtNode := opNode( nPrtc, parseParenExpr, null ); mustBeEndOfStatement end else if have( tLeftBrace ) then begin % block % stmtNode := parseStatementList( tRightBrace ); if not have( tRightBrace ) then synError( tkLine, tkColumn, "Expected ""}""." ); end else if have( tSemicolon ) then stmtNode := null else begin % unrecognised statement % skipStatement( "Unrecognised statement." ); stmtNode := null end if_various_tokens ; stmtNode end parseStatement ; % parses a statement list ending with the specified terminator % reference(node) procedure parseStatementList ( integer value terminator ) ; begin reference(node) listNode; listNode := null; while tkType not = terminator and tkType not = tEnd_of_input do listNode := opNode( nSequence, listNode, parseStatement ); listNode end parseStatementList ; % sets a node code and name % procedure setNode ( integer result nd; integer value ndCode; string(14) value name ) ; begin nd := ndCode; ndName( ndCode ) := name end; setNode( nIdentifier, 1, "Identifier" ); setNode( nString, 2, "String" ); setNode( nInteger, 3, "Integer" ); setNode( nSequence, 4, "Sequence" ); setNode( nIf, 5, "If" ); setNode( nPrtc, 6, "Prtc" ); setNode( nPrts, 7, "Prts" ); setNode( nPrti, 8, "Prti" ); setNode( nWhile, 9, "While" ); setNode( nAssign, 10, "Assign" ); setNode( nNegate, 11, "Negate" ); setNode( nNot, 12, "Not" ); setNode( nMultiply, 13, "Multiply" ); setNode( nDivide, 14, "Divide" ); setNode( nMod, 15, "Mod" ); setNode( nAdd, 16, "Add" ); setNode( nSubtract, 17, "Subtract" ); setNode( nLess, 18, "Less" ); setNode( nLessEqual, 19, "LessEqual" ); setNode( nGreater, 20, "Greater" ); setNode( nGreaterEqual, 21, "GreaterEqual" ); setNode( nEqual, 22, "Equal" ); setNode( nNotEqual, 23, "NotEqual" ); setNode( nAnd, 24, "And" ); setNode( nOr, 25, "Or" ); tOp_multiply := 1; tkName( tOp_multiply ) := "Op_multiply"; tkPrec( tOp_multiply ) := 5; tOp_divide := 2; tkName( tOp_divide ) := "Op_divide"; tkPrec( tOp_divide ) := 5; tOp_mod := 3; tkName( tOp_mod ) := "Op_mod"; tkPrec( tOp_mod ) := 5; tOp_add := 4; tkName( tOp_add ) := "Op_add"; tkPrec( tOp_add ) := 4; tOp_subtract := 5; tkName( tOp_subtract ) := "Op_subtract"; tkPrec( tOp_subtract ) := 4; tOp_negate := 6; tkName( tOp_negate ) := "Op_negate"; tkPrec( tOp_negate ) := -1; tOp_less := 7; tkName( tOp_less ) := "Op_less"; tkPrec( tOp_less ) := 3; tOp_lessequal := 8; tkName( tOp_lessequal ) := "Op_lessequal"; tkPrec( tOp_lessequal ) := 3; tOp_greater := 9; tkName( tOp_greater ) := "Op_greater"; tkPrec( tOp_greater ) := 3; tOp_greaterequal := 10; tkName( tOp_greaterequal ) := "Op_greaterequal"; tkPrec( tOp_greaterequal ) := 3; tOp_equal := 11; tkName( tOp_equal ) := "Op_equal"; tkPrec( tOp_equal ) := 2; tOp_notequal := 12; tkName( tOp_notequal ) := "Op_notequal"; tkPrec( tOp_notequal ) := 2; tOp_not := 13; tkName( tOp_not ) := "Op_not"; tkPrec( tOp_not ) := -1; tOp_assign := 14; tkName( tOp_assign ) := "Op_assign"; tkPrec( tOp_assign ) := -1; tOp_and := 15; tkName( tOp_and ) := "Op_and"; tkPrec( tOp_and ) := 1; tOp_or := 16; tkName( tOp_or ) := "Op_or"; tkPrec( tOp_or ) := 0; tLeftParen := 17; tkName( tLeftParen ) := "LeftParen"; tkPrec( tLeftParen ) := -1; tRightParen := 18; tkName( tRightParen ) := "RightParen"; tkPrec( tRightParen ) := -1; tLeftBrace := 19; tkName( tLeftBrace ) := "LeftBrace"; tkPrec( tLeftBrace ) := -1; tRightBrace := 20; tkName( tRightBrace ) := "RightBrace"; tkPrec( tRightBrace ) := -1; tSemicolon := 21; tkName( tSemicolon ) := "Semicolon"; tkPrec( tSemicolon ) := -1; tComma := 22; tkName( tComma ) := "Comma"; tkPrec( tComma ) := -1; tKeyword_if := 23; tkName( tKeyword_if ) := "Keyword_if"; tkPrec( tKeyword_if ) := -1; tKeyword_else := 24; tkName( tKeyword_else ) := "Keyword_else"; tkPrec( tKeyword_else ) := -1; tKeyword_while := 25; tkName( tKeyword_while ) := "Keyword_while"; tkPrec( tKeyword_while ) := -1; tKeyword_print := 26; tkName( tKeyword_print ) := "Keyword_print"; tkPrec( tKeyword_print ) := -1; tKeyword_putc := 27; tkName( tKeyword_putc ) := "Keyword_putc"; tkPrec( tKeyword_putc ) := -1; tIdentifier := 28; tkName( tIdentifier ) := "Identifier"; tkPrec( tIdentifier ) := -1; tInteger := 29; tkName( tInteger ) := "Integer"; tkPrec( tInteger ) := -1; tString := 30; tkName( tString ) := "String"; tkPrec( tString ) := -1; tEnd_of_input := 31; tkName( tEnd_of_input ) := "End_of_input"; tkPrec( tEnd_of_input ) := -1; MAX_TOKEN_TYPE := 31; TEXT_MAX := 4095; textNext := 0; PRIMARY_PREC := 6; for tkPos := 1 until MAX_TOKEN_TYPE do tkNode( tkPos ) := - tkPos; tkNode( tOp_multiply ) := nMultiply; tkNode( tOp_divide ) := nDivide; tkNode( tOp_mod ) := nMod; tkNode( tOp_add ) := nAdd; tkNode( tOp_subtract ) := nSubtract; tkNode( tOp_less ) := nLess; tkNode( tOp_lessequal ) := nLessEqual; tkNode( tOp_greater ) := nGreater; tkNode( tOp_greaterequal ) := nGreaterEqual; tkNode( tOp_equal ) := nEqual; tkNode( tOp_notequal ) := nNotEqual; tkNode( tOp_not ) := nNot; tkNode( tOp_and ) := nAnd; tkNode( tOp_or ) := nOr; stList := idList := null; % parse the output from the lexical analyser and output the linearised parse tree % readToken; writeNode( parseStatementList( tEnd_of_input ) ) end.