package ssw.mj.impl;

import ssw.mj.Errors.Message;
import ssw.mj.scanner.Token;
import ssw.mj.symtab.Obj;
import ssw.mj.symtab.Struct;

import java.util.EnumSet;

import static ssw.mj.Errors.Message.*;
import static ssw.mj.scanner.Token.Kind.*;

/**
 * Recursive-descent parser: one private method per grammar production, driven by a
 * single lookahead token. Syntax errors are reported through {@link #error} and the
 * parser resynchronizes at declaration, method and statement level instead of aborting.
 */
public final class Parser {

    /**
     * Maximum number of global variables per program
     */
    private static final int MAX_GLOBALS = 32767;

    /**
     * Maximum number of fields per class
     */
    private static final int MAX_FIELDS = 32767;

    /**
     * Maximum number of local variables per method
     */
    private static final int MAX_LOCALS = 127;

    /**
     * Minimum number of tokens that must have been scanned since the last reported
     * error before another error is reported (suppresses follow-up errors).
     */
    private static final int MIN_ERROR_DIST = 3;

    /**
     * Last recognized token.
     */
    private Token t;

    /**
     * Lookahead token (not recognized yet).
     */
    private Token la;

    /**
     * Shortcut to kind attribute of lookahead token (la).
     */
    private Token.Kind sym;

    /**
     * Associated scanner
     */
    public final Scanner scanner;

    /**
     * Associated code buffer
     */
    public final Code code;

    /**
     * Associated symbol table
     */
    public final Tab tab;

    /**
     * Number of tokens scanned since the last call to {@link #error}. Starts at
     * {@link #MIN_ERROR_DIST} so that the very first error is reported.
     */
    private int errorDist = MIN_ERROR_DIST;

    public Parser(Scanner scanner) {
        this.scanner = scanner;
        tab = new Tab(this);
        code = new Code(this);
        // Pseudo token to avoid crash when 1st symbol has scanner error.
        la = new Token(none, 1, 1);
    }

    /**
     * Reads ahead one symbol and increases the error distance.
     */
    private void scan() {
        t = la;
        la = scanner.next();
        sym = la.kind;
        errorDist++;
    }

    /**
     * Verifies symbol and reads ahead. On a mismatch an error is reported and the
     * lookahead token is NOT consumed.
     */
    private void check(Token.Kind expected) {
        if (sym == expected) {
            scan();
        } else {
            error(TOKEN_EXPECTED, expected);
        }
    }

    /**
     * Adds error message to the list of errors, unless fewer than
     * {@link #MIN_ERROR_DIST} tokens were scanned since the previous error. The error
     * is located at the lookahead token. Always resets the error distance.
     */
    public void error(Message msg, Object... msgParams) {
        if (errorDist >= MIN_ERROR_DIST) {
            scanner.errors.error(la.line, la.col, msg, msgParams);
        }
        errorDist = 0;
    }

    /**
     * Starts the analysis.
     */
    public void parse() {
        scan(); // scan first symbol, initializes look-ahead
        Program(); // start analysis
        check(eof);
    }

    // ===============================================
    // TODO Exercise UE-P-4: Symbol table handling
    // TODO Exercise UE-P-5-6: Code generation
    // ===============================================

    // First, follow ("break") and recovery sets.

    static final EnumSet<Token.Kind> firstConstDecl = EnumSet.of(final_);
    static final EnumSet<Token.Kind> firstVarDecl = EnumSet.of(ident);
    static final EnumSet<Token.Kind> firstClassDecl = EnumSet.of(class_);
    static final EnumSet<Token.Kind> breakDecl = EnumSet.of(lbrace, eof);
    static final EnumSet<Token.Kind> firstStatement =
            EnumSet.of(ident, if_, while_, break_, return_, read, print, lbrace, semicolon);
    static final EnumSet<Token.Kind> breakStatement = EnumSet.of(rbrace, else_, eof);
    static final EnumSet<Token.Kind> firstAsignop =
            EnumSet.of(assign, plusas, minusas, timesas, slashas, remas);
    static final EnumSet<Token.Kind> firstFactor = EnumSet.of(ident, number, charConst, new_, lpar);
    static final EnumSet<Token.Kind> firstMulop = EnumSet.of(times, slash, rem);
    static final EnumSet<Token.Kind> firstMethodDecl = EnumSet.of(ident, void_);
    static final EnumSet<Token.Kind> breakMethodDecl = EnumSet.of(rbrace, eof);

    /** Tokens at which {@link #recoverDecl()} stops skipping. */
    private static final EnumSet<Token.Kind> recoverDeclSet =
            enumUnion(breakDecl, firstConstDecl, firstVarDecl, firstClassDecl);

    /** Tokens at which {@link #recoverMethodDecl()} stops skipping. */
    private static final EnumSet<Token.Kind> recoverMethodDeclSet =
            enumUnion(breakMethodDecl, firstMethodDecl);

    /**
     * Tokens at which {@link #recoverStatement()} stops skipping: every statement
     * start or terminator except {@code ident} and {@code lbrace}.
     */
    private static final EnumSet<Token.Kind> recoverStatementSet =
            enumUnion(breakStatement, firstStatement);

    static {
        // NOTE(review): presumably excluded because they also occur in the middle of
        // statements and would stop the skipping too early - confirm.
        recoverStatementSet.remove(ident);
        recoverStatementSet.remove(lbrace);
    }

    /**
     * Returns a new set containing the elements of {@code first} and of all
     * {@code sets}. None of the arguments is modified.
     */
    @SafeVarargs
    static <E extends Enum<E>> EnumSet<E> enumUnion(EnumSet<E> first, EnumSet<E>... sets) {
        final var union = EnumSet.copyOf(first);
        for (final var set : sets) {
            union.addAll(set);
        }
        return union;
    }

    // ---------------------------------
    // One top-down parsing method per production

    /**
     * <pre>
     * Program = "program" ident
     *           { ConstDecl | VarDecl | ClassDecl }
     *           "{" { MethodDecl } "}" .
     * </pre>
     * Inserts the program object, opens the global scope and stores its locals in the
     * program object before closing the scope again.
     */
    private void Program() {
        check(Token.Kind.program);
        check(ident);
        final var programObj = tab.insert(Obj.Kind.Prog, t.val, Tab.noType);
        tab.openScope();

        while (!breakDecl.contains(sym)) {
            if (firstVarDecl.contains(sym)) {
                VarDecl();
            } else if (firstClassDecl.contains(sym)) {
                ClassDecl();
            } else if (firstConstDecl.contains(sym)) {
                ConstDecl();
            } else {
                recoverDecl();
            }
        }
        if (tab.curScope.nVars() > MAX_GLOBALS) {
            error(TOO_MANY_GLOBALS);
        }

        check(lbrace);
        while (!breakMethodDecl.contains(sym)) {
            if (firstMethodDecl.contains(sym)) {
                MethodDecl();
            } else {
                recoverMethodDecl();
            }
        }
        check(rbrace);

        programObj.locals = tab.curScope.locals();
        tab.closeScope();
    }

    /**
     * <pre>
     * ConstDecl = "final" Type ident "=" ( number | charConst ) ";" .
     * </pre>
     * A number requires the declared type to be int, a character constant requires
     * char. The constant is only inserted into the symbol table when a value is present.
     */
    private void ConstDecl() {
        check(final_);
        final var type = Type();
        check(ident);
        final var constName = t.val;
        check(assign);

        if (sym == number || sym == charConst) {
            final var requiredType = (sym == number) ? Tab.intType : Tab.charType;
            // Reported before scanning so that the error is located at the constant.
            if (type != requiredType) {
                error(INCOMPATIBLE_TYPES);
            }
            scan();
            final var constObj = tab.insert(Obj.Kind.Con, constName, type);
            constObj.val = t.numVal;
        } else {
            error(TOKEN_EXPECTED, "number or character constant");
        }
        check(semicolon);
    }

    /**
     * <pre>
     * VarDecl = Type ident { "," ident } ";" .
     * </pre>
     * Inserts every declared name as a variable of the parsed type into the current scope.
     */
    private void VarDecl() {
        final var type = Type();
        check(ident);
        tab.insert(Obj.Kind.Var, t.val, type);
        while (sym == comma) {
            scan();
            check(ident);
            tab.insert(Obj.Kind.Var, t.val, type);
        }
        check(semicolon);
    }

    /**
     * <pre>
     * ClassDecl = "class" ident "{" { VarDecl } "}" .
     * </pre>
     * The type object is inserted before the fields are parsed; the fields are the
     * locals of a scope that is opened only for the class body.
     */
    private void ClassDecl() {
        check(class_);
        check(ident);
        final var className = t.val;
        check(lbrace);

        final var classType = new Struct(Struct.Kind.Class);
        tab.insert(Obj.Kind.Type, className, classType);

        tab.openScope();
        while (firstVarDecl.contains(sym)) {
            VarDecl();
        }
        classType.fields = tab.curScope.locals();
        if (tab.curScope.nVars() > MAX_FIELDS) {
            error(TOO_MANY_FIELDS);
        }
        tab.closeScope();

        check(rbrace);
    }

    /**
     * <pre>
     * Type = ident [ "[" "]" ] .
     * </pre>
     *
     * @return the named type, or a new array type with that element type if the name
     *         is followed by brackets
     */
    private Struct Type() {
        check(ident);
        final var typeName = t.val;

        var isArray = false;
        if (sym == lbrack) {
            isArray = true;
            scan();
            check(rbrack);
        }

        final Struct type;
        if (typeName == null) {
            // Can only happen after check(ident) failed (already reported), assuming
            // the previous token carries no value. A switch on a null String would throw.
            type = Tab.noType;
        } else if ("int".equals(typeName)) {
            type = Tab.intType;
        } else if ("char".equals(typeName)) {
            type = Tab.charType;
        } else {
            final var obj = tab.find(typeName);
            if (obj == tab.noObj) {
                error(NAME_NOT_FOUND, typeName);
            } else if (obj.kind != Obj.Kind.Type) {
                error(TYPE_EXPECTED);
            }
            type = obj.type;
        }

        return isArray ? new Struct(type) : type;
    }

    /**
     * <pre>
     * MethodDecl = ( Type | "void" ) ident "(" [ FormPars ] ")" { VarDecl } Block .
     * </pre>
     * Parameters and local variables live in a scope opened for the method; they are
     * stored in the method object once the body has been parsed.
     */
    private void MethodDecl() {
        Struct returnType = Tab.noType;
        if (sym == ident) {
            returnType = Type();
        } else if (sym == void_) {
            scan();
        }

        check(ident);
        final var methodObj = tab.insert(Obj.Kind.Meth, t.val, returnType);

        check(lpar);
        tab.openScope();
        var nPars = 0;
        if (sym == ident) {
            nPars = FormPars();
        }
        check(rpar);

        while (firstVarDecl.contains(sym)) {
            VarDecl();
        }
        if (tab.curScope.nVars() > MAX_LOCALS) {
            error(TOO_MANY_LOCALS);
        }

        // Constant first: the name may be unset if no identifier was found.
        if ("main".equals(methodObj.name)) {
            if (methodObj.type != Tab.noType) {
                error(MAIN_NOT_VOID);
            }
            if (nPars > 0) {
                error(MAIN_WITH_PARAMS);
            }
        }

        Block();

        methodObj.locals = tab.curScope.locals();
        methodObj.nPars = nPars;
        tab.closeScope();
    }

    /**
     * <pre>
     * FormPars = Type ident { "," Type ident } .
     * </pre>
     *
     * @return the number of parameters parsed
     */
    private int FormPars() {
        var nPars = 0;
        var type = Type();
        check(ident);
        tab.insert(Obj.Kind.Var, t.val, type);
        nPars++;
        while (sym == comma) {
            scan();
            type = Type();
            check(ident);
            tab.insert(Obj.Kind.Var, t.val, type);
            nPars++;
        }
        return nPars;
    }

    /**
     * <pre>
     * Block = "{" { Statement } "}" .
     * </pre>
     * The statement list ends at "}", end of file, or a stray "else" (which then
     * fails the check for "}").
     */
    private void Block() {
        check(lbrace);
        while (!breakStatement.contains(sym)) {
            Statement();
        }
        check(rbrace);
    }

    /**
     * Parses exactly ONE statement.
     * <pre>
     * Statement = Designator ( Assignop Expr | ActPars | "++" | "--" ) ";"
     *           | "if" "(" Condition ")" Statement [ "else" Statement ]
     *           | "while" "(" Condition ")" Statement
     *           | "break" ";"
     *           | "return" [ Expr ] ";"
     *           | "read" "(" Designator ")" ";"
     *           | "print" "(" Expr [ "," number ] ")" ";"
     *           | Block
     *           | ";" .
     * </pre>
     */
    private void Statement() {
        if (breakStatement.contains(sym)) {
            // A statement is required here (e.g. body of if/while) but the enclosing
            // construct already ends. Report it and leave the terminator to the caller.
            error(STATEMENT_RECOVERY);
            return;
        }

        switch (sym) {
            case ident -> {
                Designator();
                if (firstAsignop.contains(sym)) {
                    AssignOp();
                    Expr();
                } else if (sym == lpar) {
                    ActPars();
                } else if (sym == pplus || sym == mminus) {
                    scan();
                } else {
                    error(TOKEN_EXPECTED, "unexpected token. assignment token (=, +=, -=, *=, /=, %=), method call (\"(\"), increment (++) or decrement (--)");
                }
                check(semicolon);
            }
            case if_ -> {
                scan();
                check(lpar);
                Condition();
                check(rpar);
                Statement();
                if (sym == else_) {
                    scan();
                    Statement();
                }
            }
            case while_ -> {
                scan();
                check(lpar);
                Condition();
                check(rpar);
                Statement();
            }
            case break_ -> {
                scan();
                check(semicolon);
            }
            case return_ -> {
                scan();
                // An expression starts with "-" or with the start of a factor.
                if (sym == minus || firstFactor.contains(sym)) {
                    Expr();
                }
                check(semicolon);
            }
            case read -> {
                scan();
                check(lpar);
                Designator();
                check(rpar);
                check(semicolon);
            }
            case print -> {
                scan();
                check(lpar);
                Expr();
                if (sym == comma) {
                    scan();
                    check(number);
                }
                check(rpar);
                check(semicolon);
            }
            case lbrace -> Block();
            case semicolon -> scan();
            default -> recoverStatement();
        }
    }

    /**
     * <pre>
     * Designator = ident { "." ident | "[" [ "~" ] Expr "]" } .
     * </pre>
     * Looks up the name and every selected field, tracking the type reached so far.
     */
    private void Designator() {
        check(ident);
        final var targetObj = tab.find(t.val);
        if (targetObj == tab.noObj) {
            error(NAME_NOT_FOUND, t.val);
        }
        var lastType = targetObj.type;

        while (sym == period || sym == lbrack) {
            if (sym == period) {
                scan();
                check(ident);
                final var fieldObj = tab.findField(t.val, lastType);
                if (fieldObj == tab.noObj) {
                    error(FIELD_NOT_FOUND, t.val);
                }
                lastType = fieldObj.type;
            } else {
                scan();
                if (sym == tilde) {
                    scan();
                }
                Expr();
                // NOTE(review): elemType is presumably null for non-array types; fall
                // back to noType so a further selector cannot hit a null type - confirm.
                final var elemType = lastType.elemType;
                lastType = (elemType != null) ? elemType : Tab.noType;
                check(rbrack);
            }
        }
    }

    /**
     * <pre>
     * Assignop = "=" | "+=" | "-=" | "*=" | "/=" | "%=" .
     * </pre>
     */
    private void AssignOp() {
        if (firstAsignop.contains(sym)) {
            scan();
        } else {
            error(TOKEN_EXPECTED, "unexpected token. assignment token (=, +=, -=, *=, /=, %=), method call (\"(\"), increment (++) or decrement (--)");
        }
    }

    /**
     * <pre>
     * Expr = [ "-" ] Term { Addop Term } .
     * </pre>
     */
    private void Expr() {
        if (sym == minus) {
            scan();
        }
        Term();
        while (sym == plus || sym == minus) {
            Addop();
            Term();
        }
    }

    /**
     * <pre>
     * ActPars = "(" [ Expr { "," Expr } ] ")" .
     * </pre>
     */
    private void ActPars() {
        check(lpar);
        if (sym == minus || firstFactor.contains(sym)) {
            Expr();
            while (sym == comma) {
                scan();
                Expr();
            }
        }
        check(rpar);
    }

    /**
     * <pre>
     * Term = Factor { Mulop Factor } .
     * </pre>
     */
    private void Term() {
        Factor();
        while (firstMulop.contains(sym)) {
            Mulop();
            Factor();
        }
    }

    /**
     * <pre>
     * Addop = "+" | "-" .
     * </pre>
     */
    private void Addop() {
        if (sym == plus || sym == minus) {
            scan();
        } else {
            error(TOKEN_EXPECTED, "minus or plus expected");
        }
    }

    /**
     * <pre>
     * Factor = Designator [ ActPars ]
     *        | number
     *        | charConst
     *        | "new" ident [ "[" Expr "]" ]
     *        | "(" Expr ")" .
     * </pre>
     * For {@code new} without brackets the name must denote a class type.
     */
    private void Factor() {
        switch (sym) {
            case ident -> {
                Designator();
                if (sym == lpar) {
                    ActPars();
                }
            }
            case number, charConst -> scan();
            case new_ -> {
                scan();
                check(ident);
                if (sym == lbrack) {
                    scan();
                    Expr();
                    check(rbrack);
                } else {
                    final var typeObj = tab.find(t.val);
                    if (typeObj == tab.noObj) {
                        error(NAME_NOT_FOUND, t.val);
                    } else if (typeObj.type.kind != Struct.Kind.Class) {
                        error(CLASS_TYPE_EXPECTED);
                    }
                }
            }
            case lpar -> {
                scan();
                Expr();
                check(rpar);
            }
            default -> error(TOKEN_EXPECTED, "unexpected token. identifier, number, character constant, new or \"(\"");
        }
    }

    /**
     * <pre>
     * Mulop = "*" | "/" | "%" .
     * </pre>
     */
    private void Mulop() {
        if (firstMulop.contains(sym)) {
            scan();
        } else {
            error(TOKEN_EXPECTED, "expected *, /, %");
        }
    }

    /**
     * <pre>
     * Condition = CondTerm { "||" CondTerm } .
     * </pre>
     */
    private void Condition() {
        CondTerm();
        while (sym == or) {
            scan();
            CondTerm();
        }
    }

    /**
     * <pre>
     * CondTerm = CondFact { "&amp;&amp;" CondFact } .
     * </pre>
     */
    private void CondTerm() {
        CondFact();
        while (sym == and) {
            scan();
            CondFact();
        }
    }

    /**
     * <pre>
     * CondFact = Expr Relop Expr .
     * </pre>
     */
    private void CondFact() {
        Expr();
        Relop();
        Expr();
    }

    /**
     * <pre>
     * Relop = "==" | "!=" | "&gt;" | "&gt;=" | "&lt;" | "&lt;=" .
     * </pre>
     */
    private void Relop() {
        switch (sym) {
            case eql, neq, gtr, lss, leq, geq -> scan();
            default -> error(TOKEN_EXPECTED, "unexpected token. ==, !=, >, >=, <, <=");
        }
    }

    // ------------------------------------
    // Error recovery methods
    // TODO Exercise UE-P-5: Check idents for Type kind

    /**
     * Reports a declaration error and skips at least one token until the lookahead
     * can start a declaration or ends the declaration part.
     */
    private void recoverDecl() {
        error(DECLARATION_RECOVERY);
        do {
            scan();
        } while (!recoverDeclSet.contains(sym));
        // The skipped tokens must not count as error-free progress.
        errorDist = 0;
    }

    /**
     * Reports a method declaration error and skips at least one token until the
     * lookahead can start a method declaration or ends the method list.
     */
    private void recoverMethodDecl() {
        error(METHOD_DECL_RECOVERY);
        do {
            scan();
        } while (!recoverMethodDeclSet.contains(sym));
        errorDist = 0;
    }

    /**
     * Reports a statement error and skips at least one token until the lookahead is
     * contained in the statement recovery set.
     */
    private void recoverStatement() {
        error(STATEMENT_RECOVERY);
        do {
            scan();
        } while (!recoverStatementSet.contains(sym));
        errorDist = 0;
    }
}