package ssw.mj.impl;

// NOTE(review): ControlFlow, MethodDecl (the javassist type), FactoryConfigurationError and
// UNDEFINED_ESCAPE appear unused in this file; they are kept so nothing outside this view breaks.
import javassist.bytecode.analysis.ControlFlow;
import javassist.compiler.ast.MethodDecl;
import ssw.mj.Errors;
import ssw.mj.Errors.Message;
import ssw.mj.scanner.Token;

import javax.xml.stream.FactoryConfigurationError;
import java.util.EnumSet;

import static ssw.mj.Errors.Message.TOKEN_EXPECTED;
import static ssw.mj.Errors.Message.UNDEFINED_ESCAPE;
import static ssw.mj.scanner.Token.Kind.*;

/**
 * Recursive-descent parser: one private method per grammar production, driven by a single
 * lookahead token ({@code la}/{@code sym}) obtained from the scanner.
 */
public final class Parser {

    /**
     * Maximum number of global variables per program
     */
    private static final int MAX_GLOBALS = 32767;

    /**
     * Maximum number of fields per class
     */
    private static final int MAX_FIELDS = 32767;

    /**
     * Maximum number of local variables per method
     */
    private static final int MAX_LOCALS = 127;

    /**
     * Last recognized token.
     */
    private Token t;

    /**
     * Lookahead token (not recognized yet).
     */
    private Token la;

    /**
     * Shortcut to kind attribute of lookahead token (la).
     */
    private Token.Kind sym;

    /**
     * According scanner
     */
    public final Scanner scanner;

    /**
     * According code buffer
     */
    public final Code code;

    /**
     * According symbol table
     */
    public final Tab tab;

    public Parser(Scanner scanner) {
        this.scanner = scanner;
        tab = new Tab(this);
        code = new Code(this);
        // Pseudo token to avoid crash when 1st symbol has scanner error.
        la = new Token(none, 1, 1);
    }

    /**
     * Reads ahead one symbol: the lookahead becomes the last recognized token and a new
     * lookahead is fetched from the scanner.
     */
    private void scan() {
        t = la;
        la = scanner.next();
        sym = la.kind;
    }

    /**
     * Verifies symbol and reads ahead.
     *
     * @param expected the token kind the lookahead must have; otherwise an error is reported
     */
    private void check(Token.Kind expected) {
        if (sym == expected) {
            scan();
        } else {
            error(TOKEN_EXPECTED, expected);
        }
    }

    /**
     * Adds error message to the list of errors (at the position of the lookahead token) and
     * aborts parsing by throwing {@link Errors.PanicMode}.
     *
     * @param msg       the error message kind
     * @param msgParams parameters for the message
     */
    public void error(Message msg, Object... msgParams) {
        // TODO Exercise UE-P-3: Replace panic mode with error recovery (i.e., keep track of error distance)
        // TODO Exercise UE-P-3: Hint: Replacing panic mode also affects scan() method
        scanner.errors.error(la.line, la.col, msg, msgParams);
        throw new Errors.PanicMode();
    }

    /**
     * Starts the analysis.
     */
    public void parse() {
        scan(); // scan first symbol, initializes look-ahead
        Program(); // start analysis
        check(eof);
    }

    // ===============================================
    // TODO Exercise UE-P-2: Implementation of parser
    // TODO Exercise UE-P-3: Error recovery methods
    // TODO Exercise UE-P-4: Symbol table handling
    // TODO Exercise UE-P-5-6: Code generation
    // ===============================================

    // TODO Exercise UE-P-3: Error distance

    // TODO Exercise UE-P-2 + Exercise 3: Sets to handle certain first, follow, and recover sets

    // First sets of the productions that need more than a single-token decision.
    static final EnumSet<Token.Kind> firstConstDecl = EnumSet.of(Token.Kind.final_);
    static final EnumSet<Token.Kind> firstVarDecl = EnumSet.of(Token.Kind.ident);
    static final EnumSet<Token.Kind> firstClassDecl = EnumSet.of(Token.Kind.class_);
    static final EnumSet<Token.Kind> firstStatement =
            EnumSet.of(ident, if_, while_, break_, return_, read, print, lbrace, semicolon);
    static final EnumSet<Token.Kind> firstAsignop =
            EnumSet.of(assign, plusas, minusas, timesas, slashas, remas);
    static final EnumSet<Token.Kind> firstFactor = EnumSet.of(ident, number, charConst, new_, lpar);
    static final EnumSet<Token.Kind> firstMulop = EnumSet.of(times, slash, rem);

    // Derived sets; declared after the sets they are built from (static initialization order).
    private static final EnumSet<Token.Kind> firstDecl =
            enumUnion(firstConstDecl, firstVarDecl, firstClassDecl);
    // An expression starts with an optional unary minus followed by a factor.
    private static final EnumSet<Token.Kind> firstExpr = enumUnion(EnumSet.of(minus), firstFactor);

    /**
     * Builds the union of several enum sets without modifying any of the arguments.
     *
     * @param first the first set (copied, never mutated)
     * @param sets  further sets whose elements are added to the copy
     * @return a new set containing every element of {@code first} and of all {@code sets}
     */
    @SafeVarargs
    static <E extends Enum<E>> EnumSet<E> enumUnion(EnumSet<E> first, EnumSet<E>... sets) {
        final EnumSet<E> union = EnumSet.copyOf(first);
        for (final EnumSet<E> set : sets) {
            union.addAll(set);
        }
        return union;
    }

    // ---------------------------------

    // TODO Exercise UE-P-2: One top-down parsing method per production

    /**
     * <pre>
     * Program = "program" ident
     *           { ConstDecl | VarDecl | ClassDecl }
     *           "{" { MethodDecl } "}" .
     * </pre>
     */
    private void Program() {
        check(Token.Kind.program);
        check(Token.Kind.ident);

        while (firstDecl.contains(sym)) {
            if (firstVarDecl.contains(sym)) {
                VarDecl();
            } else if (firstClassDecl.contains(sym)) {
                ClassDecl();
            } else {
                ConstDecl();
            }
        }

        check(lbrace);
        while (sym == ident || sym == void_) {
            MethodDecl();
        }
        check(rbrace);
    }

    /**
     * <pre>
     * ConstDecl = "final" Type ident "=" ( number | charConst ) ";" .
     * </pre>
     */
    private void ConstDecl() {
        check(Token.Kind.final_);
        Type();
        check(Token.Kind.ident);
        check(Token.Kind.assign);
        if (sym == Token.Kind.number || sym == Token.Kind.charConst) {
            scan();
        } else {
            error(TOKEN_EXPECTED, "number or character constant");
        }
        check(Token.Kind.semicolon);
    }

    /**
     * <pre>
     * VarDecl = Type ident { "," ident } ";" .
     * </pre>
     */
    private void VarDecl() {
        Type();
        check(ident);
        while (sym == Token.Kind.comma) {
            scan();
            check(ident);
        }
        check(semicolon);
    }

    /**
     * <pre>
     * ClassDecl = "class" ident "{" { VarDecl } "}" .
     * </pre>
     */
    private void ClassDecl() {
        check(class_);
        check(ident);
        check(lbrace);
        while (firstVarDecl.contains(sym)) {
            VarDecl();
        }
        check(rbrace);
    }

    /**
     * <pre>
     * Type = ident [ "[" "]" ] .
     * </pre>
     */
    private void Type() {
        check(ident);
        if (sym == lbrack) {
            scan();
            check(rbrack);
        }
    }

    /**
     * <pre>
     * MethodDecl = ( Type | "void" ) ident "(" [ FormPars ] ")" { VarDecl } Block .
     * </pre>
     */
    private void MethodDecl() {
        if (sym == ident) {
            Type();
        } else if (sym == void_) {
            scan();
        }
        check(ident);
        check(lpar);
        if (sym == ident) {
            FormPars();
        }
        check(rpar);
        while (firstVarDecl.contains(sym)) {
            VarDecl();
        }
        Block();
    }

    /**
     * <pre>
     * FormPars = Type ident { "," Type ident } .
     * </pre>
     */
    private void FormPars() {
        Type();
        check(ident);
        while (sym == comma) {
            scan();
            Type();
            check(ident);
        }
    }

    /**
     * <pre>
     * Block = "{" { Statement } "}" .
     * </pre>
     * The repetition lives here so that {@link #Statement()} parses exactly one statement.
     */
    private void Block() {
        check(lbrace);
        while (firstStatement.contains(sym)) {
            Statement();
        }
        check(rbrace);
    }

    /**
     * Parses exactly one statement; reports an error if the lookahead cannot start a statement.
     * <pre>
     * Statement = Designator ( Assignop Expr | ActPars | "++" | "--" ) ";"
     *           | "if" "(" Condition ")" Statement [ "else" Statement ]
     *           | "while" "(" Condition ")" Statement
     *           | "break" ";"
     *           | "return" [ Expr ] ";"
     *           | "read" "(" Designator ")" ";"
     *           | "print" "(" Expr [ "," number ] ")" ";"
     *           | Block
     *           | ";" .
     * </pre>
     */
    private void Statement() {
        switch (sym) {
            case ident -> {
                Designator();
                if (firstAsignop.contains(sym)) {
                    AssignOp();
                    Expr();
                } else if (sym == lpar) {
                    ActPars();
                } else if (sym == pplus || sym == mminus) {
                    scan();
                } else {
                    // A bare designator is not a statement.
                    error(TOKEN_EXPECTED, "assignment operator, \"(\", ++ or --");
                }
                check(semicolon);
            }
            case if_ -> {
                scan();
                check(lpar);
                Condition();
                check(rpar);
                Statement();
                if (sym == else_) {
                    scan();
                    Statement();
                }
            }
            case while_ -> {
                scan();
                check(lpar);
                Condition();
                check(rpar);
                Statement();
            }
            case break_ -> {
                scan();
                check(semicolon);
            }
            case return_ -> {
                scan();
                // The optional return value is an expression, so test against first(Expr).
                if (firstExpr.contains(sym)) {
                    Expr();
                }
                check(semicolon);
            }
            case read -> {
                scan();
                check(lpar);
                Designator();
                check(rpar);
                check(semicolon);
            }
            case print -> {
                scan();
                check(lpar);
                Expr();
                if (sym == comma) {
                    scan();
                    check(number);
                }
                check(rpar);
                check(semicolon);
            }
            case lbrace -> Block();
            case semicolon -> scan();
            default -> error(TOKEN_EXPECTED, "statement");
        }
    }

    /**
     * <pre>
     * Designator = ident { "." ident | "[" [ "~" ] Expr "]" } .
     * </pre>
     */
    private void Designator() {
        check(ident);
        while (sym == period || sym == lbrack) {
            if (sym == period) {
                scan();
                check(ident);
            } else {
                scan();
                if (sym == tilde) {
                    scan();
                }
                Expr();
                check(rbrack);
            }
        }
    }

    /**
     * <pre>
     * Assignop = "=" | "+=" | "-=" | "*=" | "/=" | "%=" .
     * </pre>
     */
    private void AssignOp() {
        switch (sym) {
            case assign, plusas, minusas, timesas, slashas, remas -> scan();
            default -> error(TOKEN_EXPECTED, "unexpected token. assignment token (=, +=, -=, *=, /=, %=), method call (\"(\"), increment (++) or decrement (--)");
        }
    }

    /**
     * <pre>
     * Expr = [ "-" ] Term { Addop Term } .
     * </pre>
     */
    private void Expr() {
        if (sym == minus) {
            scan();
        }
        Term();
        while (sym == plus || sym == minus) {
            Addop();
            Term();
        }
    }

    /**
     * <pre>
     * ActPars = "(" [ Expr { "," Expr } ] ")" .
     * </pre>
     */
    private void ActPars() {
        check(lpar);
        if (firstExpr.contains(sym)) {
            Expr();
            while (sym == comma) {
                scan();
                Expr();
            }
        }
        check(rpar);
    }

    /**
     * <pre>
     * Term = Factor { Mulop Factor } .
     * </pre>
     */
    private void Term() {
        Factor();
        while (firstMulop.contains(sym)) {
            Mulop();
            Factor();
        }
    }

    /**
     * <pre>
     * Addop = "+" | "-" .
     * </pre>
     */
    private void Addop() {
        if (sym == plus || sym == minus) {
            scan();
        } else {
            error(TOKEN_EXPECTED, "minus or plus expected");
        }
    }

    /**
     * <pre>
     * Factor = Designator [ ActPars ]
     *        | number
     *        | charConst
     *        | "new" ident [ "[" Expr "]" ]
     *        | "(" Expr ")" .
     * </pre>
     */
    private void Factor() {
        switch (sym) {
            case ident -> {
                Designator();
                // A designator followed by "(" is a method call used as a value.
                if (sym == lpar) {
                    ActPars();
                }
            }
            case number, charConst -> scan();
            case new_ -> {
                scan();
                check(ident);
                if (sym == lbrack) {
                    scan();
                    Expr();
                    check(rbrack);
                }
            }
            case lpar -> {
                scan();
                Expr();
                check(rpar);
            }
            default -> error(TOKEN_EXPECTED, "unexpected token. identifier, number, character constant, new or \"(\"");
        }
    }

    /**
     * <pre>
     * Mulop = "*" | "/" | "%" .
     * </pre>
     */
    private void Mulop() {
        if (sym == times || sym == slash || sym == rem) {
            scan();
        } else {
            error(TOKEN_EXPECTED, "expected *, /, %");
        }
    }

    /**
     * <pre>
     * Condition = CondTerm { "||" CondTerm } .
     * </pre>
     */
    private void Condition() {
        CondTerm();
        while (sym == or) {
            scan();
            CondTerm();
        }
    }

    /**
     * <pre>
     * CondTerm = CondFact { "&&" CondFact } .
     * </pre>
     */
    private void CondTerm() {
        CondFact();
        while (sym == and) {
            scan();
            CondFact();
        }
    }

    /**
     * <pre>
     * CondFact = Expr Relop Expr .
     * </pre>
     */
    private void CondFact() {
        Expr();
        Relop();
        Expr();
    }

    /**
     * <pre>
     * Relop = "==" | "!=" | ">" | ">=" | "<" | "<=" .
     * </pre>
     */
    private void Relop() {
        switch (sym) {
            case eql, neq, gtr, lss, leq, geq -> scan();
            default -> error(TOKEN_EXPECTED, "unexpected token. ==, !=, >, >=, <, <=");
        }
    }

    // ...

    // ------------------------------------

    // TODO Exercise UE-P-3: Error recovery methods: recoverDecl, recoverMethodDecl and recoverStat (+ TODO Exercise UE-P-5: Check idents for Type kind)

    // ====================================
    // ====================================
}