From a5311ae70d9fcae35f7f641fd79a0e93b393876a Mon Sep 17 00:00:00 2001 From: quirinecker Date: Sun, 26 Oct 2025 14:42:57 +0100 Subject: [PATCH] every test but 3 are succeeding --- .../src/ssw/mj/impl/Parser.java | 582 ++++++++++++++---- 1 file changed, 475 insertions(+), 107 deletions(-) diff --git a/MicroJava Compiler/src/ssw/mj/impl/Parser.java b/MicroJava Compiler/src/ssw/mj/impl/Parser.java index 940ef3f..b57c49e 100644 --- a/MicroJava Compiler/src/ssw/mj/impl/Parser.java +++ b/MicroJava Compiler/src/ssw/mj/impl/Parser.java @@ -1,144 +1,512 @@ package ssw.mj.impl; +import javassist.bytecode.analysis.ControlFlow; +import javassist.compiler.ast.MethodDecl; import ssw.mj.Errors; import ssw.mj.Errors.Message; import ssw.mj.scanner.Token; +import javax.xml.stream.FactoryConfigurationError; +import java.util.EnumSet; + import static ssw.mj.Errors.Message.TOKEN_EXPECTED; -import static ssw.mj.scanner.Token.Kind.eof; -import static ssw.mj.scanner.Token.Kind.none; +import static ssw.mj.Errors.Message.UNDEFINED_ESCAPE; +import static ssw.mj.scanner.Token.Kind.*; public final class Parser { - /** - * Maximum number of global variables per program - */ - private static final int MAX_GLOBALS = 32767; + /** + * Maximum number of global variables per program + */ + private static final int MAX_GLOBALS = 32767; - /** - * Maximum number of fields per class - */ - private static final int MAX_FIELDS = 32767; + /** + * Maximum number of fields per class + */ + private static final int MAX_FIELDS = 32767; - /** - * Maximum number of local variables per method - */ - private static final int MAX_LOCALS = 127; + /** + * Maximum number of local variables per method + */ + private static final int MAX_LOCALS = 127; - /** - * Last recognized token; - */ - private Token t; + /** + * Last recognized token; + */ + private Token t; - /** - * Lookahead token (not recognized).) - */ - private Token la; + /** + * Lookahead token (not recognized).) + */ + private Token la; - /** - * Shortcut to kind attribute of lookahead token (la). - */ - private Token.Kind sym; + /** + * Shortcut to kind attribute of lookahead token (la). + */ + private Token.Kind sym; - /** - * According scanner - */ - public final Scanner scanner; + /** + * According scanner + */ + public final Scanner scanner; - /** - * According code buffer - */ - public final Code code; + /** + * According code buffer + */ + public final Code code; - /** - * According symbol table - */ - public final Tab tab; + /** + * According symbol table + */ + public final Tab tab; - public Parser(Scanner scanner) { - this.scanner = scanner; - tab = new Tab(this); - code = new Code(this); - // Pseudo token to avoid crash when 1st symbol has scanner error. - la = new Token(none, 1, 1); - } + public Parser(Scanner scanner) { + this.scanner = scanner; + tab = new Tab(this); + code = new Code(this); + // Pseudo token to avoid crash when 1st symbol has scanner error. + la = new Token(none, 1, 1); + } - /** - * Reads ahead one symbol. - */ - private void scan() { - t = la; - la = scanner.next(); - sym = la.kind; - } + /** + * Reads ahead one symbol. + */ + private void scan() { + if (la != null) { + System.out.println(la.val); + } - /** - * Verifies symbol and reads ahead. - */ - private void check(Token.Kind expected) { - if (sym == expected) { - scan(); - } else { - error(TOKEN_EXPECTED, expected); - } - } + t = la; + la = scanner.next(); + sym = la.kind; + } - /** - * Adds error message to the list of errors. - */ - public void error(Message msg, Object... msgParams) { - // TODO Exercise UE-P-3: Replace panic mode with error recovery (i.e., keep track of error distance) - // TODO Exercise UE-P-3: Hint: Replacing panic mode also affects scan() method - scanner.errors.error(la.line, la.col, msg, msgParams); - throw new Errors.PanicMode(); - } + /** + * Verifies symbol and reads ahead. + */ + private void check(Token.Kind expected) { + if (sym == expected) { + scan(); + } else { + error(TOKEN_EXPECTED, expected); + } + } - /** - * Starts the analysis. - */ - public void parse() { - scan(); // scan first symbol, initializes look-ahead - Program(); // start analysis - check(eof); - } + /** + * Adds error message to the list of errors. + */ + public void error(Message msg, Object... msgParams) { + // TODO Exercise UE-P-3: Replace panic mode with error recovery (i.e., keep track of error distance) + // TODO Exercise UE-P-3: Hint: Replacing panic mode also affects scan() method + scanner.errors.error(la.line, la.col, msg, msgParams); + throw new Errors.PanicMode(); + } + + /** + * Starts the analysis. + */ + public void parse() { + scan(); // scan first symbol, initializes look-ahead + Program(); // start analysis + check(eof); + } - // =============================================== - // TODO Exercise UE-P-2: Implementation of parser - // TODO Exercise UE-P-3: Error recovery methods - // TODO Exercise UE-P-4: Symbol table handling - // TODO Exercise UE-P-5-6: Code generation - // =============================================== + // =============================================== + // TODO Exercise UE-P-2: Implementation of parser + // TODO Exercise UE-P-3: Error recovery methods + // TODO Exercise UE-P-4: Symbol table handling + // TODO Exercise UE-P-5-6: Code generation + // =============================================== - // TODO Exercise UE-P-3: Error distance + // TODO Exercise UE-P-3: Error distance - // TODO Exercise UE-P-2 + Exercise 3: Sets to handle certain first, follow, and recover sets + // TODO Exercise UE-P-2 + Exercise 3: Sets to handle certain first, follow, and recover sets - static { - // Initialize first and follow sets. - } + // Initialize first and follow sets. + static final EnumSet firstConstDecl = EnumSet.of(Token.Kind.final_); + static final EnumSet firstVarDecl = EnumSet.of(Token.Kind.ident); + static final EnumSet firstClassDecl = EnumSet.of(Token.Kind.class_); + static final EnumSet firstStatement = EnumSet.of(ident, if_, while_, break_, return_, read, print, lbrace, semicolon); + static final EnumSet firstAsignop = EnumSet.of(assign, plusas, minusas, timesas, slashas, remas); + static final EnumSet firstFactor = EnumSet.of(ident, number, charConst, new_, lpar); + static final EnumSet firstMulop = EnumSet.of(times, slash, rem); - // --------------------------------- + static > EnumSet enumUnion(EnumSet first, EnumSet ...sets) { + final var copy = EnumSet.copyOf(first); + for (final var set : sets) { + copy.addAll(set); + } - // TODO Exercise UE-P-2: One top-down parsing method per production + return copy; + } - /** - * Program =
- * "program" ident
- * { ConstDecl | VarDecl | ClassDecl }
- * "{" { MethodDecl } "}" . - */ - private void Program() { - // TODO Exercise UE-P-2 - } + // --------------------------------- - // ... + // TODO Exercise UE-P-2: One top-down parsing method per production - // ------------------------------------ + /** + * Program =
+ * "program" ident
+ * { ConstDecl | VarDecl | ClassDecl }
+ * "{" { MethodDecl } "}" . + */ + private void Program() { + // TODO Exercise UE-P-2 - // TODO Exercise UE-P-3: Error recovery methods: recoverDecl, recoverMethodDecl and recoverStat (+ TODO Exercise UE-P-5: Check idents for Type kind) + check(Token.Kind.program); + check(Token.Kind.ident); - // ==================================== - // ==================================== + final var firstDeclarationUnion = enumUnion(firstConstDecl, firstVarDecl, firstClassDecl); + + while (firstDeclarationUnion.contains(sym)) { + if (firstVarDecl.contains(sym)) { + VarDecl(); + } else if (firstClassDecl.contains(sym)) { + ClassDecl(); + } else { + ConstDecl(); + } + } + + check(lbrace); + + while (sym == ident || sym == void_) { + MethodDecl(); + } + + check(rbrace); + } + + private void ConstDecl() { + check(Token.Kind.final_); + Type(); + check(Token.Kind.ident); + check(Token.Kind.assign); + + if (sym == Token.Kind.number) { + scan(); + } else if (sym == Token.Kind.charConst) { + scan(); + } else { + error(TOKEN_EXPECTED, "number or character constant"); + } + + check(Token.Kind.semicolon); + } + + private void VarDecl() { + Type(); + check(ident); + + while (sym == Token.Kind.comma) { + scan(); + check(ident); + } + + check(semicolon); + } + + private void ClassDecl() { + check(class_); + check(ident); + check(lbrace); + + while (firstVarDecl.contains(sym)) { + VarDecl(); + } + + check(rbrace); + } + + private void Type() { + check(ident); + + if (sym == lbrack) { + scan(); + check(rbrack); + } + } + + private void MethodDecl() { + if (sym == ident) { + Type(); + } else if (sym == void_) { + scan(); + } + + check(ident); + check(lpar); + + if (sym == ident) { + FormPars(); + } + + check(rpar); + + while (firstVarDecl.contains(sym)) { + VarDecl(); + } + + Block(); + } + + private void FormPars() { + Type(); + check(ident); + + while (sym == comma) { + scan(); + Type(); + check(ident); + } + } + + private void Block() { + check(lbrace); + Statement(); + check(rbrace); + } + + private void Statement() { + while (firstStatement.contains(sym)) { + switch (sym) { + case ident -> { + Designator(); + + if (firstAsignop.contains(sym)) { + AssignOp(); + Expr(); + } else if (sym == lpar) { + ActPars(); + } else if (sym == pplus) { + scan(); + } else if (sym == mminus) { + scan(); + } + + check(semicolon); + } + case if_ -> { + scan(); + check(lpar); + Condition(); + check(rpar); + Statement(); + + if (sym == else_) { + scan(); + Statement(); + } + } + case while_ -> { + scan(); + check(lpar); + Condition(); + check(rpar); + Statement(); + } + case break_-> { + scan(); + check(semicolon); + } + case return_ -> { + scan(); + + if (sym == minus || firstStatement.contains(sym)) { + Expr(); + } + + check(semicolon); + } + case read -> { + scan(); + check(lpar); + Designator(); + check(rpar); + check(semicolon); + } + case print -> { + scan(); + check(lpar); + Expr(); + + if (sym == comma) { + scan(); + check(number); + } + + check(rpar); + check(semicolon); + } + case lbrace -> Block(); + case semicolon -> scan(); + }; + } + } + + private void Designator() { + check(ident); + + while (sym == period || sym == lbrack) { + if (sym == period) { + scan(); + check(ident); + } else { + scan(); + + if (sym == tilde) { + scan(); + } + + Expr(); + + check(rbrack); + } + } + + } + + private void AssignOp() { + switch (sym) { + case assign -> scan(); + case plusas -> scan(); + case minusas -> scan(); + case timesas -> scan(); + case slashas -> scan(); + case remas -> scan(); + default -> error(TOKEN_EXPECTED, "unexpected token. assignment token (=, +=, -=, *=, /=, %=), method call (\"(\"), increment (++) or decrement (--)"); + } + } + + private void Expr() { + System.out.println(); + if (sym == minus) { + scan(); + } + + Term(); + + while (sym == plus || sym == minus) { + Addop(); + Term(); + } + + } + + private void ActPars() { + + check(lpar); + + if (sym == minus || firstFactor.contains(sym)) { + Expr(); + + while (sym == comma) { + scan(); + Expr(); + } + } + + check(rpar); + } + + private void Term() { + Factor(); + + while (firstMulop.contains(sym)) { + Mulop(); + Factor(); + } + } + + private void Addop() { + if (sym == plus) { + scan(); + } else if (sym == minus) { + scan(); + } else { + error(TOKEN_EXPECTED, "minus or plus expected"); + } + } + + private void Factor() { + switch(sym) { + case ident -> Designator(); + case number -> scan(); + case charConst -> scan(); + case new_ -> { + scan(); + check(ident); + if (sym == lbrack) { + scan(); + Expr(); + check(rbrack); + } + } + case lpar -> { + scan(); + Expr(); + check(rpar); + } + default -> error(TOKEN_EXPECTED, "unexpected token. identifier, number, character constant, new or \"(\""); + } + } + + private void Mulop() { + if (sym == times) { + scan(); + } else if (sym == slash) { + scan(); + } else if (sym == rem) { + scan(); + } else { + error(TOKEN_EXPECTED, "expected *, /, %"); + } + } + + private void Condition() { + CondTerm(); + + while (sym == or) { + scan(); + CondTerm(); + } + } + + private void CondTerm() { + CondFact(); + + while (sym == and) { + scan(); + CondFact(); + } + } + + private void CondFact() { + Expr(); + Relop(); + Expr(); + } + + private void Relop() { + switch (sym) { + case eql -> scan(); + case neq -> scan(); + case gtr -> scan(); + case lss -> scan(); + case leq -> scan(); + case geq -> scan(); + default -> error(TOKEN_EXPECTED, "unexpected token. ==, !=, >, >=, <, <="); + } + } + + // ... + + // ------------------------------------ + + // TODO Exercise UE-P-3: Error recovery methods: recoverDecl, recoverMethodDecl and recoverStat (+ TODO Exercise UE-P-5: Check idents for Type kind) + + // ==================================== + // ==================================== }