diff --git a/MicroJava Compiler/src/ssw/mj/impl/Scanner.java b/MicroJava Compiler/src/ssw/mj/impl/Scanner.java index 75d27f9..1b2ac66 100644 --- a/MicroJava Compiler/src/ssw/mj/impl/Scanner.java +++ b/MicroJava Compiler/src/ssw/mj/impl/Scanner.java @@ -3,106 +3,367 @@ package ssw.mj.impl; import ssw.mj.Errors; import ssw.mj.scanner.Token; +import java.io.IOException; import java.io.Reader; import java.util.HashMap; import java.util.Map; +import java.util.Optional; -import static ssw.mj.scanner.Token.Kind.none; +import static ssw.mj.scanner.Token.Kind.*; public class Scanner { - // Scanner Skeleton - do not rename fields / methods ! - private static final char EOF = (char) -1; - private static final char LF = '\n'; + // Scanner Skeleton - do not rename fields / methods ! + private static final char EOF = (char) -1; + private static final char LF = '\n'; - /** - * Input data to read from. - */ - private final Reader in; + /** + * Input data to read from. + */ + private final Reader in; - /** - * Lookahead character. (= next (unhandled) character in the input stream) - */ - private char ch; + /** + * Lookahead character. (= next (unhandled) character in the input stream) + */ + private char ch; - /** - * Current line in input stream. - */ - private int line; + /** + * Current line in input stream. + */ + private int line; - /** - * Current column in input stream. - */ - private int col; + /** + * Current column in input stream. + */ + private int col; - /** - * According errors object. - */ - public final Errors errors; + /** + * According errors object. + */ + public final Errors errors; - public Scanner(Reader r) { - // store reader - in = r; + public Scanner(Reader r) { + // store reader + in = r; - // initialize error handling support - errors = new Errors(); + // initialize error handling support + errors = new Errors(); - line = 1; - col = 0; - nextCh(); // read 1st char into ch, incr col to 1 - } + line = 1; + col = 0; + nextCh(); // read 1st char into ch, incr col to 1 + } - /** - * Adds error message to the list of errors. - */ - public final void error(Token t, Errors.Message msg, Object... msgParams) { - errors.error(t.line, t.col, msg, msgParams); + /** + * Adds error message to the list of errors. + */ + public final void error(Token t, Errors.Message msg, Object... msgParams) { + errors.error(t.line, t.col, msg, msgParams); - // reset token content (consistent JUnit tests) - t.numVal = 0; - t.val = null; - } + // reset token content (consistent JUnit tests) + t.numVal = 0; + t.val = null; + } - // ================================================ - // TODO Exercise UE-P-1: Implement Scanner (next() + private helper methods) - // ================================================ + // ================================================ + // TODO Exercise UE-P-1: Implement Scanner (next() + private helper methods) + // ================================================ - // TODO Exercise UE-P-1: Keywords - /** - * Mapping from keyword names to appropriate token codes. - */ - private static final Map keywords; + // TODO Exercise UE-P-1: Keywords + /** + * Mapping from keyword names to appropriate token codes. + */ + private static final Map keywords; - static { - keywords = new HashMap<>(); - } + static { + keywords = Map.ofEntries( + Map.entry("void", void_), + Map.entry("class", class_), + Map.entry("program", program), + Map.entry("if", if_), + Map.entry("else", else_), + Map.entry("while", while_), + Map.entry("read", read), + Map.entry("print", print), + Map.entry("return", return_), + Map.entry("break", break_), + Map.entry("final", final_), + Map.entry("new", new_) + ); + } - /** - * Returns next token. To be used by parser. - */ - public Token next() { - // TODO Exercise UE-P-1: implementation of next method - Token t = new Token(none, 1, 1); - return t; - } + /** + * Returns next token. To be used by parser. + */ + public Token next() { + // TODO Exercise UE-P-1: implementation of next method + while (Character.isWhitespace(ch)) { + nextCh(); + } - private void nextCh() { - // TODO Exercise UE-P-1: implementation of nextCh method and other private helper methods - } + final var ogLine = line; + final var ogCol = col; - // TODO Exercise UE-P-1: private helper methods used by next(), as discussed in the exercise + if (isLetter(ch)) { + final var name = readName(); - // ----------------------------------------------- + return getTokenByName(name, ogLine, ogCol); + } - private boolean isLetter(char c) { - return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'; - } + if (isDigit(ch)) { + final var number = readNumber(); - private boolean isDigit(char c) { - return '0' <= c && c <= '9'; - } + var token = new Token(Token.Kind.number, ogLine, ogCol); + token.numVal = number; + token.val = number + ""; - // ================================================ - // ================================================ + return token; + } + + if (ch == '\'') { + nextCh(); + + if (ch == '\\') { + nextCh(); + final var charContent = switch (ch) { + case 'n' -> '\n'; + case 'r' -> '\r'; + case 't' -> '\t'; + case '\\' -> '\\'; + case '\'' -> '\''; + default -> ' '; + }; + + final var token = new Token(charConst, ogLine, ogCol); + token.val = charContent + ""; + token.numVal = charContent; + nextCh(); + nextCh(); + return token; + } + + final var charContent = ch; + + if (charContent == LF || charContent == EOF || charContent == '\'') { + final var token = new Token(charConst, ogLine, ogCol); + final var message = switch (charContent) { + case LF -> Errors.Message.ILLEGAL_LINE_END; + case EOF -> Errors.Message.EOF_IN_CHAR; + case '\'' -> Errors.Message.EMPTY_CHARCONST; + default -> null; + }; + + error(token, message); + token.val = '\0' + ""; + token.numVal = 0; + + if (charContent == '\'') { + nextCh(); + } + + return token; + } + + nextCh(); + + if (ch != '\'') { + final var token = new Token(charConst, ogLine, ogCol); + error(token, Errors.Message.MISSING_QUOTE); + token.val = '\0' + ""; + token.numVal = 0; + return token; + } + + nextCh(); + + final var token = new Token(charConst, ogLine, ogCol); + token.val = charContent + ""; + token.numVal = charContent; + return token; + } + + final var ogChar = ch; + nextCh(); + return switch(ogChar) { + case ';' -> new Token(Token.Kind.semicolon, ogLine, ogCol); + case ',' -> new Token(Token.Kind.comma, ogLine, ogCol); + case '.' -> new Token(Token.Kind.period, ogLine, ogCol); + case '{' -> new Token(Token.Kind.lbrace, ogLine, ogCol); + case '}' -> new Token(Token.Kind.rbrace, ogLine, ogCol); + case (char) -1 -> new Token(Token.Kind.eof, ogLine, ogCol); + case '=' -> { + if (ch == '=') { + nextCh(); + yield new Token(Token.Kind.eql, ogLine, ogCol); + } + + yield new Token(Token.Kind.assign, line, col); + } + case '+' -> { + if (ch == '=') { + nextCh(); + yield new Token(Token.Kind.plusas, ogLine, ogCol); + } + + if (ch == '+') { + nextCh(); + yield new Token(Token.Kind.pplus, ogLine, ogCol); + } + + yield new Token(Token.Kind.plus, ogLine, ogCol); + } + case '-' -> { + if (ch == '=') { + nextCh(); + yield new Token(Token.Kind.minusas, ogLine, ogCol); + } + + if (ch == '-') { + nextCh(); + yield new Token(Token.Kind.mminus, ogLine, ogCol); + } + + yield new Token(Token.Kind.minus, ogLine, ogCol); + } + case '*' -> { + if (ch == '=') { + nextCh(); + yield new Token(Token.Kind.timesas, ogLine, ogCol); + } + + yield new Token(Token.Kind.times, ogLine, ogCol); + } + case '/' -> { + if (ch == '=') { + nextCh(); + yield new Token(Token.Kind.slashas, ogLine, ogCol); + } + + yield new Token(Token.Kind.slash, ogLine, ogCol); + } + case '%' -> { + if (ch == '=') { + nextCh(); + yield new Token(Token.Kind.remas, ogLine, ogCol); + } + + yield new Token(Token.Kind.rem, ogLine, ogCol); + } + case '!' -> { + if (ch == '=') { + nextCh(); + yield new Token(Token.Kind.neq, ogLine, ogCol); + } + + error(new Token(none, line, col), Errors.Message.INVALID_CHAR); + yield next(); + } + case '>' -> { + if (ch == '=') { + nextCh(); + yield new Token(Token.Kind.geq, ogLine, ogCol); + } + + yield new Token(Token.Kind.gtr, ogLine, ogCol); + } + case '<' -> { + if (ch == '<') { + nextCh(); + yield new Token(Token.Kind.leq, ogLine, ogCol); + } + + yield new Token(Token.Kind.lss, ogLine, ogCol); + } + case '&' -> { + if (ch == '&') { + nextCh(); + yield new Token(Token.Kind.and, ogLine, ogCol); + } + + error(new Token(none, line, col), Errors.Message.INVALID_CHAR); + yield next(); + } + case '|' -> { + if (ch == '|') { + nextCh(); + yield new Token(Token.Kind.and, ogLine, ogCol); + } + + error(new Token(none, line, col), Errors.Message.INVALID_CHAR); + yield next(); + } + default -> { + error(new Token(none, line, col), Errors.Message.INVALID_CHAR); + yield next(); + } + }; + } + + private void nextCh() { + // TODO Exercise UE-P-1: implementation of nextCh method and other private helper methods + try { + final var intChar = in.read(); + final var nextChar = (char) intChar; + + if (ch == '\n') { + line++; + col = 0; + } + + col++; + ch = nextChar; + } catch (IOException e) { + System.err.println("Could not read Stream"); + } + } + + // TODO Exercise UE-P-1: private helper methods used by next(), as discussed in the exercise + + // ----------------------------------------------- + + private boolean isLetter(char c) { + return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'; + } + + private boolean isDigit(char c) { + return '0' <= c && c <= '9'; + } + + public String readName() { + final var builder = new StringBuilder(); + + while (isLetter(ch)) { + builder.append(ch); + nextCh(); + } + + return builder.toString(); + } + + public int readNumber() { + final var builder = new StringBuilder(); + + while (isDigit(ch)) { + builder.append(ch); + nextCh(); + } + + return Integer.parseInt(builder.toString()); + } + + private Token getTokenByName(String name, int line, int col) { + final var kind = keywords.getOrDefault(name, Token.Kind.ident); + + var token = new Token(kind, line, col); + + if (kind == Token.Kind.ident) { + token.val = name; + } + + return token; + } + + // ================================================ + // ================================================ }