diff --git a/MicroJava Compiler/src/ssw/mj/impl/Scanner.java b/MicroJava Compiler/src/ssw/mj/impl/Scanner.java index 1b2ac66..898bd3e 100644 --- a/MicroJava Compiler/src/ssw/mj/impl/Scanner.java +++ b/MicroJava Compiler/src/ssw/mj/impl/Scanner.java @@ -1,15 +1,15 @@ package ssw.mj.impl; import ssw.mj.Errors; +import ssw.mj.Interpreter; import ssw.mj.scanner.Token; import java.io.IOException; import java.io.Reader; -import java.util.HashMap; import java.util.Map; -import java.util.Optional; import static ssw.mj.scanner.Token.Kind.*; +import static ssw.mj.scanner.Token.Kind.number; public class Scanner { @@ -112,40 +112,60 @@ public class Scanner { } if (isDigit(ch)) { - final var number = readNumber(); + final var stringNumber = readNumber(); - var token = new Token(Token.Kind.number, ogLine, ogCol); - token.numVal = number; - token.val = number + ""; + /* Digit text that Integer.parseInt rejects is reported as BIG_NUM in the catch branch below. */ try { + final var number = Integer.parseInt(stringNumber); - return token; + var token = new Token(Token.Kind.number, ogLine, ogCol); + token.numVal = number; + token.val = stringNumber; + + return token; + } catch (NumberFormatException exception) { + final var token = new Token(number, ogLine, ogCol); + error(token, Errors.Message.BIG_NUM, stringNumber); + return token; + } } if (ch == '\'') { nextCh(); + /* Steps over a carriage return directly after the opening quote. NOTE(review): presumably for CRLF input - confirm. */ if (ch == '\r') { + nextCh(); + } + + var charContent = ch; + /* The two flags below are set only inside the backslash branch, so the raw-character check further down can tell an escaped value from a raw one. */ var lfEscaped = false; + var tickEscaped = false; + if (ch == '\\') { nextCh(); - final var charContent = switch (ch) { + charContent = switch (ch) { case 'n' -> '\n'; case 'r' -> '\r'; case 't' -> '\t'; case '\\' -> '\\'; case '\'' -> '\''; - default -> ' '; + /* NUL is the marker for an escape letter that is not listed above. */ default -> '\0'; }; - final var token = new Token(charConst, ogLine, ogCol); - token.val = charContent + ""; - token.numVal = charContent; - nextCh(); - nextCh(); - return token; + if (charContent == '\n') { + lfEscaped = true; + } + + if (charContent == '\'') { + tickEscaped = true; + } + + /* Unknown escape: report UNDEFINED_ESCAPE, passing the character that followed the backslash. */ if (charContent == '\0') { + final var token = new Token(charConst, ogLine, ogCol); + error(token, 
Errors.Message.UNDEFINED_ESCAPE, ch); + } } - final var charContent = ch; - - if (charContent == LF || charContent == EOF || charContent == '\'') { + /* A line feed or a quote is an error only when its escape flag is unset; EOF is always an error. */ if (charContent == LF && !lfEscaped || charContent == EOF || charContent == '\'' && !tickEscaped) { final var token = new Token(charConst, ogLine, ogCol); final var message = switch (charContent) { case LF -> Errors.Message.ILLEGAL_LINE_END; @@ -191,14 +211,19 @@ public class Scanner { case '.' -> new Token(Token.Kind.period, ogLine, ogCol); case '{' -> new Token(Token.Kind.lbrace, ogLine, ogCol); case '}' -> new Token(Token.Kind.rbrace, ogLine, ogCol); + case '[' -> new Token(Token.Kind.lbrack, ogLine, ogCol); + case ']' -> new Token(Token.Kind.rbrack, ogLine, ogCol); + case '(' -> new Token(Token.Kind.lpar, ogLine, ogCol); + case ')' -> new Token(Token.Kind.rpar, ogLine, ogCol); case (char) -1 -> new Token(Token.Kind.eof, ogLine, ogCol); + case '~' -> new Token(Token.Kind.tilde, ogLine, ogCol); case '=' -> { if (ch == '=') { nextCh(); yield new Token(Token.Kind.eql, ogLine, ogCol); } - yield new Token(Token.Kind.assign, line, col); + yield new Token(Token.Kind.assign, ogLine, ogCol); } case '+' -> { if (ch == '=') { @@ -240,6 +265,18 @@ public class Scanner { yield new Token(Token.Kind.slashas, ogLine, ogCol); } + /* A star at this point starts a block comment: consume the star, skip the comment, then scan the token that follows it. */ if (ch == '*') { + nextCh(); + final var success = skipComment(); + if (success) { + yield next(); + } else { /* Input ended inside the comment: report EOF_IN_COMMENT before continuing with next(). */ + final var token = new Token(eof, ogLine, ogCol); + error(token, Errors.Message.EOF_IN_COMMENT); + yield next(); + } + } + yield new Token(Token.Kind.slash, ogLine, ogCol); } case '%' -> { @@ -256,8 +293,9 @@ public class Scanner { yield new Token(Token.Kind.neq, ogLine, ogCol); } - error(new Token(none, line, col), Errors.Message.INVALID_CHAR); - yield next(); + /* Report INVALID_CHAR with the offending character and hand back a none token created with ogLine and ogCol. */ final var token = new Token(none, ogLine, ogCol); + error(token, Errors.Message.INVALID_CHAR, ogChar); + yield token; } case '>' -> { if (ch == '=') { @@ -268,7 +306,7 @@ public class Scanner { yield new Token(Token.Kind.gtr, ogLine, 
ogCol); } case '<' -> { - if (ch == '<') { + /* leq is a less-than sign followed by an equals sign. */ if (ch == '=') { nextCh(); yield new Token(Token.Kind.leq, ogLine, ogCol); } @@ -281,21 +319,24 @@ public class Scanner { yield new Token(Token.Kind.and, ogLine, ogCol); } - error(new Token(none, line, col), Errors.Message.INVALID_CHAR); - yield next(); + final var token = new Token(none, ogLine, ogCol); + error(token, Errors.Message.INVALID_CHAR, ogChar); + yield token; } case '|' -> { if (ch == '|') { nextCh(); - yield new Token(Token.Kind.and, ogLine, ogCol); + yield new Token(Token.Kind.or, ogLine, ogCol); } - error(new Token(none, line, col), Errors.Message.INVALID_CHAR); - yield next(); + final var token = new Token(none, ogLine, ogCol); + error(token, Errors.Message.INVALID_CHAR, ogChar); + yield token; } default -> { - error(new Token(none, line, col), Errors.Message.INVALID_CHAR); - yield next(); + final var token = new Token(none, ogLine, ogCol); + error(token, Errors.Message.INVALID_CHAR, ogChar); + yield token; } }; } @@ -333,7 +374,7 @@ public class Scanner { public String readName() { final var builder = new StringBuilder(); - while (isLetter(ch)) { + /* A name continues over letters, digits and underscores. */ while (isLetter(ch) || ch == '_' || isDigit(ch)) { builder.append(ch); nextCh(); } @@ -341,7 +382,7 @@ public class Scanner { return builder.toString(); } - public int readNumber() { + /** Returns the text accumulated while the current character is a digit; converting it to an int is left to the caller. */ public String readNumber() { final var builder = new StringBuilder(); while (isDigit(ch)) { @@ -349,7 +390,7 @@ public class Scanner { nextCh(); } - return Integer.parseInt(builder.toString()); + return builder.toString(); } private Token getTokenByName(String name, int line, int col) { @@ -364,6 +405,30 @@ public class Scanner { return token; } + /** Skips the rest of a block comment, honouring nested comments. The depth counter starts at 1, so one opener is expected to have been consumed by the caller already. @return true once the depth is back to zero, false if EOF is reached while the depth is still non-zero */ private boolean skipComment() { + var commentCount = 1; + while (commentCount != 0) { + final var lastCh = ch; + nextCh(); + + /* A nested opener (slash then star) increases the depth; the extra nextCh() consumes the star so it cannot also start a closer. */ if (lastCh == '/' && ch == '*') { + commentCount++; + nextCh(); + } + + /* A closer (star then slash) decreases the depth. */ if (lastCh == '*' && ch == '/') { + commentCount--; + nextCh(); + } + + if (ch == EOF && commentCount != 0) { + return false; + } + } + + 
return true; + } + // ================================================ // ================================================ }