initial commit
This commit is contained in:
294
MicroJava Compiler/src/ssw/mj/impl/Code.java
Normal file
294
MicroJava Compiler/src/ssw/mj/impl/Code.java
Normal file
@@ -0,0 +1,294 @@
|
||||
package ssw.mj.impl;
|
||||
|
||||
import ssw.mj.codegen.Label;
|
||||
import ssw.mj.codegen.Operand;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
public final class Code {
|
||||
|
||||
/**
 * Instruction mnemonics. The numeric code of an instruction is its position
 * in this declaration, counted from 1 (see {@link #code()}), so the order of
 * the constants must not be changed.
 */
public enum OpCode {
    load,
    load_0,
    load_1,
    load_2,
    load_3,
    store,
    store_0,
    store_1,
    store_2,
    store_3,
    getstatic,
    putstatic,
    getfield,
    putfield,
    const_0,
    const_1,
    const_2,
    const_3,
    const_4,
    const_5,
    const_m1,
    const_,
    add,
    sub,
    mul,
    div,
    rem,
    neg,
    shl,
    shr,
    inc,
    new_,
    newarray,
    aload,
    astore,
    baload,
    bastore,
    arraylength,
    pop,
    dup,
    dup2,
    jmp,
    jeq,
    jne,
    jlt,
    jle,
    jgt,
    jge,
    call,
    return_,
    enter,
    exit,
    read,
    print,
    bread,
    bprint,
    trap,
    nop;

    /** Cached once: {@code values()} returns a fresh array copy on every call. */
    private static final OpCode[] VALUES = values();

    /**
     * @return the numeric instruction code, i.e. {@code ordinal() + 1}
     */
    public int code() {
        return ordinal() + 1;
    }

    /**
     * @return the constant name without a single trailing underscore
     *         (e.g. {@code new_} becomes {@code "new"})
     */
    public String cleanName() {
        String name = name();
        if (name.endsWith("_")) {
            name = name.substring(0, name.length() - 1);
        }
        return name;
    }

    /**
     * Looks up the instruction for a numeric code.
     *
     * @param code numeric instruction code as produced by {@link #code()}
     * @return the matching constant, or {@code null} if {@code code} is
     *         smaller than 1 or larger than the number of constants
     */
    public static OpCode get(int code) {
        if (code < 1 || code > VALUES.length) {
            return null;
        }
        return VALUES[code - 1];
    }
}
|
||||
|
||||
public enum CompOp {
|
||||
eq, ne, lt, le, gt, ge;
|
||||
|
||||
public static CompOp invert(CompOp op) {
|
||||
if (op == null) {
|
||||
throw new IllegalArgumentException("Compare operator must not be null!");
|
||||
}
|
||||
return switch (op) {
|
||||
case eq -> ne;
|
||||
case ne -> eq;
|
||||
case lt -> ge;
|
||||
case le -> gt;
|
||||
case gt -> le;
|
||||
case ge -> lt;
|
||||
default ->
|
||||
// Cannot happen, we covered all six compare operations as well as null parameter
|
||||
// This is purely to prevent the compiler from complaining about a missing return statement
|
||||
throw new IllegalArgumentException("Impossible compare operator");
|
||||
};
|
||||
}
|
||||
|
||||
public static OpCode toOpCode(CompOp op) {
|
||||
return switch (op) {
|
||||
case eq -> OpCode.jeq;
|
||||
case ge -> OpCode.jge;
|
||||
case gt -> OpCode.jgt;
|
||||
case le -> OpCode.jle;
|
||||
case lt -> OpCode.jlt;
|
||||
case ne -> OpCode.jne;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Code buffer
|
||||
*/
|
||||
public byte[] buf;
|
||||
|
||||
/**
|
||||
* Program counter. Indicates next free byte in code buffer.
|
||||
*/
|
||||
public int pc;
|
||||
|
||||
/**
|
||||
* PC of main method (set by parser).
|
||||
*/
|
||||
public int mainpc;
|
||||
|
||||
/**
|
||||
* Length of static data in words (set by parser).
|
||||
*/
|
||||
public int dataSize;
|
||||
|
||||
/**
|
||||
* According parser.
|
||||
*/
|
||||
private final Parser parser;
|
||||
|
||||
// ----- initialization
|
||||
|
||||
/**
 * Creates an empty code buffer for the given parser: 100 bytes of initial
 * capacity, program counter 0, no main method yet (-1) and no static data.
 */
public Code(Parser p) {
    this.parser = p;
    this.buf = new byte[100];
    this.pc = 0;
    this.mainpc = -1;
    this.dataSize = 0;
}
|
||||
|
||||
// ----- code storage management
|
||||
|
||||
public void put(OpCode code) {
|
||||
put(code.code());
|
||||
}
|
||||
|
||||
public void put(int x) {
|
||||
if (pc == buf.length) {
|
||||
buf = Arrays.copyOf(buf, buf.length * 2);
|
||||
}
|
||||
buf[pc++] = (byte) x;
|
||||
}
|
||||
|
||||
public void put2(int x) {
|
||||
put(x >> 8);
|
||||
put(x);
|
||||
}
|
||||
|
||||
public void put4(int x) {
|
||||
put2(x >> 16);
|
||||
put2(x);
|
||||
}
|
||||
|
||||
public void put2(int pos, int x) {
|
||||
int oldpc = pc;
|
||||
pc = pos;
|
||||
put2(x);
|
||||
pc = oldpc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the code buffer to the output stream.
|
||||
*/
|
||||
public void write(OutputStream os) throws IOException {
|
||||
int codeSize = pc;
|
||||
|
||||
ByteArrayOutputStream header = new ByteArrayOutputStream();
|
||||
DataOutputStream headerWriter = new DataOutputStream(header);
|
||||
headerWriter.writeByte('M');
|
||||
headerWriter.writeByte('J');
|
||||
headerWriter.writeInt(codeSize);
|
||||
headerWriter.writeInt(dataSize);
|
||||
headerWriter.writeInt(mainpc);
|
||||
headerWriter.close();
|
||||
|
||||
os.write(header.toByteArray());
|
||||
|
||||
os.write(buf, 0, codeSize);
|
||||
os.flush();
|
||||
os.close();
|
||||
}
|
||||
|
||||
// ======================================================
|
||||
// TODO Exercise UE-P-5-6: implementation of code generation
|
||||
// ======================================================
|
||||
|
||||
// TODO Exercise UE-P-5: Various code generation methods such as load or assign
|
||||
|
||||
/**
|
||||
* Load the operand x onto the expression stack.
|
||||
*/
|
||||
public void load(Operand x) {
|
||||
// TODO Exercise UE-P-5
|
||||
}
|
||||
|
||||
/**
|
||||
* Load an integer constant onto the expression stack.
|
||||
*/
|
||||
public void loadConst(int n) {
|
||||
// TODO Exercise UE-P-5
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an assignment x = y.
|
||||
*/
|
||||
public void assign(Operand x, Operand y) {
|
||||
// TODO Exercise UE-P-5
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an increment instruction that increments x by n.
|
||||
*/
|
||||
public void inc(Operand x, int n) {
|
||||
// TODO Exercise UE-P-5
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepares the left-hand side of a compound assignment.
|
||||
*/
|
||||
public void prepareLhsOfCompoundAssignment(Operand x) {
|
||||
Operand.Kind kindBeforeLoad = x.kind;
|
||||
// TODO Exercise UE-P-5
|
||||
// TODO: Field accesses (such as x.y) or array accesses (such as arr[2]) on the left-hand side of
|
||||
// an compound assignment (e.g., arr[2] += 4) need to correctly use dup or dup2 before load. Implement here.
|
||||
|
||||
|
||||
// Do not switch kind to Stack after loading x.
|
||||
// We still need its kind later on during the assign().
|
||||
x.kind = kindBeforeLoad;
|
||||
}
|
||||
|
||||
// --------------------
|
||||
|
||||
public void methodCall(Operand x) {
|
||||
// TODO Exercise UE-P-6
|
||||
}
|
||||
|
||||
/**
|
||||
* Unconditional jump.
|
||||
*/
|
||||
public void jump(Label lab) {
|
||||
// TODO Exercise UE-P-6
|
||||
}
|
||||
|
||||
/**
|
||||
* True Jump. Generates conditional jump instruction and links it to true
|
||||
* jump chain.
|
||||
*/
|
||||
public void tJump(CompOp op, Label to) {
|
||||
// TODO Exercise UE-P-6
|
||||
}
|
||||
|
||||
/**
|
||||
* False Jump. Generates conditional jump instruction and links it to false
|
||||
* jump chain.
|
||||
*/
|
||||
public void fJump(CompOp op, Label to) {
|
||||
// TODO Exercise UE-P-6
|
||||
}
|
||||
|
||||
// =================================================
|
||||
// =================================================
|
||||
}
|
||||
561
MicroJava Compiler/src/ssw/mj/impl/Parser.java
Normal file
561
MicroJava Compiler/src/ssw/mj/impl/Parser.java
Normal file
@@ -0,0 +1,561 @@
|
||||
package ssw.mj.impl;
|
||||
|
||||
import javassist.bytecode.analysis.ControlFlow;
|
||||
import javassist.compiler.ast.MethodDecl;
|
||||
import ssw.mj.Errors;
|
||||
import ssw.mj.Errors.Message;
|
||||
import ssw.mj.scanner.Token;
|
||||
|
||||
import javax.xml.stream.FactoryConfigurationError;
|
||||
import java.util.EnumSet;
|
||||
|
||||
import static ssw.mj.Errors.Message.*;
|
||||
import static ssw.mj.scanner.Token.Kind.*;
|
||||
|
||||
public final class Parser {
|
||||
|
||||
/**
|
||||
* Maximum number of global variables per program
|
||||
*/
|
||||
private static final int MAX_GLOBALS = 32767;
|
||||
|
||||
/**
|
||||
* Maximum number of fields per class
|
||||
*/
|
||||
private static final int MAX_FIELDS = 32767;
|
||||
|
||||
/**
|
||||
* Maximum number of local variables per method
|
||||
*/
|
||||
private static final int MAX_LOCALS = 127;
|
||||
|
||||
/**
|
||||
* Last recognized token.
|
||||
*/
|
||||
private Token t;
|
||||
|
||||
/**
|
||||
* Lookahead token (not recognized yet).
|
||||
*/
|
||||
private Token la;
|
||||
|
||||
/**
|
||||
* Shortcut to kind attribute of lookahead token (la).
|
||||
*/
|
||||
private Token.Kind sym;
|
||||
|
||||
/**
|
||||
* According scanner
|
||||
*/
|
||||
public final Scanner scanner;
|
||||
|
||||
/**
|
||||
* According code buffer
|
||||
*/
|
||||
public final Code code;
|
||||
|
||||
/**
|
||||
* According symbol table
|
||||
*/
|
||||
public final Tab tab;
|
||||
|
||||
private static int MIN_ERROR_DIST = 3;
|
||||
|
||||
private int errorDist = MIN_ERROR_DIST;
|
||||
|
||||
/**
 * Creates a parser reading from {@code scanner}, together with its own
 * symbol table and code buffer.
 */
public Parser(Scanner scanner) {
    this.scanner = scanner;
    this.tab = new Tab(this);
    this.code = new Code(this);
    // Placeholder lookahead so that a scanner error on the very first
    // symbol does not crash the parser.
    this.la = new Token(none, 1, 1);
}
|
||||
|
||||
|
||||
/**
|
||||
* Reads ahead one symbol.
|
||||
*/
|
||||
private void scan() {
|
||||
t = la;
|
||||
la = scanner.next();
|
||||
sym = la.kind;
|
||||
errorDist++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies symbol and reads ahead.
|
||||
*/
|
||||
private void check(Token.Kind expected) {
|
||||
if (sym == expected) {
|
||||
scan();
|
||||
} else {
|
||||
error(TOKEN_EXPECTED, expected);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds error message to the list of errors.
|
||||
*/
|
||||
public void error(Message msg, Object... msgParams) {
|
||||
// TODO Exercise UE-P-3: Replace panic mode with error recovery (i.e., keep track of error distance)
|
||||
// TODO Exercise UE-P-3: Hint: Replacing panic mode also affects scan() method
|
||||
|
||||
if (errorDist >= MIN_ERROR_DIST) {
|
||||
scanner.errors.error(la.line, la.col, msg, msgParams);
|
||||
}
|
||||
|
||||
errorDist = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts the analysis.
|
||||
*/
|
||||
public void parse() {
|
||||
scan(); // scan first symbol, initializes look-ahead
|
||||
Program(); // start analysis
|
||||
check(eof);
|
||||
}
|
||||
|
||||
|
||||
// ===============================================
|
||||
// TODO Exercise UE-P-2: Implementation of parser
|
||||
// TODO Exercise UE-P-3: Error recovery methods
|
||||
// TODO Exercise UE-P-4: Symbol table handling
|
||||
// TODO Exercise UE-P-5-6: Code generation
|
||||
// ===============================================
|
||||
|
||||
// TODO Exercise UE-P-3: Error distance
|
||||
|
||||
// TODO Exercise UE-P-2 + Exercise 3: Sets to handle certain first, follow, and recover sets
|
||||
|
||||
// Initialize first and follow sets.
|
||||
static final EnumSet<Token.Kind> firstConstDecl = EnumSet.of(Token.Kind.final_);
|
||||
static final EnumSet<Token.Kind> firstVarDecl = EnumSet.of(Token.Kind.ident);
|
||||
static final EnumSet<Token.Kind> firstClassDecl = EnumSet.of(Token.Kind.class_);
|
||||
static final EnumSet<Token.Kind> breakDecl = EnumSet.of(lbrace, eof);
|
||||
static final EnumSet<Token.Kind> firstStatement = EnumSet.of(ident, if_, while_, break_, return_, read, print, lbrace, semicolon);
|
||||
static final EnumSet<Token.Kind> breakStatement = EnumSet.of(rbrace, else_, eof);
|
||||
static final EnumSet<Token.Kind> firstAsignop = EnumSet.of(assign, plusas, minusas, timesas, slashas, remas);
|
||||
static final EnumSet<Token.Kind> firstFactor = EnumSet.of(ident, number, charConst, new_, lpar);
|
||||
static final EnumSet<Token.Kind> firstMulop = EnumSet.of(times, slash, rem);
|
||||
static final EnumSet<Token.Kind> firstMethodDecl = EnumSet.of(ident, void_);
|
||||
static final EnumSet<Token.Kind> breakMethodDecl = EnumSet.of(rbrace, eof);
|
||||
|
||||
|
||||
static <T extends Enum<T>> EnumSet<T> enumUnion(EnumSet<T> first, EnumSet<T> ...sets) {
|
||||
final var copy = EnumSet.copyOf(first);
|
||||
for (final var set : sets) {
|
||||
copy.addAll(set);
|
||||
}
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
// ---------------------------------
|
||||
|
||||
// TODO Exercise UE-P-2: One top-down parsing method per production
|
||||
|
||||
/**
|
||||
* Program = <br>
|
||||
* "program" ident <br>
|
||||
* { ConstDecl | VarDecl | ClassDecl } <br>
|
||||
* "{" { MethodDecl } "}" .
|
||||
*/
|
||||
private void Program() {
|
||||
// TODO Exercise UE-P-2
|
||||
|
||||
check(Token.Kind.program);
|
||||
check(Token.Kind.ident);
|
||||
|
||||
while (!breakDecl.contains(sym)) {
|
||||
if (firstVarDecl.contains(sym)) {
|
||||
VarDecl();
|
||||
} else if (firstClassDecl.contains(sym)) {
|
||||
ClassDecl();
|
||||
} else if (firstConstDecl.contains(sym)){
|
||||
ConstDecl();
|
||||
} else {
|
||||
recoverDecl();
|
||||
}
|
||||
}
|
||||
|
||||
check(lbrace);
|
||||
|
||||
while (!breakMethodDecl.contains(sym)) {
|
||||
if (firstMethodDecl.contains(sym)) {
|
||||
MethodDecl();
|
||||
} else {
|
||||
recoverMethodDecl();
|
||||
}
|
||||
}
|
||||
|
||||
check(rbrace);
|
||||
}
|
||||
|
||||
private void ConstDecl() {
|
||||
check(Token.Kind.final_);
|
||||
Type();
|
||||
check(Token.Kind.ident);
|
||||
check(Token.Kind.assign);
|
||||
|
||||
if (sym == Token.Kind.number) {
|
||||
scan();
|
||||
} else if (sym == Token.Kind.charConst) {
|
||||
scan();
|
||||
} else {
|
||||
error(TOKEN_EXPECTED, "number or character constant");
|
||||
}
|
||||
|
||||
check(Token.Kind.semicolon);
|
||||
}
|
||||
|
||||
private void VarDecl() {
|
||||
Type();
|
||||
check(ident);
|
||||
|
||||
while (sym == Token.Kind.comma) {
|
||||
scan();
|
||||
check(ident);
|
||||
}
|
||||
|
||||
check(semicolon);
|
||||
}
|
||||
|
||||
private void ClassDecl() {
|
||||
check(class_);
|
||||
check(ident);
|
||||
check(lbrace);
|
||||
|
||||
while (firstVarDecl.contains(sym)) {
|
||||
VarDecl();
|
||||
}
|
||||
|
||||
check(rbrace);
|
||||
}
|
||||
|
||||
private void Type() {
|
||||
check(ident);
|
||||
|
||||
if (sym == lbrack) {
|
||||
scan();
|
||||
check(rbrack);
|
||||
}
|
||||
}
|
||||
|
||||
private void MethodDecl() {
|
||||
if (sym == ident) {
|
||||
Type();
|
||||
} else if (sym == void_) {
|
||||
scan();
|
||||
}
|
||||
|
||||
check(ident);
|
||||
check(lpar);
|
||||
|
||||
if (sym == ident) {
|
||||
FormPars();
|
||||
}
|
||||
|
||||
check(rpar);
|
||||
|
||||
while (firstVarDecl.contains(sym)) {
|
||||
VarDecl();
|
||||
}
|
||||
|
||||
Block();
|
||||
}
|
||||
|
||||
private void FormPars() {
|
||||
Type();
|
||||
check(ident);
|
||||
|
||||
while (sym == comma) {
|
||||
scan();
|
||||
Type();
|
||||
check(ident);
|
||||
}
|
||||
}
|
||||
|
||||
private void Block() {
|
||||
check(lbrace);
|
||||
Statement();
|
||||
check(rbrace);
|
||||
}
|
||||
|
||||
private void Statement() {
|
||||
while (!breakStatement.contains(sym)) {
|
||||
switch (sym) {
|
||||
case ident -> {
|
||||
Designator();
|
||||
|
||||
if (firstAsignop.contains(sym)) {
|
||||
AssignOp();
|
||||
Expr();
|
||||
} else if (sym == lpar) {
|
||||
ActPars();
|
||||
} else if (sym == pplus) {
|
||||
scan();
|
||||
} else if (sym == mminus) {
|
||||
scan();
|
||||
} else {
|
||||
error(TOKEN_EXPECTED, "unexpected token. assignment token (=, +=, -=, *=, /=, %=), method call (\"(\"), increment (++) or decrement (--)");
|
||||
}
|
||||
|
||||
check(semicolon);
|
||||
}
|
||||
case if_ -> {
|
||||
scan();
|
||||
check(lpar);
|
||||
Condition();
|
||||
check(rpar);
|
||||
Statement();
|
||||
|
||||
if (sym == else_) {
|
||||
scan();
|
||||
Statement();
|
||||
}
|
||||
}
|
||||
case while_ -> {
|
||||
scan();
|
||||
check(lpar);
|
||||
Condition();
|
||||
check(rpar);
|
||||
Statement();
|
||||
}
|
||||
case break_-> {
|
||||
scan();
|
||||
check(semicolon);
|
||||
}
|
||||
case return_ -> {
|
||||
scan();
|
||||
|
||||
if (sym == minus || firstFactor.contains(sym)) {
|
||||
Expr();
|
||||
}
|
||||
|
||||
check(semicolon);
|
||||
}
|
||||
case read -> {
|
||||
scan();
|
||||
check(lpar);
|
||||
Designator();
|
||||
check(rpar);
|
||||
check(semicolon);
|
||||
}
|
||||
case print -> {
|
||||
scan();
|
||||
check(lpar);
|
||||
Expr();
|
||||
|
||||
if (sym == comma) {
|
||||
scan();
|
||||
check(number);
|
||||
}
|
||||
|
||||
check(rpar);
|
||||
check(semicolon);
|
||||
}
|
||||
case lbrace -> Block();
|
||||
case semicolon -> scan();
|
||||
default -> recoverStatement();
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private void Designator() {
|
||||
check(ident);
|
||||
|
||||
while (sym == period || sym == lbrack) {
|
||||
if (sym == period) {
|
||||
scan();
|
||||
check(ident);
|
||||
} else {
|
||||
scan();
|
||||
|
||||
if (sym == tilde) {
|
||||
scan();
|
||||
}
|
||||
|
||||
Expr();
|
||||
|
||||
check(rbrack);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void AssignOp() {
|
||||
switch (sym) {
|
||||
case assign -> scan();
|
||||
case plusas -> scan();
|
||||
case minusas -> scan();
|
||||
case timesas -> scan();
|
||||
case slashas -> scan();
|
||||
case remas -> scan();
|
||||
default -> error(TOKEN_EXPECTED, "unexpected token. assignment token (=, +=, -=, *=, /=, %=), method call (\"(\"), increment (++) or decrement (--)");
|
||||
}
|
||||
}
|
||||
|
||||
private void Expr() {
|
||||
if (sym == minus) {
|
||||
scan();
|
||||
}
|
||||
|
||||
Term();
|
||||
|
||||
while (sym == plus || sym == minus) {
|
||||
Addop();
|
||||
Term();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void ActPars() {
|
||||
|
||||
check(lpar);
|
||||
|
||||
if (sym == minus || firstFactor.contains(sym)) {
|
||||
Expr();
|
||||
|
||||
while (sym == comma) {
|
||||
scan();
|
||||
Expr();
|
||||
}
|
||||
}
|
||||
|
||||
check(rpar);
|
||||
}
|
||||
|
||||
private void Term() {
|
||||
Factor();
|
||||
|
||||
while (firstMulop.contains(sym)) {
|
||||
Mulop();
|
||||
Factor();
|
||||
}
|
||||
}
|
||||
|
||||
private void Addop() {
|
||||
if (sym == plus) {
|
||||
scan();
|
||||
} else if (sym == minus) {
|
||||
scan();
|
||||
} else {
|
||||
error(TOKEN_EXPECTED, "minus or plus expected");
|
||||
}
|
||||
}
|
||||
|
||||
private void Factor() {
|
||||
switch(sym) {
|
||||
case ident -> Designator();
|
||||
case number -> scan();
|
||||
case charConst -> scan();
|
||||
case new_ -> {
|
||||
scan();
|
||||
check(ident);
|
||||
if (sym == lbrack) {
|
||||
scan();
|
||||
Expr();
|
||||
check(rbrack);
|
||||
}
|
||||
}
|
||||
case lpar -> {
|
||||
scan();
|
||||
Expr();
|
||||
check(rpar);
|
||||
}
|
||||
default -> error(TOKEN_EXPECTED, "unexpected token. identifier, number, character constant, new or \"(\"");
|
||||
}
|
||||
}
|
||||
|
||||
private void Mulop() {
|
||||
if (sym == times) {
|
||||
scan();
|
||||
} else if (sym == slash) {
|
||||
scan();
|
||||
} else if (sym == rem) {
|
||||
scan();
|
||||
} else {
|
||||
error(TOKEN_EXPECTED, "expected *, /, %");
|
||||
}
|
||||
}
|
||||
|
||||
private void Condition() {
|
||||
CondTerm();
|
||||
|
||||
while (sym == or) {
|
||||
scan();
|
||||
CondTerm();
|
||||
}
|
||||
}
|
||||
|
||||
private void CondTerm() {
|
||||
CondFact();
|
||||
|
||||
while (sym == and) {
|
||||
scan();
|
||||
CondFact();
|
||||
}
|
||||
}
|
||||
|
||||
private void CondFact() {
|
||||
Expr();
|
||||
Relop();
|
||||
Expr();
|
||||
}
|
||||
|
||||
private void Relop() {
|
||||
switch (sym) {
|
||||
case eql -> scan();
|
||||
case neq -> scan();
|
||||
case gtr -> scan();
|
||||
case lss -> scan();
|
||||
case leq -> scan();
|
||||
case geq -> scan();
|
||||
default -> error(TOKEN_EXPECTED, "unexpected token. ==, !=, >, >=, <, <=");
|
||||
}
|
||||
}
|
||||
|
||||
// ...
|
||||
|
||||
// ------------------------------------
|
||||
|
||||
// TODO Exercise UE-P-3: Error recovery methods: recoverDecl, recoverMethodDecl and recoverStat (+ TODO Exercise UE-P-5: Check idents for Type kind)
|
||||
|
||||
private void recoverDecl() {
|
||||
final var firstDeclarationUnion = enumUnion(firstConstDecl, firstVarDecl, firstClassDecl);
|
||||
final var recoverDeclSet = enumUnion(breakDecl, firstDeclarationUnion);
|
||||
error(DECLARATION_RECOVERY);
|
||||
|
||||
do {
|
||||
scan();
|
||||
} while (!recoverDeclSet.contains(sym));
|
||||
errorDist = 0;
|
||||
}
|
||||
|
||||
private void recoverMethodDecl() {
|
||||
error(METHOD_DECL_RECOVERY);
|
||||
|
||||
final var recoveryMethoDeclSet = enumUnion(breakMethodDecl, firstMethodDecl);
|
||||
do {
|
||||
scan();
|
||||
} while (!recoveryMethoDeclSet.contains(sym));
|
||||
errorDist = 0;
|
||||
}
|
||||
|
||||
private void recoverStatement() {
|
||||
error(STATEMENT_RECOVERY);
|
||||
|
||||
var recoveryStatementSet = enumUnion(breakStatement, firstStatement);
|
||||
recoveryStatementSet.remove(ident);
|
||||
recoveryStatementSet.remove(lbrace);
|
||||
|
||||
do {
|
||||
scan();
|
||||
} while (!recoveryStatementSet.contains(sym));
|
||||
errorDist = 0;
|
||||
}
|
||||
|
||||
// ====================================
|
||||
// ====================================
|
||||
}
|
||||
434
MicroJava Compiler/src/ssw/mj/impl/Scanner.java
Normal file
434
MicroJava Compiler/src/ssw/mj/impl/Scanner.java
Normal file
@@ -0,0 +1,434 @@
|
||||
package ssw.mj.impl;
|
||||
|
||||
import ssw.mj.Errors;
|
||||
import ssw.mj.Interpreter;
|
||||
import ssw.mj.scanner.Token;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Map;
|
||||
|
||||
import static ssw.mj.scanner.Token.Kind.*;
|
||||
import static ssw.mj.scanner.Token.Kind.number;
|
||||
|
||||
public class Scanner {
|
||||
|
||||
// Scanner Skeleton - do not rename fields / methods !
|
||||
private static final char EOF = (char) -1;
|
||||
private static final char LF = '\n';
|
||||
|
||||
/**
|
||||
* Input data to read from.
|
||||
*/
|
||||
private final Reader in;
|
||||
|
||||
/**
|
||||
* Lookahead character. (= next (unhandled) character in the input stream)
|
||||
*/
|
||||
private char ch;
|
||||
|
||||
/**
|
||||
* Current line in input stream.
|
||||
*/
|
||||
private int line;
|
||||
|
||||
/**
|
||||
* Current column in input stream.
|
||||
*/
|
||||
private int col;
|
||||
|
||||
/**
|
||||
* According errors object.
|
||||
*/
|
||||
public final Errors errors;
|
||||
|
||||
/**
 * Creates a scanner over the given reader with a fresh error list and
 * reads the first character, so that {@code ch} is valid at line 1,
 * column 1.
 */
public Scanner(Reader r) {
    this.in = r;
    this.errors = new Errors();

    this.line = 1;
    this.col = 0;
    // reads the first character into ch and moves col to 1
    nextCh();
}
|
||||
|
||||
/**
|
||||
* Adds error message to the list of errors.
|
||||
*/
|
||||
public final void error(Token t, Errors.Message msg, Object... msgParams) {
|
||||
errors.error(t.line, t.col, msg, msgParams);
|
||||
|
||||
// reset token content (consistent JUnit tests)
|
||||
t.numVal = 0;
|
||||
t.val = null;
|
||||
}
|
||||
|
||||
|
||||
// ================================================
|
||||
// TODO Exercise UE-P-1: Implement Scanner (next() + private helper methods)
|
||||
// ================================================
|
||||
|
||||
// TODO Exercise UE-P-1: Keywords
|
||||
/**
|
||||
* Mapping from keyword names to appropriate token codes.
|
||||
*/
|
||||
private static final Map<String, Token.Kind> keywords;
|
||||
|
||||
// Keyword table, listed alphabetically; every other name is an identifier.
static {
    keywords = Map.ofEntries(
            Map.entry("break", break_),
            Map.entry("class", class_),
            Map.entry("else", else_),
            Map.entry("final", final_),
            Map.entry("if", if_),
            Map.entry("new", new_),
            Map.entry("print", print),
            Map.entry("program", program),
            Map.entry("read", read),
            Map.entry("return", return_),
            Map.entry("void", void_),
            Map.entry("while", while_)
    );
}
|
||||
|
||||
/**
|
||||
* Returns next token. To be used by parser.
|
||||
*/
|
||||
public Token next() {
|
||||
// TODO Exercise UE-P-1: implementation of next method
|
||||
while (Character.isWhitespace(ch)) {
|
||||
nextCh();
|
||||
}
|
||||
|
||||
final var ogLine = line;
|
||||
final var ogCol = col;
|
||||
|
||||
if (isLetter(ch)) {
|
||||
final var name = readName();
|
||||
|
||||
return getTokenByName(name, ogLine, ogCol);
|
||||
}
|
||||
|
||||
if (isDigit(ch)) {
|
||||
final var stringNumber = readNumber();
|
||||
|
||||
try {
|
||||
final var number = Integer.parseInt(stringNumber);
|
||||
|
||||
var token = new Token(Token.Kind.number, ogLine, ogCol);
|
||||
token.numVal = number;
|
||||
token.val = stringNumber;
|
||||
|
||||
return token;
|
||||
} catch (NumberFormatException exception) {
|
||||
final var token = new Token(number, ogLine, ogCol);
|
||||
error(token, Errors.Message.BIG_NUM, stringNumber);
|
||||
return token;
|
||||
}
|
||||
}
|
||||
|
||||
if (ch == '\'') {
|
||||
nextCh();
|
||||
|
||||
if (ch == '\r') {
|
||||
nextCh();
|
||||
}
|
||||
|
||||
var charContent = ch;
|
||||
var lfEscaped = false;
|
||||
var tickEscaped = false;
|
||||
|
||||
if (ch == '\\') {
|
||||
nextCh();
|
||||
charContent = switch (ch) {
|
||||
case 'n' -> '\n';
|
||||
case 'r' -> '\r';
|
||||
case 't' -> '\t';
|
||||
case '\\' -> '\\';
|
||||
case '\'' -> '\'';
|
||||
default -> '\0';
|
||||
};
|
||||
|
||||
if (charContent == '\n') {
|
||||
lfEscaped = true;
|
||||
}
|
||||
|
||||
if (charContent == '\'') {
|
||||
tickEscaped = true;
|
||||
}
|
||||
|
||||
if (charContent == '\0') {
|
||||
final var token = new Token(charConst, ogLine, ogCol);
|
||||
error(token, Errors.Message.UNDEFINED_ESCAPE, ch);
|
||||
}
|
||||
}
|
||||
|
||||
if (charContent == LF && !lfEscaped || charContent == EOF || charContent == '\'' && !tickEscaped) {
|
||||
final var token = new Token(charConst, ogLine, ogCol);
|
||||
final var message = switch (charContent) {
|
||||
case LF -> Errors.Message.ILLEGAL_LINE_END;
|
||||
case EOF -> Errors.Message.EOF_IN_CHAR;
|
||||
case '\'' -> Errors.Message.EMPTY_CHARCONST;
|
||||
default -> null;
|
||||
};
|
||||
|
||||
error(token, message);
|
||||
token.val = '\0' + "";
|
||||
token.numVal = 0;
|
||||
|
||||
if (charContent == '\'') {
|
||||
nextCh();
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
nextCh();
|
||||
|
||||
if (ch != '\'') {
|
||||
final var token = new Token(charConst, ogLine, ogCol);
|
||||
error(token, Errors.Message.MISSING_QUOTE);
|
||||
token.val = '\0' + "";
|
||||
token.numVal = 0;
|
||||
return token;
|
||||
}
|
||||
|
||||
nextCh();
|
||||
|
||||
final var token = new Token(charConst, ogLine, ogCol);
|
||||
token.val = charContent + "";
|
||||
token.numVal = charContent;
|
||||
return token;
|
||||
}
|
||||
|
||||
final var ogChar = ch;
|
||||
nextCh();
|
||||
return switch(ogChar) {
|
||||
case ';' -> new Token(Token.Kind.semicolon, ogLine, ogCol);
|
||||
case ',' -> new Token(Token.Kind.comma, ogLine, ogCol);
|
||||
case '.' -> new Token(Token.Kind.period, ogLine, ogCol);
|
||||
case '{' -> new Token(Token.Kind.lbrace, ogLine, ogCol);
|
||||
case '}' -> new Token(Token.Kind.rbrace, ogLine, ogCol);
|
||||
case '[' -> new Token(Token.Kind.lbrack, ogLine, ogCol);
|
||||
case ']' -> new Token(Token.Kind.rbrack, ogLine, ogCol);
|
||||
case '(' -> new Token(Token.Kind.lpar, ogLine, ogCol);
|
||||
case ')' -> new Token(Token.Kind.rpar, ogLine, ogCol);
|
||||
case (char) -1 -> new Token(Token.Kind.eof, ogLine, ogCol);
|
||||
case '~' -> new Token(Token.Kind.tilde, ogLine, ogCol);
|
||||
case '=' -> {
|
||||
if (ch == '=') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.eql, ogLine, ogCol);
|
||||
}
|
||||
|
||||
yield new Token(Token.Kind.assign, ogLine, ogCol);
|
||||
}
|
||||
case '+' -> {
|
||||
if (ch == '=') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.plusas, ogLine, ogCol);
|
||||
}
|
||||
|
||||
if (ch == '+') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.pplus, ogLine, ogCol);
|
||||
}
|
||||
|
||||
yield new Token(Token.Kind.plus, ogLine, ogCol);
|
||||
}
|
||||
case '-' -> {
|
||||
if (ch == '=') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.minusas, ogLine, ogCol);
|
||||
}
|
||||
|
||||
if (ch == '-') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.mminus, ogLine, ogCol);
|
||||
}
|
||||
|
||||
yield new Token(Token.Kind.minus, ogLine, ogCol);
|
||||
}
|
||||
case '*' -> {
|
||||
if (ch == '=') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.timesas, ogLine, ogCol);
|
||||
}
|
||||
|
||||
yield new Token(Token.Kind.times, ogLine, ogCol);
|
||||
}
|
||||
case '/' -> {
|
||||
if (ch == '=') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.slashas, ogLine, ogCol);
|
||||
}
|
||||
|
||||
if (ch == '*') {
|
||||
nextCh();
|
||||
final var success = skipComment();
|
||||
if (success) {
|
||||
yield next();
|
||||
} else {
|
||||
final var token = new Token(eof, ogLine, ogCol);
|
||||
error(token, Errors.Message.EOF_IN_COMMENT);
|
||||
yield next();
|
||||
}
|
||||
}
|
||||
|
||||
yield new Token(Token.Kind.slash, ogLine, ogCol);
|
||||
}
|
||||
case '%' -> {
|
||||
if (ch == '=') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.remas, ogLine, ogCol);
|
||||
}
|
||||
|
||||
yield new Token(Token.Kind.rem, ogLine, ogCol);
|
||||
}
|
||||
case '!' -> {
|
||||
if (ch == '=') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.neq, ogLine, ogCol);
|
||||
}
|
||||
|
||||
final var token = new Token(none, ogLine, ogCol);
|
||||
error(token, Errors.Message.INVALID_CHAR, ogChar);
|
||||
yield token;
|
||||
}
|
||||
case '>' -> {
|
||||
if (ch == '=') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.geq, ogLine, ogCol);
|
||||
}
|
||||
|
||||
yield new Token(Token.Kind.gtr, ogLine, ogCol);
|
||||
}
|
||||
case '<' -> {
|
||||
if (ch == '=') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.leq, ogLine, ogCol);
|
||||
}
|
||||
|
||||
yield new Token(Token.Kind.lss, ogLine, ogCol);
|
||||
}
|
||||
case '&' -> {
|
||||
if (ch == '&') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.and, ogLine, ogCol);
|
||||
}
|
||||
|
||||
final var token = new Token(none, ogLine, ogCol);
|
||||
error(token, Errors.Message.INVALID_CHAR, ogChar);
|
||||
yield token;
|
||||
}
|
||||
case '|' -> {
|
||||
if (ch == '|') {
|
||||
nextCh();
|
||||
yield new Token(Token.Kind.or, ogLine, ogCol);
|
||||
}
|
||||
|
||||
final var token = new Token(none, ogLine, ogCol);
|
||||
error(token, Errors.Message.INVALID_CHAR, ogChar);
|
||||
yield token;
|
||||
}
|
||||
default -> {
|
||||
final var token = new Token(none, ogLine, ogCol);
|
||||
error(token, Errors.Message.INVALID_CHAR, ogChar);
|
||||
yield token;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private void nextCh() {
|
||||
// TODO Exercise UE-P-1: implementation of nextCh method and other private helper methods
|
||||
try {
|
||||
final var intChar = in.read();
|
||||
final var nextChar = (char) intChar;
|
||||
|
||||
if (ch == '\n') {
|
||||
line++;
|
||||
col = 0;
|
||||
}
|
||||
|
||||
col++;
|
||||
ch = nextChar;
|
||||
} catch (IOException e) {
|
||||
System.err.println("Could not read Stream");
|
||||
}
|
||||
}
|
||||
|
||||
// TODO Exercise UE-P-1: private helper methods used by next(), as discussed in the exercise
|
||||
|
||||
// -----------------------------------------------
|
||||
|
||||
private boolean isLetter(char c) {
|
||||
return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z';
|
||||
}
|
||||
|
||||
private boolean isDigit(char c) {
|
||||
return '0' <= c && c <= '9';
|
||||
}
|
||||
|
||||
public String readName() {
|
||||
final var builder = new StringBuilder();
|
||||
|
||||
while (isLetter(ch) || ch == '_' || isDigit(ch)) {
|
||||
builder.append(ch);
|
||||
nextCh();
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public String readNumber() {
|
||||
final var builder = new StringBuilder();
|
||||
|
||||
while (isDigit(ch)) {
|
||||
builder.append(ch);
|
||||
nextCh();
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
private Token getTokenByName(String name, int line, int col) {
|
||||
final var kind = keywords.getOrDefault(name, Token.Kind.ident);
|
||||
|
||||
var token = new Token(kind, line, col);
|
||||
|
||||
if (kind == Token.Kind.ident) {
|
||||
token.val = name;
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
private boolean skipComment() {
|
||||
var commentCount = 1;
|
||||
while (commentCount != 0) {
|
||||
final var lastCh = ch;
|
||||
nextCh();
|
||||
|
||||
if (lastCh == '/' && ch == '*') {
|
||||
commentCount++;
|
||||
nextCh();
|
||||
}
|
||||
|
||||
if (lastCh == '*' && ch == '/') {
|
||||
commentCount--;
|
||||
nextCh();
|
||||
}
|
||||
|
||||
if (ch == EOF && commentCount != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================
|
||||
// ================================================
|
||||
}
|
||||
100
MicroJava Compiler/src/ssw/mj/impl/Tab.java
Normal file
100
MicroJava Compiler/src/ssw/mj/impl/Tab.java
Normal file
@@ -0,0 +1,100 @@
|
||||
package ssw.mj.impl;
|
||||
|
||||
import ssw.mj.symtab.Obj;
|
||||
import ssw.mj.symtab.Scope;
|
||||
import ssw.mj.symtab.Struct;
|
||||
|
||||
public final class Tab {
|
||||
|
||||
// Universe
|
||||
public static final Struct noType = new Struct(Struct.Kind.None);
|
||||
public static final Struct intType = new Struct(Struct.Kind.Int);
|
||||
public static final Struct charType = new Struct(Struct.Kind.Char);
|
||||
public static final Struct nullType = new Struct(Struct.Kind.Class);
|
||||
|
||||
public final Obj noObj, chrObj;
|
||||
public Obj ordObj, lenObj;
|
||||
|
||||
/**
|
||||
* Only used for reporting errors.
|
||||
*/
|
||||
private final Parser parser;
|
||||
/**
|
||||
* The current top scope.
|
||||
*/
|
||||
public Scope curScope = null;
|
||||
// First scope opening (universe) will increase this to -1
|
||||
/**
|
||||
* Nesting level of current scope.
|
||||
*/
|
||||
private int curLevel = -2;
|
||||
|
||||
public Tab(Parser p) {
|
||||
parser = p;
|
||||
|
||||
// setting up "universe" (= predefined names)
|
||||
// opening scope (curLevel goes to -1, which is the universe level)
|
||||
openScope();
|
||||
|
||||
noObj = new Obj(Obj.Kind.Var, "noObj", noType);
|
||||
|
||||
insert(Obj.Kind.Type, "int", intType);
|
||||
insert(Obj.Kind.Type, "char", charType);
|
||||
insert(Obj.Kind.Con, "null", nullType);
|
||||
|
||||
chrObj = insert(Obj.Kind.Meth, "chr", charType);
|
||||
openScope();
|
||||
Obj iVarObj = insert(Obj.Kind.Var, "i", intType);
|
||||
iVarObj.level = 1;
|
||||
chrObj.nPars = curScope.nVars();
|
||||
chrObj.locals = curScope.locals();
|
||||
closeScope();
|
||||
|
||||
// TODO Exercise UE-P-4: build "ord" universe method and store in ordObj
|
||||
|
||||
// TODO Exercise UE-P-4: build "len" universe method and store in lenObj
|
||||
|
||||
// still on level -1
|
||||
// now that the universe is constructed, the next node that will be added is the Program itself
|
||||
// (which will open its own scope with level 0)
|
||||
}
|
||||
|
||||
// ===============================================
|
||||
// TODO Exercise UE-P-4: implementation of symbol table
|
||||
// ===============================================
|
||||
|
||||
public void openScope() {
|
||||
curScope = new Scope(curScope);
|
||||
curLevel++;
|
||||
}
|
||||
|
||||
public void closeScope() {
|
||||
curScope = curScope.outer();
|
||||
curLevel--;
|
||||
}
|
||||
|
||||
public Obj insert(Obj.Kind kind, String name, Struct type) {
|
||||
// TODO Exercise UE-P-4
|
||||
return noObj;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the object with <code>name</code> from the innermost scope.
|
||||
*/
|
||||
public Obj find(String name) {
|
||||
// TODO Exercise UE-P-4
|
||||
return noObj;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the field <code>name</code> from the fields of
|
||||
* <code>type</code>.
|
||||
*/
|
||||
public Obj findField(String name, Struct type) {
|
||||
// TODO Exercise UE-P-4
|
||||
return noObj;
|
||||
}
|
||||
|
||||
// ===============================================
|
||||
// ===============================================
|
||||
}
|
||||
Reference in New Issue
Block a user