Creating textual IR parser

This commit is contained in:
Alexey Andreev 2016-11-27 16:59:47 +03:00
parent 55158c9e13
commit 93f8c2cf39
12 changed files with 592 additions and 154 deletions

View File

@ -25,6 +25,7 @@ public class BasicBlock implements BasicBlockReader {
private List<Instruction> instructions = new ArrayList<>();
private List<TryCatchBlock> tryCatchBlocks = new ArrayList<>();
private Variable exceptionVariable;
private String label;
BasicBlock(Program program, int index) {
this.program = program;
@ -257,4 +258,12 @@ public class BasicBlock implements BasicBlockReader {
/**
 * Replaces the exception variable held by this block.
 *
 * @param exceptionVariable the new value; it is stored as given, without any validation here
 */
public void setExceptionVariable(Variable exceptionVariable) {
this.exceptionVariable = exceptionVariable;
}
/**
 * Returns the label currently stored for this block.
 *
 * @return the value last passed to {@code setLabel}, or {@code null} if none was set
 */
public String getLabel() {
return label;
}
/**
 * Stores a label for this block.
 *
 * @param label the label to remember; it is stored as given, without any validation here
 */
public void setLabel(String label) {
this.label = label;
}
}

View File

@ -20,6 +20,7 @@ public class Variable implements VariableReader {
private int index;
private int register;
private String debugName;
private String label;
Variable(Program program) {
this.program = program;
@ -60,4 +61,12 @@ public class Variable implements VariableReader {
/**
 * Replaces the debug name held by this variable.
 *
 * @param debugName the new value; it is stored as given, without any validation here
 */
public void setDebugName(String debugName) {
this.debugName = debugName;
}
/**
 * Returns the label currently stored for this variable.
 *
 * @return the value last passed to {@code setLabel}, or {@code null} if none was set
 */
public String getLabel() {
return label;
}
/**
 * Stores a label for this variable.
 *
 * @param label the label to remember; it is stored as given, without any validation here
 */
public void setLabel(String label) {
this.label = label;
}
}

View File

@ -15,10 +15,6 @@
*/
package org.teavm.model.instructions;
/**
*
* @author Alexey Andreev
*/
public enum ArrayElementType {
CHAR,
BYTE,

View File

@ -21,11 +21,11 @@ import java.util.stream.Collectors;
import org.teavm.model.*;
import org.teavm.model.instructions.*;
public class InstructionStringifier implements InstructionReader {
class InstructionStringifier implements InstructionReader {
private TextLocation location;
private StringBuilder sb;
public InstructionStringifier(StringBuilder sb) {
InstructionStringifier(StringBuilder sb) {
this.sb = sb;
}
@ -75,7 +75,41 @@ public class InstructionStringifier implements InstructionReader {
@Override
public void stringConstant(VariableReader receiver, String cst) {
sb.append("@").append(receiver.getIndex()).append(" := '").append(cst).append("'");
sb.append("@").append(receiver.getIndex()).append(" := '");
escapeStringLiteral(cst, sb);
sb.append("'");
}
/**
 * Appends {@code s} to {@code sb}, escaping the characters that cannot appear verbatim
 * inside a single-quoted literal of the listing.
 *
 * <p>Line feed, tab, single quote and backslash are written as two-character backslash
 * escapes. Any other character below the space character is written as a backslash,
 * the letter {@code u} and four lower-case hexadecimal digits. Everything else is
 * copied unchanged. The surrounding quotes are not written by this method.
 *
 * @param s  the raw text to escape
 * @param sb the builder that receives the escaped text
 */
static void escapeStringLiteral(String s, StringBuilder sb) {
    for (char ch : s.toCharArray()) {
        if (ch == '\n') {
            sb.append("\\n");
        } else if (ch == '\t') {
            sb.append("\\t");
        } else if (ch == '\'') {
            sb.append("\\'");
        } else if (ch == '\\') {
            sb.append("\\\\");
        } else if (ch < ' ') {
            // Remaining control characters: emit the code unit as four hex digits,
            // most significant nibble first.
            sb.append("\\u");
            for (int shift = 12; shift >= 0; shift -= 4) {
                sb.append(Character.forDigit((ch >> shift) & 0xF, 16));
            }
        } else {
            sb.append(ch);
        }
    }
}
@Override
@ -245,13 +279,13 @@ public class InstructionStringifier implements InstructionReader {
@Override
public void createArray(VariableReader receiver, ValueType itemType, VariableReader size) {
sb.append("@").append(receiver.getIndex()).append(" = new ").append(itemType).append("[@")
sb.append("@").append(receiver.getIndex()).append(" := new ").append(itemType).append("[@")
.append(size.getIndex()).append(']');
}
@Override
public void createArray(VariableReader receiver, ValueType itemType, List<? extends VariableReader> dimensions) {
sb.append("@").append(receiver.getIndex()).append(" = new ").append(itemType).append("[");
sb.append("@").append(receiver.getIndex()).append(" := new ").append(itemType).append("[");
for (int i = 0; i < dimensions.size(); ++i) {
if (i > 0) {
sb.append(", ");
@ -263,7 +297,7 @@ public class InstructionStringifier implements InstructionReader {
@Override
public void create(VariableReader receiver, String type) {
sb.append("@").append(receiver.getIndex()).append(" = new ").append(type).append("");
sb.append("@").append(receiver.getIndex()).append(" := new ").append(type).append("");
}
@Override
@ -296,20 +330,20 @@ public class InstructionStringifier implements InstructionReader {
@Override
public void unwrapArray(VariableReader receiver, VariableReader array, ArrayElementType elementType) {
sb.append("@").append(receiver.getIndex()).append(" := @").append(array.getIndex()).append(".data");
sb.append("@").append(receiver.getIndex()).append(" := data @").append(array.getIndex()).append("");
}
@Override
public void getElement(VariableReader receiver, VariableReader array, VariableReader index,
ArrayElementType type) {
sb.append("@").append(receiver.getIndex()).append(" := @").append(array.getIndex()).append("[@")
.append(index.getIndex()).append("]");
.append(index.getIndex()).append("]").append(" as " + type.name().toLowerCase());
}
@Override
public void putElement(VariableReader array, VariableReader index, VariableReader value, ArrayElementType type) {
sb.append("@").append(array.getIndex()).append("[@").append(index.getIndex()).append("] := @")
.append(value.getIndex());
.append(value.getIndex()).append(" as " + type.name().toLowerCase());
}
@Override

View File

@ -26,16 +26,16 @@ public class ListingBuilder {
StringBuilder insnSb = new StringBuilder();
InstructionStringifier stringifier = new InstructionStringifier(insnSb);
for (int i = 0; i < program.variableCount(); ++i) {
sb.append(prefix).append("var @").append(i);
VariableReader var = program.variableAt(i);
if (var != null && var.getDebugName() != null) {
sb.append(" as ").append(var.getDebugName());
if (var == null || var.getDebugName() == null) {
continue;
}
sb.append(prefix).append("var @").append(i);
sb.append('\n');
}
for (int i = 0; i < program.basicBlockCount(); ++i) {
BasicBlockReader block = program.basicBlockAt(i);
sb.append(prefix).append("$").append(i).append(":\n");
sb.append(prefix).append("$").append(i).append("\n");
if (block == null) {
continue;
}
@ -65,8 +65,15 @@ public class ListingBuilder {
block.readInstruction(j, stringifier);
if (!Objects.equals(location, stringifier.getLocation())) {
location = stringifier.getLocation();
sb.append(prefix).append(" at ").append(location != null ? location.toString()
: "unknown location").append('\n');
sb.append(prefix).append(" at ");
if (location == null) {
sb.append("unknown location");
} else {
sb.append("'");
InstructionStringifier.escapeStringLiteral(location.getFileName(), sb);
sb.append("' " + location.getLine());
}
sb.append('\n');
}
sb.append(prefix).append(" ").append(insnSb).append("\n");
}

View File

@ -58,136 +58,129 @@ class ListingLexer {
}
skipWhiteSpace();
do {
switch (c) {
case -1:
token = ListingToken.EOF;
break;
case '@':
switch (c) {
case -1:
token = ListingToken.EOF;
break;
case '\n':
token = ListingToken.EOL;
nextChar();
break;
case '@':
readVariable();
break;
case '$':
readLabel();
break;
case '\'':
readString();
break;
case ':':
nextChar();
expect('=');
token = ListingToken.ASSIGN;
break;
case '=':
nextChar();
expect('=');
token = ListingToken.EQUAL;
break;
case '!':
nextChar();
expect('=');
token = ListingToken.NOT_EQUAL;
break;
case '<':
nextChar();
if (c == '=') {
nextChar();
readVariable();
break;
case '$':
readLabel();
break;
case '\'':
readString();
break;
case ':':
token = ListingToken.LESS_OR_EQUAL;
} else if (c == '<') {
nextChar();
if (c == '=') {
token = ListingToken.SHIFT_LEFT;
} else {
token = ListingToken.LESS;
}
break;
case '>':
nextChar();
if (c == '=') {
nextChar();
token = ListingToken.GREATER_OR_EQUAL;
} else if (c == '>') {
nextChar();
if (c == '>') {
nextChar();
token = ListingToken.ASSIGN;
token = ListingToken.SHIFT_RIGHT_UNSIGNED;
} else {
token = ListingToken.COLON;
token = ListingToken.SHIFT_RIGHT;
}
break;
case '=':
nextChar();
expect('=');
token = ListingToken.EQUAL;
break;
case '!':
nextChar();
expect('=');
token = ListingToken.NOT_EQUAL;
break;
case '<':
nextChar();
if (c == '=') {
nextChar();
token = ListingToken.LESS_OR_EQUAL;
} else if (c == '<') {
nextChar();
token = ListingToken.SHIFT_LEFT;
} else {
token = ListingToken.LESS;
}
break;
case '>':
nextChar();
if (c == '=') {
nextChar();
token = ListingToken.GREATER_OR_EQUAL;
} else if (c == '>') {
nextChar();
if (c == '>') {
nextChar();
token = ListingToken.SHIFT_RIGHT_UNSIGNED;
} else {
token = ListingToken.SHIFT_RIGHT;
}
} else {
token = ListingToken.GREATER;
}
break;
case '+':
nextChar();
token = ListingToken.ADD;
break;
case '-':
nextChar();
token = ListingToken.SUBTRACT;
break;
case '*':
nextChar();
token = ListingToken.SUBTRACT;
break;
case '/':
nextChar();
if (c == '/') {
if (skipComment()) {
continue;
} else {
token = ListingToken.EOF;
}
} else {
token = ListingToken.DIVIDE;
}
break;
case '%':
nextChar();
token = ListingToken.REMAINDER;
break;
case '&':
nextChar();
token = ListingToken.AND;
break;
case '|':
nextChar();
token = ListingToken.OR;
break;
case '^':
nextChar();
token = ListingToken.XOR;
break;
case '.':
nextChar();
token = ListingToken.DOT;
break;
case ',':
nextChar();
token = ListingToken.COMMA;
break;
case '[':
nextChar();
token = ListingToken.LEFT_SQUARE_BRACKET;
break;
case ']':
nextChar();
token = ListingToken.RIGHT_SQUARE_BRACKET;
break;
default:
if (isIdentifierStart()) {
readIdentifier();
} else if (c >= '0' && c <= '9') {
readNumber();
} else {
unexpected();
}
break;
}
} while (false);
} else {
token = ListingToken.GREATER;
}
break;
case '+':
nextChar();
token = ListingToken.ADD;
break;
case '-':
nextChar();
token = ListingToken.SUBTRACT;
break;
case '*':
nextChar();
token = ListingToken.SUBTRACT;
break;
case '/':
nextChar();
if (c == '/') {
skipComment();
} else {
token = ListingToken.DIVIDE;
}
break;
case '%':
nextChar();
token = ListingToken.REMAINDER;
break;
case '&':
nextChar();
token = ListingToken.AND;
break;
case '|':
nextChar();
token = ListingToken.OR;
break;
case '^':
nextChar();
token = ListingToken.XOR;
break;
case '.':
nextChar();
token = ListingToken.DOT;
break;
case ',':
nextChar();
token = ListingToken.COMMA;
break;
case '[':
nextChar();
token = ListingToken.LEFT_SQUARE_BRACKET;
break;
case ']':
nextChar();
token = ListingToken.RIGHT_SQUARE_BRACKET;
break;
default:
if (isIdentifierStart()) {
readIdentifier();
} else if (c >= '0' && c <= '9') {
readNumber();
} else {
unexpected();
}
break;
}
}
private void readVariable() throws IOException {
@ -197,7 +190,7 @@ class ListingLexer {
}
private void readLabel() throws IOException {
readIdentifierLike();
nextChar();
token = ListingToken.LABEL;
readIdentifierLike();
}
@ -205,7 +198,7 @@ class ListingLexer {
private void readIdentifierLike() throws IOException {
StringBuilder sb = new StringBuilder();
while (isIdentifierPart()) {
sb.append(c);
sb.append((char) c);
nextChar();
}
tokenValue = sb.toString();
@ -214,9 +207,10 @@ class ListingLexer {
private void readIdentifier() throws IOException {
token = ListingToken.IDENTIFIER;
StringBuilder sb = new StringBuilder();
sb.append(c);
sb.append((char) c);
nextChar();
while (isIdentifierPart()) {
sb.append(c);
sb.append((char) c);
nextChar();
}
tokenValue = sb.toString();
@ -385,14 +379,13 @@ class ListingLexer {
}
private void unexpected() throws ListingParseException {
throw new ListingParseException("Unexpected character: " + c, index);
throw new ListingParseException("Unexpected character: " + (char) c, index);
}
private void skipWhiteSpace() throws IOException {
while (true) {
switch (c) {
case ' ':
case '\n':
case '\t':
nextChar();
break;
@ -402,14 +395,16 @@ class ListingLexer {
}
}
private boolean skipComment() throws IOException {
private void skipComment() throws IOException {
while (true) {
switch (c) {
case '\n':
nextChar();
return true;
token = ListingToken.EOL;
return;
case -1:
return false;
token = ListingToken.EOF;
return;
default:
nextChar();
break;

View File

@ -15,5 +15,311 @@
*/
package org.teavm.model.text;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.teavm.model.BasicBlock;
import org.teavm.model.Program;
import org.teavm.model.TextLocation;
import org.teavm.model.Variable;
import org.teavm.model.instructions.ArrayElementType;
import org.teavm.model.instructions.AssignInstruction;
import org.teavm.model.instructions.EmptyInstruction;
import org.teavm.model.instructions.PutElementInstruction;
/**
 * Parser that builds a {@link Program} from its textual listing.
 *
 * <p>The accepted input consists of an optional prologue followed by one or more basic blocks:
 * <ul>
 *   <li>prologue lines of the form {@code var <variable> as <identifier>}, which declare a
 *       variable and give it a debug name;</li>
 *   <li>a block header made of a label token on a line of its own;</li>
 *   <li>instruction lines: {@code at '<file>' <line>} or {@code at unknown location}
 *       (sets the location attached to the instructions that follow), {@code nop},
 *       {@code <variable> := <variable>} and
 *       {@code <variable>[<variable>] := <variable> as <element type>}.</li>
 * </ul>
 *
 * <p>Parsing state lives in instance fields that are initialised at the start of every
 * {@link #parse(Reader)} call and cleared when the call finishes.
 */
public class ListingParser {
    private Program program;
    private ListingLexer lexer;
    private Map<String, Variable> variableMap;
    private Map<String, BasicBlock> blockMap;
    private Map<String, Integer> blockFirstOccurence;
    private Set<String> declaredBlocks;
    private TextLocation currentLocation;

    /**
     * Parses a complete listing.
     *
     * @param reader source of the listing text
     * @return the program built from the listing
     * @throws IOException if reading from {@code reader} fails
     * @throws ListingParseException if the text does not match the expected syntax
     */
    public Program parse(Reader reader) throws IOException, ListingParseException {
        try {
            program = new Program();
            lexer = new ListingLexer(reader);
            variableMap = new HashMap<>();
            blockMap = new HashMap<>();
            blockFirstOccurence = new HashMap<>();
            // Reset together with the other per-parse state so that a parser instance can be
            // reused without reporting labels from a previous listing as duplicates.
            declaredBlocks = new HashSet<>();
            currentLocation = null;

            lexer.nextToken();
            parsePrologue();

            do {
                parseBasicBlock();
            } while (lexer.getToken() != ListingToken.EOF);

            return program;
        } finally {
            program = null;
            lexer = null;
            variableMap = null;
            blockMap = null;
            blockFirstOccurence = null;
            declaredBlocks = null;
            currentLocation = null;
        }
    }

    /**
     * Reads the leading {@code var <variable> as <identifier>} declarations, skipping empty lines,
     * and stops at the first line that does not start with the {@code var} keyword.
     */
    private void parsePrologue() throws IOException, ListingParseException {
        while (true) {
            while (lexer.getToken() == ListingToken.EOL) {
                lexer.nextToken();
            }

            if (lexer.getToken() != ListingToken.IDENTIFIER || !lexer.getTokenValue().equals("var")) {
                break;
            }
            lexer.nextToken();

            expect(ListingToken.VARIABLE);
            String variableName = (String) lexer.getTokenValue();
            if (variableMap.containsKey(variableName)) {
                throw new ListingParseException("Variable " + variableName + " already declared",
                        lexer.getTokenStart());
            }
            lexer.nextToken();

            expectKeyword("as");

            expect(ListingToken.IDENTIFIER);
            String variableAlias = (String) lexer.getTokenValue();
            lexer.nextToken();

            expectEofOrEol();

            Variable variable = program.createVariable();
            variable.setLabel(variableName);
            variable.setDebugName(variableAlias);
            variableMap.put(variableName, variable);
        }
    }

    /**
     * Parses one block: a label line followed by instruction lines, up to the next label or the
     * end of input.
     */
    private void parseBasicBlock() throws IOException, ListingParseException {
        expect(ListingToken.LABEL);
        String label = (String) lexer.getTokenValue();
        if (!declaredBlocks.add(label)) {
            throw new ListingParseException("Block with label " + label + " already exists", lexer.getTokenStart());
        }
        blockFirstOccurence.remove(label);
        lexer.nextToken();

        expect(ListingToken.EOL);
        while (lexer.getToken() == ListingToken.EOL) {
            lexer.nextToken();
        }

        BasicBlock block = program.createBasicBlock();
        block.setLabel(label);
        blockMap.put(label, block);

        currentLocation = null;
        do {
            parseInstruction(block);
        } while (lexer.getToken() != ListingToken.LABEL && lexer.getToken() != ListingToken.EOF);
        // Every instruction line has already consumed its own line terminator, so the current
        // token is either the next block's label or EOF. Nothing more must be required here:
        // demanding a new line at this point would reject every listing with a second block.
    }

    /**
     * Parses a single line inside a block (an instruction, a location directive or an empty line)
     * and consumes its line terminator.
     */
    private void parseInstruction(BasicBlock block) throws IOException, ListingParseException {
        switch (lexer.getToken()) {
            case IDENTIFIER: {
                String id = (String) lexer.getTokenValue();
                switch (id) {
                    case "at": {
                        lexer.nextToken();
                        parseLocation();
                        break;
                    }
                    case "nop": {
                        EmptyInstruction insn = new EmptyInstruction();
                        insn.setLocation(currentLocation);
                        block.getInstructions().add(insn);
                        lexer.nextToken();
                        break;
                    }
                    default:
                        unexpected();
                        break;
                }
                break;
            }

            case VARIABLE: {
                Variable receiver = getVariable((String) lexer.getTokenValue());
                lexer.nextToken();

                switch (lexer.getToken()) {
                    case ASSIGN:
                        lexer.nextToken();
                        parseAssignment(block, receiver);
                        break;
                    case LEFT_SQUARE_BRACKET:
                        lexer.nextToken();
                        parseArrayAssignment(block, receiver);
                        break;
                    default:
                        // A variable that is followed by neither ':=' nor '[' is not an instruction.
                        unexpected();
                        break;
                }
                break;
            }

            default:
                // Any other token is handled by the end-of-line check below.
                break;
        }
        expectEofOrEol();
    }

    /**
     * Parses the argument of an {@code at} directive and updates the current location:
     * either the keywords {@code unknown location} or a string followed by an integer.
     */
    private void parseLocation() throws IOException, ListingParseException {
        if (lexer.getToken() == ListingToken.IDENTIFIER) {
            if (lexer.getTokenValue().equals("unknown")) {
                lexer.nextToken();
                expectKeyword("location");
                currentLocation = null;
                return;
            }
        } else if (lexer.getToken() == ListingToken.STRING) {
            String fileName = (String) lexer.getTokenValue();
            lexer.nextToken();

            if (lexer.getToken() == ListingToken.INTEGER) {
                int lineNumber = (Integer) lexer.getTokenValue();
                lexer.nextToken();
                currentLocation = new TextLocation(fileName, lineNumber);
                return;
            }
        }
        throw new ListingParseException("Unexpected token " + lexer.getToken() + ". "
                + "Expected 'unknown location' or '<string> <number>'", lexer.getTokenStart());
    }

    /** Parses the right-hand side of {@code <receiver> := ...}. */
    private void parseAssignment(BasicBlock block, Variable receiver) throws IOException, ListingParseException {
        switch (lexer.getToken()) {
            case VARIABLE: {
                Variable variable = getVariable((String) lexer.getTokenValue());
                lexer.nextToken();
                parseAssignmentVariable(block, receiver, variable);
                break;
            }
            default:
                unexpected();
        }
    }

    /**
     * Handles {@code <receiver> := <variable>} once both variables are read; only a plain copy
     * terminated by the end of the line is supported.
     */
    private void parseAssignmentVariable(BasicBlock block, Variable receiver, Variable variable)
            throws IOException, ListingParseException {
        switch (lexer.getToken()) {
            case EOL:
            case EOF: {
                AssignInstruction insn = new AssignInstruction();
                insn.setLocation(currentLocation);
                insn.setReceiver(receiver);
                insn.setAssignee(variable);
                block.getInstructions().add(insn);
                break;
            }
            default:
                unexpected();
        }
    }

    /**
     * Parses {@code <array>[<index>] := <value> as <element type>}; the array variable and the
     * opening bracket have already been consumed by the caller.
     */
    private void parseArrayAssignment(BasicBlock block, Variable array) throws IOException, ListingParseException {
        Variable index = expectVariable();
        expect(ListingToken.RIGHT_SQUARE_BRACKET);
        lexer.nextToken();
        expect(ListingToken.ASSIGN);
        lexer.nextToken();
        Variable value = expectVariable();
        expectKeyword("as");
        ArrayElementType type = expectArrayType();

        PutElementInstruction insn = new PutElementInstruction(type);
        insn.setLocation(currentLocation);
        insn.setArray(array);
        insn.setIndex(index);
        insn.setValue(value);
        // Without this the parsed instruction would be silently dropped from the program.
        block.getInstructions().add(insn);
    }

    /** Reads an identifier naming an array element type and maps it to {@link ArrayElementType}. */
    private ArrayElementType expectArrayType() throws IOException, ListingParseException {
        expect(ListingToken.IDENTIFIER);
        ArrayElementType type;
        switch ((String) lexer.getTokenValue()) {
            case "char":
                type = ArrayElementType.CHAR;
                break;
            case "byte":
                type = ArrayElementType.BYTE;
                break;
            case "short":
                type = ArrayElementType.SHORT;
                break;
            case "int":
                type = ArrayElementType.INT;
                break;
            case "long":
                type = ArrayElementType.LONG;
                break;
            case "float":
                type = ArrayElementType.FLOAT;
                break;
            case "double":
                type = ArrayElementType.DOUBLE;
                break;
            case "object":
                type = ArrayElementType.OBJECT;
                break;
            default:
                throw new ListingParseException("Unknown array type: " + lexer.getTokenValue(), lexer.getTokenStart());
        }
        lexer.nextToken();
        return type;
    }

    /** Requires a variable token, resolves it and advances past it. */
    private Variable expectVariable() throws IOException, ListingParseException {
        expect(ListingToken.VARIABLE);
        String variableName = (String) lexer.getTokenValue();
        Variable variable = getVariable(variableName);
        lexer.nextToken();
        return variable;
    }

    /** Returns the variable registered under {@code name}, creating it on first use. */
    private Variable getVariable(String name) {
        return variableMap.computeIfAbsent(name, k -> {
            Variable variable = program.createVariable();
            variable.setLabel(k);
            return variable;
        });
    }

    /** Fails unless the current token is {@code expected}; does not advance the lexer. */
    private void expect(ListingToken expected) throws IOException, ListingParseException {
        if (lexer.getToken() != expected) {
            throw new ListingParseException("Unexpected token " + lexer.getToken()
                    + ". Expected " + expected, lexer.getTokenStart());
        }
    }

    /** Requires the end of the line or of the input; an end-of-line token is consumed. */
    private void expectEofOrEol() throws IOException, ListingParseException {
        if (lexer.getToken() != ListingToken.EOL && lexer.getToken() != ListingToken.EOF) {
            throw new ListingParseException("Unexpected token " + lexer.getToken()
                    + ". Expected new line", lexer.getTokenStart());
        }
        if (lexer.getToken() != ListingToken.EOF) {
            lexer.nextToken();
        }
    }

    /** Requires the identifier {@code expected}, advances past it and returns its text. */
    private String expectKeyword(String expected) throws IOException, ListingParseException {
        if (lexer.getToken() != ListingToken.IDENTIFIER || !lexer.getTokenValue().equals(expected)) {
            throw new ListingParseException("Unexpected token " + lexer.getToken()
                    + ". Expected " + expected, lexer.getTokenStart());
        }
        String value = (String) lexer.getTokenValue();
        lexer.nextToken();
        return value;
    }

    /** Reports the current token as a syntax error. */
    private void unexpected() throws IOException, ListingParseException {
        throw new ListingParseException("Unexpected token " + lexer.getToken(), lexer.getTokenStart());
    }
}

View File

@ -46,6 +46,6 @@ enum ListingToken {
LEFT_SQUARE_BRACKET,
RIGHT_SQUARE_BRACKET,
COMMA,
COLON,
EOL,
EOF
}

View File

View File

@ -0,0 +1,75 @@
/*
* Copyright 2016 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.model.text;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.junit.Assert;
import org.junit.Test;
import org.teavm.model.Program;
/**
 * Tests for {@link ListingParser}, driven by listing files loaded from the test classpath
 * under {@code model/text/}.
 */
public class ParserTest {
    @Test
    public void simple() throws Exception {
        runTest("simple");
    }

    /**
     * Parses the listing resource {@code model/text/<name>.txt}.
     *
     * <p>A parse error is turned into a test failure whose message carries the 1-based row and
     * column computed from the offset reported by the exception.
     *
     * @param name resource name without directory and extension
     * @return the parsed program
     * @throws IOException if the resource is missing or cannot be read
     */
    private Program runTest(String name) throws IOException {
        ClassLoader classLoader = ParserTest.class.getClassLoader();
        String path = "model/text/" + name + ".txt";

        try (InputStream input = openResource(classLoader, path);
                InputStreamReader reader = new InputStreamReader(input, "UTF-8")) {
            return new ListingParser().parse(reader);
        } catch (ListingParseException e) {
            Location location;
            try (InputStream input = openResource(classLoader, path)) {
                location = offsetToLocation(e.getIndex(), input);
            }
            Assert.fail("Parse error at [" + (location.row + 1) + "; " + (location.column + 1)
                    + "]: " + e.getMessage());
            return null;
        }
    }

    /**
     * Opens a classpath resource, failing with a descriptive exception instead of handing a
     * {@code null} stream to the caller when the resource does not exist.
     */
    private static InputStream openResource(ClassLoader classLoader, String path) throws IOException {
        InputStream input = classLoader.getResourceAsStream(path);
        if (input == null) {
            throw new IOException("Test resource not found on classpath: " + path);
        }
        return input;
    }

    /**
     * Converts a character offset into a zero-based row/column pair by re-reading the input.
     *
     * @param offset number of characters preceding the position of interest
     * @param input  stream containing the same text the offset refers to; closed by this method
     */
    private Location offsetToLocation(int offset, InputStream input) throws IOException {
        int row = 0;
        int column = 0;
        try (InputStreamReader reader = new InputStreamReader(input, "UTF-8")) {
            for (int i = 0; i < offset; ++i) {
                int c = reader.read();
                if (c == -1) {
                    // The offset lies beyond the end of the text; report the last real position
                    // instead of counting columns that do not exist.
                    break;
                }
                if (c == '\n') {
                    row++;
                    column = 0;
                } else {
                    column++;
                }
            }
        }
        return new Location(row, column);
    }

    /** Zero-based position inside a text. */
    static class Location {
        final int row;
        final int column;

        public Location(int row, int column) {
            this.row = row;
            this.column = column;
        }
    }
}

View File

View File

@ -0,0 +1,7 @@
var @a as a_1
var @b as b_2
// it's a simple comment
$first
nop
@a := @b
@a[@b] := @a as int // another dummy comment