Wasm: working on binary parser

This commit is contained in:
Alexey Andreev 2022-12-02 21:35:07 +01:00
parent aaed8e312a
commit 53dbb72c53
10 changed files with 701 additions and 112 deletions

View File

@ -1,105 +0,0 @@
/*
* Copyright 2022 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.backend.wasm.disasm;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.util.function.Consumer;
import org.teavm.backend.wasm.model.WasmType;
import org.teavm.backend.wasm.parser.CodeSectionListener;
import org.teavm.backend.wasm.parser.CodeSectionParser;
import org.teavm.backend.wasm.parser.ModuleParser;
import org.teavm.common.ByteArrayAsyncInputStream;
/**
 * Code section listener that dumps every event it receives as a line of plain text
 * to the supplied {@link PrintWriter}.
 */
public class DisasmCodeSectionListener implements CodeSectionListener {
    private final PrintWriter writer;

    /**
     * @param writer destination of the textual dump
     */
    public DisasmCodeSectionListener(PrintWriter writer) {
        this.writer = writer;
    }

    /**
     * Prints the address as eight hexadecimal digits wrapped in a comment, on a line of its own.
     */
    @Override
    public void address(int address) {
        writer.print("(; ");
        // Walk the nibbles from the most significant one down to the least significant one.
        for (int shift = 28; shift >= 0; shift -= 4) {
            writer.print(Character.forDigit((address >>> shift) & 0xF, 16));
        }
        writer.println(" ;)");
    }

    @Override
    public void sectionStart(int functionCount) {
        var line = " .code functions=" + functionCount;
        writer.println(line);
    }

    /**
     * Prints the function header; always returns {@code true}.
     */
    @Override
    public boolean functionStart(int index, int size) {
        var line = " function fn_" + index;
        writer.println(line);
        return true;
    }

    @Override
    public void localsStart(int count) {
        var line = " locals " + count;
        writer.println(line);
    }

    /**
     * Prints one line per local in the group, numbering them from {@code start}.
     */
    @Override
    public void local(int start, int count, WasmType type) {
        int index = start;
        int remaining = count;
        while (remaining-- > 0) {
            writer.println(" local " + index + ": " + type);
            index++;
        }
    }

    @Override
    public void localsEnd() {
        writer.println(" end_locals");
    }

    @Override
    public void functionEnd() {
        writer.println(" end_function");
    }

    @Override
    public void sectionEnd() {
        writer.println(" end_code");
    }

    /**
     * Reads the file named by {@code args[0]} and dumps its code section (section id 10)
     * to standard output.
     */
    public static void main(String[] args) throws IOException {
        var content = Files.readAllBytes(new File(args[0]).toPath());
        var input = new ByteArrayAsyncInputStream(content);
        var parser = new ModuleParser(input) {
            @Override
            protected Consumer<byte[]> getSectionConsumer(int code, int pos, String name) {
                // Every section other than the code section is skipped.
                if (code != 10) {
                    return null;
                }
                return sectionBytes -> {
                    var out = new PrintWriter(System.out);
                    new CodeSectionParser(new DisasmCodeSectionListener(out)).parse(sectionBytes);
                    out.flush();
                };
            }
        };
        input.readFully(parser::parse);
    }
}

View File

@ -0,0 +1,259 @@
/*
* Copyright 2022 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.backend.wasm.disasm;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.util.function.Consumer;
import org.teavm.backend.wasm.model.WasmType;
import org.teavm.backend.wasm.model.expression.WasmFloatBinaryOperation;
import org.teavm.backend.wasm.model.expression.WasmFloatType;
import org.teavm.backend.wasm.model.expression.WasmIntBinaryOperation;
import org.teavm.backend.wasm.model.expression.WasmIntType;
import org.teavm.backend.wasm.parser.AddressListener;
import org.teavm.backend.wasm.parser.BranchOpcode;
import org.teavm.backend.wasm.parser.CodeListener;
import org.teavm.backend.wasm.parser.CodeSectionListener;
import org.teavm.backend.wasm.parser.CodeSectionParser;
import org.teavm.backend.wasm.parser.LocalOpcode;
import org.teavm.backend.wasm.parser.ModuleParser;
import org.teavm.backend.wasm.parser.Opcode;
import org.teavm.common.ByteArrayAsyncInputStream;
/**
 * Renders the events produced by {@link CodeSectionParser} as text through a
 * {@link DisassemblyWriter}.
 *
 * <p>The same instance acts as address listener, section listener and per-function code
 * listener. The most recently reported address is remembered and attached to the next line
 * that is written.
 */
public class DisassemblyCodeSectionListener implements AddressListener, CodeSectionListener, CodeListener {
    private final DisassemblyWriter writer;
    // Last address reported by the parser; used as the prefix of the next emitted line.
    private int address;
    // Source of block label numbers; restarted for every function body in code().
    private int blockIdGen;

    /**
     * @param writer destination of the disassembly
     */
    public DisassemblyCodeSectionListener(DisassemblyWriter writer) {
        this.writer = writer;
    }

    @Override
    public void address(int address) {
        this.address = address;
    }

    @Override
    public void sectionStart(int functionCount) {
        writer.address(address).write("(; code section ;)").eol();
    }

    /**
     * Opens a {@code (func} form and increases indentation; always returns {@code true}.
     */
    @Override
    public boolean functionStart(int index, int size) {
        writer.address(address).write("(func $fun_" + index).indent().eol();
        return true;
    }

    @Override
    public void localsStart(int count) {
        writer.address(address).write("(; locals " + count + " ;)").eol();
    }

    /**
     * Writes one {@code (local ...)} line per local of the group, numbered from {@code start}.
     */
    @Override
    public void local(int start, int count, WasmType type) {
        writer.address(address);
        for (int i = 0; i < count; ++i) {
            writer.write("(local $loc_" + (i + start) + " " + typeToString(type) + ")").eol();
        }
    }

    /**
     * Restarts label numbering and returns this object as the listener for the function body.
     */
    @Override
    public CodeListener code() {
        blockIdGen = 0;
        return this;
    }

    @Override
    public void functionEnd() {
        writer.outdent().write(")").eol();
    }

    @Override
    public void sectionEnd() {
        // NOTE(review): sectionStart() neither indents nor opens a parenthesis, so this outdent
        // and ")" have no counterpart in this class - confirm whether an enclosing form is
        // opened elsewhere.
        writer.outdent().write(")").eol();
    }

    /**
     * Returns the block result type preceded by a space, or an empty string when there is none.
     */
    private String blockTypeToString(WasmType type) {
        if (type == null) {
            return "";
        } else {
            return " " + typeToString(type);
        }
    }

    /**
     * Maps a value type to its textual name; {@code null} and unrecognized types yield
     * {@code "unknown"}.
     */
    private String typeToString(WasmType type) {
        if (type != null) {
            switch (type) {
                case INT32:
                    return "i32";
                case INT64:
                    return "i64";
                case FLOAT32:
                    return "f32";
                case FLOAT64:
                    return "f64";
                default:
                    break;
            }
        }
        return "unknown";
    }

    /**
     * Writes an {@code error} line and then undoes the indentation of {@code depth} blocks.
     */
    @Override
    public void error(int depth) {
        writer.address(address);
        writer.write("error").eol();
        for (int i = 0; i < depth; ++i) {
            writer.outdent();
        }
    }

    /**
     * Writes a {@code block} or {@code loop} header and returns the number of its label.
     */
    @Override
    public int startBlock(boolean loop, WasmType type) {
        writer.address(address);
        var label = blockIdGen++;
        writer.write(loop ? "loop" : "block").write(" $label_" + label).write(blockTypeToString(type))
                .indent().eol();
        return label;
    }

    /**
     * Writes an {@code if} header and returns the number of its label.
     */
    @Override
    public int startConditionalBlock(WasmType type) {
        writer.address(address);
        var label = blockIdGen++;
        // The label text already starts with a space, so the keyword must not end with one;
        // this keeps the output consistent with startBlock().
        writer.write("if").write(" $label_" + label).write(blockTypeToString(type)).indent().eol();
        return label;
    }

    @Override
    public void startElseSection(int token) {
        writer.address(address);
        writer.outdent().write("else (; $label_" + token + " ;)").indent().eol();
    }

    @Override
    public void endBlock(int token) {
        writer.address(address).outdent().write("end (; $label_" + token + " ;)").eol();
    }

    @Override
    public void branch(BranchOpcode opcode, int depth, int target) {
        writer.address(address);
        switch (opcode) {
            case BR:
                writer.write("br");
                break;
            case BR_IF:
                writer.write("br_if");
                break;
        }
        writer.write(" $label_" + target).eol();
    }

    /**
     * Writes {@code br_table} followed by every target label and finally the default label.
     */
    @Override
    public void tableBranch(int[] depths, int[] targets, int defaultDepth, int defaultTarget) {
        writer.address(address);
        writer.write("br_table");
        for (var target : targets) {
            writer.write(" $label_" + target);
        }
        writer.write(" $label_" + defaultTarget).eol();
    }

    @Override
    public void opcode(Opcode opcode) {
        writer.address(address);
        switch (opcode) {
            case UNREACHABLE:
                writer.write("unreachable");
                break;
            case RETURN:
                writer.write("return");
                break;
        }
        writer.eol();
    }

    @Override
    public void local(LocalOpcode opcode, int index) {
        writer.address(address);
        switch (opcode) {
            case GET:
                writer.write("local.get");
                break;
            case SET:
                writer.write("local.set");
                break;
        }
        writer.write(" $loc_" + index).eol();
    }

    @Override
    public void binary(WasmIntBinaryOperation opcode, WasmIntType type) {
        // Not rendered yet.
    }

    @Override
    public void binary(WasmFloatBinaryOperation opcode, WasmFloatType type) {
        // Not rendered yet.
    }

    @Override
    public void int32Constant(int value) {
        writer.address(address).write("i32.const " + value).eol();
    }

    @Override
    public void int64Constant(long value) {
        writer.address(address).write("i64.const " + value).eol();
    }

    @Override
    public void float32Constant(float value) {
        writer.address(address).write("f32.const " + Float.toHexString(value)).eol();
    }

    @Override
    public void float64Constant(double value) {
        writer.address(address).write("f64.const " + Double.toHexString(value)).eol();
    }

    /**
     * Reads the file named by {@code args[0]} and disassembles its code section (section id 10)
     * to standard output, with addresses enabled.
     */
    public static void main(String[] args) throws IOException {
        var file = new File(args[0]);
        var bytes = Files.readAllBytes(file.toPath());
        var input = new ByteArrayAsyncInputStream(bytes);
        var parser = new ModuleParser(input) {
            @Override
            protected Consumer<byte[]> getSectionConsumer(int code, int pos, String name) {
                if (code == 10) {
                    return bytes -> {
                        var out = new PrintWriter(System.out);
                        var writer = new DisassemblyWriter(out, true);
                        var disassembler = new DisassemblyCodeSectionListener(writer);
                        var sectionParser = new CodeSectionParser(disassembler, disassembler);
                        sectionParser.parse(bytes);
                        out.flush();
                    };
                }
                // Every other section is skipped.
                return null;
            }
        };
        input.readFully(parser::parse);
    }
}

View File

@ -0,0 +1,86 @@
/*
* Copyright 2022 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.backend.wasm.disasm;
import java.io.PrintWriter;
/**
 * Line-oriented text writer with indentation and an optional address column.
 *
 * <p>All mutators return {@code this} so calls can be chained. A line is started lazily by the
 * first {@link #write(String)} after {@link #eol()}: the address column (when enabled) and the
 * indentation are emitted exactly once per line, no matter how many fragments are written.
 */
public class DisassemblyWriter {
    // Width of the text printed by printAddress(): "(; " + 8 hex digits + " ;) ".
    private static final int ADDRESS_COLUMN_WIDTH = 15;

    private final PrintWriter out;
    private final boolean withAddress;
    private int indentLevel;
    private int address;
    // True while an address has been supplied but not yet printed.
    private boolean hasAddress;
    // True once the prefix of the current line has been emitted.
    private boolean lineStarted;

    /**
     * @param out destination of the text
     * @param withAddress whether every line is prefixed with an address column
     */
    public DisassemblyWriter(PrintWriter out, boolean withAddress) {
        this.out = out;
        this.withAddress = withAddress;
    }

    /**
     * Sets the address that is printed in front of the next line that gets started.
     */
    public DisassemblyWriter address(int address) {
        this.address = address;
        hasAddress = true;
        return this;
    }

    /** Increases the indentation level by one. */
    public DisassemblyWriter indent() {
        indentLevel++;
        return this;
    }

    /** Decreases the indentation level by one. */
    public DisassemblyWriter outdent() {
        indentLevel--;
        return this;
    }

    /** Terminates the current line. */
    public DisassemblyWriter eol() {
        out.println();
        lineStarted = false;
        return this;
    }

    /**
     * Emits the line prefix (address column and indentation) if the line has not been started yet.
     */
    private void startLine() {
        if (lineStarted) {
            // The prefix belongs to the beginning of the line only; later fragments of the same
            // line must not be indented again.
            return;
        }
        lineStarted = true;
        if (withAddress) {
            if (hasAddress) {
                hasAddress = false;
                printAddress();
            } else {
                // Keep lines without an address aligned with the ones that have one.
                for (int i = 0; i < ADDRESS_COLUMN_WIDTH; ++i) {
                    out.print(' ');
                }
            }
        }
        for (int i = 0; i < indentLevel; ++i) {
            out.print(" ");
        }
    }

    /** Prints the pending address as eight hexadecimal digits wrapped in a comment. */
    private void printAddress() {
        out.print("(; ");
        for (int i = 7; i >= 0; --i) {
            var digit = (address >>> (i * 4)) & 0xF;
            out.print(Character.forDigit(digit, 16));
        }
        out.print(" ;) ");
    }

    /**
     * Appends a fragment to the current line, starting the line first when necessary.
     */
    public DisassemblyWriter write(String s) {
        startLine();
        out.print(s);
        return this;
    }
}

View File

@ -0,0 +1,20 @@
/*
* Copyright 2022 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.backend.wasm.parser;
/**
 * Callback through which a parser reports a position in its input.
 */
public interface AddressListener {
/**
 * Called with a new position; {@code CodeSectionParser} passes its current read pointer and
 * only calls this when the pointer differs from the one it reported last.
 *
 * @param address the reported position
 */
void address(int address);
}

View File

@ -0,0 +1,21 @@
/*
* Copyright 2022 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.backend.wasm.parser;
/**
 * Kinds of branch instruction that carry a single label, as passed to
 * {@code CodeListener.branch}.
 */
public enum BranchOpcode {
/** Reported for byte {@code 0x0C}; rendered as {@code br} by the disassembler. */
BR,
/** Reported for byte {@code 0x0D}; rendered as {@code br_if} by the disassembler. */
BR_IF
}

View File

@ -0,0 +1,54 @@
/*
* Copyright 2022 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.backend.wasm.parser;
import org.teavm.backend.wasm.model.WasmType;
import org.teavm.backend.wasm.model.expression.WasmFloatBinaryOperation;
import org.teavm.backend.wasm.model.expression.WasmFloatType;
import org.teavm.backend.wasm.model.expression.WasmIntBinaryOperation;
import org.teavm.backend.wasm.model.expression.WasmIntType;
/**
 * Receives the instructions of one function body as they are decoded by
 * {@code CodeSectionParser}.
 */
public interface CodeListener {
/**
 * Called when the body could not be decoded.
 *
 * @param depth number of blocks that were still open when decoding stopped
 */
void error(int depth);
/**
 * Called when a block or a loop starts.
 *
 * @param loop {@code true} for a loop, {@code false} for a plain block
 * @param type block type as decoded by the parser
 * @return a token identifying the block; the parser hands it back to {@link #endBlock(int)}
 *     and uses it as a branch target
 */
int startBlock(boolean loop, WasmType type);
/**
 * Called when a conditional block starts.
 *
 * @param type block type as decoded by the parser
 * @return a token identifying the block, used the same way as the one from
 *     {@link #startBlock(boolean, WasmType)}
 */
int startConditionalBlock(WasmType type);
/**
 * Called when the else section of a conditional block starts.
 *
 * @param token token of the enclosing conditional block
 */
void startElseSection(int token);
/**
 * Called when a block, loop or conditional block ends.
 *
 * @param token token returned when the block was started
 */
void endBlock(int token);
/**
 * Called for a branch with a single label.
 *
 * @param opcode kind of branch
 * @param depth label index as encoded in the instruction
 * @param target token of the block the parser resolved the label to
 */
void branch(BranchOpcode opcode, int depth, int target);
/**
 * Called for a table branch.
 *
 * @param depths label indices as encoded in the instruction
 * @param targets tokens of the blocks resolved for {@code depths}, in the same order
 * @param defaultDepth encoded label index of the default entry
 * @param defaultTarget token of the block resolved for the default entry
 */
void tableBranch(int[] depths, int[] targets, int defaultDepth, int defaultTarget);
/** Called for an instruction that has no operands. */
void opcode(Opcode opcode);
/**
 * Called for an instruction that accesses a local variable.
 *
 * @param opcode kind of access
 * @param index index of the local as encoded in the instruction
 */
void local(LocalOpcode opcode, int index);
/** Integer binary operation of the given operand type. */
void binary(WasmIntBinaryOperation opcode, WasmIntType type);
/** Floating-point binary operation of the given operand type. */
void binary(WasmFloatBinaryOperation opcode, WasmFloatType type);
/** Called for a 32-bit integer constant. */
void int32Constant(int value);
/** Called for a 64-bit integer constant. */
void int64Constant(long value);
/** Called for a 32-bit floating-point constant. */
void float32Constant(float value);
/** Called for a 64-bit floating-point constant. */
void float64Constant(double value);
}

View File

@ -18,8 +18,6 @@ package org.teavm.backend.wasm.parser;
import org.teavm.backend.wasm.model.WasmType; import org.teavm.backend.wasm.model.WasmType;
public interface CodeSectionListener { public interface CodeSectionListener {
void address(int address);
void sectionStart(int functionCount); void sectionStart(int functionCount);
boolean functionStart(int index, int size); boolean functionStart(int index, int size);
@ -28,7 +26,7 @@ public interface CodeSectionListener {
void local(int start, int count, WasmType type); void local(int start, int count, WasmType type);
void localsEnd(); CodeListener code();
void functionEnd(); void functionEnd();

View File

@ -15,15 +15,21 @@
*/ */
package org.teavm.backend.wasm.parser; package org.teavm.backend.wasm.parser;
import java.util.ArrayList;
import java.util.List;
import org.teavm.backend.wasm.model.WasmType; import org.teavm.backend.wasm.model.WasmType;
public class CodeSectionParser { public class CodeSectionParser {
private AddressListener addressListener;
private CodeSectionListener listener; private CodeSectionListener listener;
private byte[] data; private byte[] data;
private int ptr; private int ptr;
private CodeListener codeListener;
private int lastReportedPtr = -1; private int lastReportedPtr = -1;
private List<Block> blockStack = new ArrayList<>();
public CodeSectionParser(CodeSectionListener listener) { public CodeSectionParser(AddressListener addressListener, CodeSectionListener listener) {
this.addressListener = addressListener;
this.listener = listener; this.listener = listener;
} }
@ -55,6 +61,10 @@ public class CodeSectionParser {
if (listener.functionStart(index, functionSize)) { if (listener.functionStart(index, functionSize)) {
parseLocals(); parseLocals();
} }
codeListener = listener.code();
if (codeListener != null) {
parseCode();
}
ptr = end; ptr = end;
reportAddress(); reportAddress();
listener.functionEnd(); listener.functionEnd();
@ -73,11 +83,139 @@ public class CodeSectionParser {
localIndex += countInGroup; localIndex += countInGroup;
} }
reportAddress(); reportAddress();
listener.localsEnd(); }
/**
 * Decodes the instructions of the current function body. When decoding fails, the code
 * listener is told how many blocks were still open and the block stack is reset.
 */
private void parseCode() {
    if (parseExpressions()) {
        return;
    }
    var openBlocks = blockStack.size();
    codeListener.error(openBlocks);
    blockStack.clear();
}
/**
 * Decodes instructions one after another until the byte at the read pointer is the
 * terminator {@code 0x0B}, which is left unconsumed.
 *
 * @return {@code false} as soon as an instruction cannot be decoded, {@code true} otherwise
 */
private boolean parseExpressions() {
    var decoded = true;
    // The terminator check is skipped once an instruction has failed to decode.
    while (decoded && data[ptr] != 0x0B) {
        decoded = parseExpr();
    }
    return decoded;
}
/**
 * Decodes a single instruction starting at the read pointer and reports it to the code
 * listener. The position of the instruction is reported to the address listener first.
 *
 * @return {@code false} when the opcode is not supported or a nested block failed to decode,
 *     {@code true} otherwise
 */
private boolean parseExpr() {
    reportAddress();
    switch (data[ptr++]) {
        case 0x00:
            codeListener.opcode(Opcode.UNREACHABLE);
            break;
        case 0x02:
            return parseBlock(false);
        case 0x03:
            return parseBlock(true);
        case 0x04:
            return parseConditional();
        case 0x0C:
            parseBranch(BranchOpcode.BR);
            break;
        case 0x0D:
            parseBranch(BranchOpcode.BR_IF);
            break;
        case 0x0E:
            parseTableBranch();
            break;
        case 0x0F:
            // 0x0F is the "return" instruction; it must not be reported as "unreachable".
            codeListener.opcode(Opcode.RETURN);
            break;
        case 0x20:
            codeListener.local(LocalOpcode.GET, readLEB());
            break;
        case 0x21:
            codeListener.local(LocalOpcode.SET, readLEB());
            break;
        case 0x41:
            codeListener.int32Constant(readSignedLEB());
            break;
        case 0x42:
            codeListener.int64Constant(readSignedLongLEB());
            break;
        case 0x43:
            codeListener.float32Constant(Float.intBitsToFloat(readFixedInt()));
            break;
        case 0x44:
            codeListener.float64Constant(Double.longBitsToDouble(readFixedLong()));
            break;
        default:
            return false;
    }
    return true;
}
/**
 * Decodes a block or loop whose opcode has already been consumed: reads the block type,
 * decodes the nested instructions and consumes the terminating byte.
 *
 * @param isLoop {@code true} when the construct is a loop
 * @return {@code false} when a nested instruction failed to decode; in that case the block
 *     stays on the block stack
 */
private boolean parseBlock(boolean isLoop) {
    var blockType = readType();
    var label = codeListener.startBlock(isLoop, blockType);
    blockStack.add(new Block(label));
    if (parseExpressions()) {
        blockStack.remove(blockStack.size() - 1);
        // The read pointer still addresses the terminator here, so that is the position
        // reported for the end of the block.
        reportAddress();
        codeListener.endBlock(label);
        ptr++;
        return true;
    }
    return false;
}
/**
 * Decodes a conditional block whose opcode has already been consumed: reads the block type,
 * decodes the instructions of the "then" section and of the optional "else" section
 * (introduced by byte {@code 0x05}), and consumes the terminating byte {@code 0x0B}.
 *
 * @return {@code false} when a nested instruction failed to decode or when a second
 *     {@code else} marker is met; in that case the block stays on the block stack
 */
private boolean parseConditional() {
    var type = readType();
    var token = codeListener.startConditionalBlock(type);
    blockStack.add(new Block(token));
    var hasElse = false;
    loop: while (true) {
        switch (data[ptr]) {
            case 0x0B:
                break loop;
            case 0x05:
                if (hasElse) {
                    return false;
                }
                // Remember the marker so that a second one is rejected by the check above.
                hasElse = true;
                reportAddress();
                codeListener.startElseSection(blockStack.get(blockStack.size() - 1).token);
                ++ptr;
                break;
            default:
                if (!parseExpr()) {
                    return false;
                }
                break;
        }
    }
    blockStack.remove(blockStack.size() - 1);
    reportAddress();
    codeListener.endBlock(token);
    ++ptr;
    return true;
}
/**
 * Decodes the label operand of a single-label branch and reports the branch together with
 * the token of the block it refers to.
 *
 * @param opcode kind of branch being decoded
 */
private void parseBranch(BranchOpcode opcode) {
    var depth = readLEB();
    // Label indices are relative and zero-based: 0 denotes the innermost open block, which is
    // the last element of the stack.
    // NOTE(review): a depth equal to the stack size (a branch out of the function body itself)
    // has no entry on the stack and still fails here - confirm how it should be reported.
    var target = blockStack.get(blockStack.size() - 1 - depth);
    codeListener.branch(opcode, depth, target.token);
}
private void parseTableBranch() {
var count = readLEB();
var depths = new int[count];
var targets = new int[count];
for (var i = 0; i < count; ++i) {
var depth = readLEB();
depths[i] = depth;
targets[i] = blockStack.get(blockStack.size() - depth).token;
}
var defaultDepth = readLEB();
var defaultTarget = blockStack.get(blockStack.size() - defaultDepth).token;
codeListener.tableBranch(depths, targets, defaultDepth, defaultTarget);
} }
private WasmType readType() { private WasmType readType() {
var typeId = data[ptr]; var typeId = data[ptr++];
switch (typeId) { switch (typeId) {
case 0x7F: case 0x7F:
return WasmType.INT32; return WasmType.INT32;
@ -95,10 +233,29 @@ public class CodeSectionParser {
private void reportAddress() { private void reportAddress() {
if (ptr != lastReportedPtr) { if (ptr != lastReportedPtr) {
lastReportedPtr = ptr; lastReportedPtr = ptr;
listener.address(ptr); if (addressListener != null) {
addressListener.address(ptr);
}
} }
} }
/**
 * Reads a signed LEB128 encoded 32-bit integer at the read pointer and advances the pointer
 * past it.
 *
 * @return the decoded value
 */
private int readSignedLEB() {
    var result = 0;
    var shift = 0;
    while (true) {
        var digit = data[ptr++];
        result |= (digit & 0x7F) << shift;
        if ((digit & 0x80) == 0) {
            // Sign-extend only while there are bits left above the payload. Java masks int
            // shift counts to five bits, so for the fifth byte (shift + 7 == 35) the shift
            // would wrap around and overwrite low bits that were already decoded.
            if ((digit & 0x40) != 0 && shift + 7 < Integer.SIZE) {
                result |= -1 << (shift + 7);
            }
            break;
        }
        shift += 7;
    }
    return result;
}
private int readLEB() { private int readLEB() {
var result = 0; var result = 0;
var shift = 0; var shift = 0;
@ -112,4 +269,61 @@ public class CodeSectionParser {
} }
return result; return result;
} }
/**
 * Reads a signed LEB128 encoded 64-bit integer at the read pointer and advances the pointer
 * past it.
 *
 * @return the decoded value
 */
private long readSignedLongLEB() {
    var result = 0L;
    var shift = 0;
    while (true) {
        var digit = data[ptr++];
        result |= (digit & 0x7FL) << shift;
        if ((digit & 0x80) == 0) {
            // Sign-extend only while there are bits left above the payload. Java masks long
            // shift counts to six bits, so for the tenth byte (shift + 7 == 70) the shift
            // would wrap around and overwrite low bits that were already decoded.
            if ((digit & 0x40) != 0 && shift + 7 < Long.SIZE) {
                result |= -1L << (shift + 7);
            }
            break;
        }
        shift += 7;
    }
    return result;
}
/**
 * Reads an unsigned LEB128 encoded 64-bit integer at the read pointer and advances the
 * pointer past it.
 *
 * @return the decoded value
 */
private long readLongLEB() {
    long value = 0L;
    int bitOffset = 0;
    byte current;
    do {
        current = data[ptr++];
        // The low seven bits are payload; the high bit says whether another byte follows.
        value |= (current & 0x7FL) << bitOffset;
        bitOffset += 7;
    } while ((current & 0x80) != 0);
    return value;
}
/**
 * Reads a fixed-width 32-bit value stored in little-endian byte order (the byte order of the
 * WebAssembly binary format) and advances the read pointer by four bytes.
 *
 * @return the decoded value
 */
private int readFixedInt() {
    var result = 0;
    for (var i = 0; i < 4; ++i) {
        // Byte i carries bits 8*i .. 8*i+7 of the value.
        result |= (data[ptr + i] & 0xFF) << (8 * i);
    }
    // Consume the operand so that decoding continues with the next instruction.
    ptr += 4;
    return result;
}
/**
 * Reads a fixed-width 64-bit value stored in little-endian byte order (the byte order of the
 * WebAssembly binary format) and advances the read pointer by eight bytes.
 *
 * @return the decoded value
 */
private long readFixedLong() {
    var result = 0L;
    for (var i = 0; i < 8; ++i) {
        // Byte i carries bits 8*i .. 8*i+7 of the value; widen to long before shifting.
        result |= (data[ptr + i] & 0xFFL) << (8 * i);
    }
    // Consume the operand so that decoding continues with the next instruction.
    ptr += 8;
    return result;
}
/**
 * Entry of the stack of currently open blocks; holds the token the code listener returned
 * when the block was started.
 */
private static class Block {
    // Assigned once at construction and only read afterwards.
    final int token;

    Block(int token) {
        this.token = token;
    }
}
} }

View File

@ -0,0 +1,21 @@
/*
* Copyright 2022 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.backend.wasm.parser;
/**
 * Kinds of access to a local variable, as passed to {@code CodeListener.local}.
 */
public enum LocalOpcode {
/** Reported for byte {@code 0x20}; rendered as {@code local.get} by the disassembler. */
GET,
/** Reported for byte {@code 0x21}; rendered as {@code local.set} by the disassembler. */
SET
}

View File

@ -0,0 +1,21 @@
/*
* Copyright 2022 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.backend.wasm.parser;
/**
 * Instructions without operands, as passed to {@code CodeListener.opcode}.
 */
public enum Opcode {
/** Rendered as {@code unreachable} by the disassembler. */
UNREACHABLE,
/** Rendered as {@code return} by the disassembler. */
RETURN
}