From 9c4f00c18abd4265b3557e6c266a09bf49ab0452 Mon Sep 17 00:00:00 2001 From: konsoletyper Date: Mon, 3 Mar 2014 16:29:46 +0400 Subject: [PATCH] Adds most of java.lang.Character implementation. Adds integer stack to avoid stack overflow when using fixed-sized array-based stack --- .../classlib/impl/unicode/UnicodeHelper.java | 43 ++- .../java/lang/CharacterNativeGenerator.java | 22 +- .../teavm/classlib/java/lang/TCharacter.java | 287 +++++++++++++++++- .../teavm/classlib/java/lang/TInteger.java | 2 +- .../teavm/common/DominatorTreeBuilder.java | 12 +- .../java/org/teavm/common/IntegerStack.java | 50 +++ .../teavm/javascript/JavascriptBuilder.java | 14 +- .../UnreachableBasicBlockEliminator.java | 16 +- .../resources/org/teavm/javascript/runtime.js | 2 +- 9 files changed, 393 insertions(+), 55 deletions(-) create mode 100644 teavm-core/src/main/java/org/teavm/common/IntegerStack.java diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java b/teavm-classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java index 66f82e04d..1362c2712 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java @@ -30,6 +30,18 @@ public class UnicodeHelper { return digit <= '9' ? digit - '0' : digit - 'A' + 10; } + public static class Range { + public final int start; + public final int end; + public final byte[] data; + + public Range(int start, int end, byte[] data) { + this.start = start; + this.end = end; + this.data = data; + } + } + public static String encodeIntByte(int[] data) { char[] chars = new char[data.length / 2 * 5]; int j = 0; @@ -107,33 +119,44 @@ public class UnicodeHelper { return sb.toString(); } - public static byte[] extractRle(String encoded) { - byte[] data = new byte[65536 * 4]; + public static Range[] extractRle(String encoded) { + Range[] ranges = new Range[16384]; + byte[] buffer = new byte[16384]; int index = 0; + int rangeIndex = 0; + int codePoint = 0; for (int i = 0; i < encoded.length(); ++i) { byte b = decodeByte(encoded.charAt(i)); + int count = 1; if (b == 64) { b = decodeByte(encoded.charAt(++i)); - int count = 0; + count = 0; int pos = 1; for (int j = 0; j < 3; ++j) { byte digit = decodeByte(encoded.charAt(++i)); count |= pos * digit; pos *= 0x40; } - while (count-- > 0) { - data[index++] = b; - } } else if (b > 32) { b -= 32; - byte count = decodeByte(encoded.charAt(++i)); + count = decodeByte(encoded.charAt(++i)); + } else { + buffer[index++] = b; + } + if (count == 1) { + buffer[index++] = b; + } else if (b != 0) { while (count-- > 0) { - data[index++] = b; + buffer[index++] = b; } } else { - data[index++] = b; + if (index > 0) { + ranges[rangeIndex++] = new Range(codePoint, codePoint + index, Arrays.copyOf(buffer, index)); + } + codePoint += index + count; + index = 0; } } - return Arrays.copyOf(data, index); + return Arrays.copyOf(ranges, rangeIndex); } } \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/CharacterNativeGenerator.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/CharacterNativeGenerator.java index e73338710..0a06f086c 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/CharacterNativeGenerator.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/CharacterNativeGenerator.java @@ -25,7 +25,6 @@ import org.teavm.dependency.MethodDependency; import org.teavm.javascript.ni.Generator; import org.teavm.javascript.ni.GeneratorContext; import org.teavm.model.MethodReference; -import org.teavm.model.ValueType; /** * @@ -36,11 +35,12 @@ public class CharacterNativeGenerator implements Generator, DependencyPlugin { public void generate(GeneratorContext context, SourceWriter writer, MethodReference methodRef) throws IOException { switch (methodRef.getName()) { case "toLowerCase": - if (methodRef.getDescriptor().parameterType(0) == ValueType.CHARACTER) { - generateToLowerCase(context, writer); - } else { - generateToLowerCaseInt(context, writer); - } + writer.append("return String.fromCharCode(").append(context.getParameterName(1)) + .append(").toLowerCase().charCodeAt(0)|0;").softNewLine(); + break; + case "toUpperCase": + writer.append("return String.fromCharCode(").append(context.getParameterName(1)) + .append(").toUpperCase().charCodeAt(0)|0;").softNewLine(); break; case "obtainDigitMapping": generateObtainDigitMapping(writer); @@ -61,16 +61,6 @@ public class CharacterNativeGenerator implements Generator, DependencyPlugin { } } - private void generateToLowerCase(GeneratorContext context, SourceWriter writer) throws IOException{ - writer.append("return String.fromCharCode(").append(context.getParameterName(1)) - .append(").toLowerCase().charCodeAt(0)|0;").softNewLine(); - } - - private void generateToLowerCaseInt(GeneratorContext context, SourceWriter writer) throws IOException{ - writer.append("return String.fromCharCode(").append(context.getParameterName(1)) - .append(").toLowerCase().charCodeAt(0);").softNewLine(); - } - private void generateObtainDigitMapping(SourceWriter writer) throws IOException { String str = UnicodeHelper.encodeIntByte(UnicodeSupport.getDigitValues()); writer.append("return $rt_str("); diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java index be0b12d67..edbb2e6ca 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java @@ -25,7 +25,7 @@ import org.teavm.javascript.ni.Rename; * * @author Alexey Andreev */ -public class TCharacter extends TObject { +public class TCharacter extends TObject implements TComparable { public static final int MIN_RADIX = 2; public static final int MAX_RADIX = 36; public static final char MIN_VALUE = '\0'; @@ -93,7 +93,7 @@ public class TCharacter extends TObject { public static final int SIZE = 16; static final int ERROR = 0xFFFFFFFF; private static int[] digitMapping; - private static byte[] classMapping; + private static UnicodeHelper.Range[] classMapping; private char value; private static TCharacter[] characterCache = new TCharacter[128]; @@ -225,12 +225,19 @@ public class TCharacter extends TObject { return UTF16Helper.lowSurrogate(codePoint); } + // TODO: implement toLowerCase/toUpperCase/toTitleCase using UnicodeData.txt instead of built-in JS @GeneratedBy(CharacterNativeGenerator.class) public static native char toLowerCase(char ch); @GeneratedBy(CharacterNativeGenerator.class) public static native int toLowerCase(int ch); + @GeneratedBy(CharacterNativeGenerator.class) + public static native char toUpperCase(char ch); + + @GeneratedBy(CharacterNativeGenerator.class) + public static native int toUpperCase(int codePoint); + public static int digit(char ch, int radix) { return digit((int)ch, radix); } @@ -239,11 +246,15 @@ public class TCharacter extends TObject { if (radix < MIN_RADIX || radix > MAX_RADIX) { return -1; } - int d = digit(codePoint); + int d = getNumericValue(codePoint); return d <= radix ? d : -1; } - static int digit(int codePoint) { + public static int getNumericValue(char ch) { + return getNumericValue((int)ch); + } + + public static int getNumericValue(int codePoint) { int[] digitMapping = getDigitMapping(); int l = 0; int u = (digitMapping.length / 2) - 1; @@ -273,7 +284,7 @@ public class TCharacter extends TObject { } public static boolean isDigit(int codePoint) { - return digit(codePoint) >= 0; + return getNumericValue(codePoint) >= 0; } private static int[] getDigitMapping() { @@ -287,7 +298,7 @@ public class TCharacter extends TObject { @PluggableDependency(CharacterNativeGenerator.class) private static native String obtainDigitMapping(); - private static byte[] getClasses() { + private static UnicodeHelper.Range[] getClasses() { if (classMapping == null) { classMapping = UnicodeHelper.extractRle(obtainClasses()); } @@ -378,6 +389,268 @@ public class TCharacter extends TObject { } public static int getType(int codePoint) { - return getClasses()[codePoint]; + UnicodeHelper.Range[] classes = getClasses(); + int l = 0; + int u = classes.length - 1; + while (l <= u) { + int i = (l + u) / 2; + UnicodeHelper.Range range = classes[i]; + if (codePoint >= range.end) { + l = i + 1; + } else if (codePoint < range.start) { + u = i - 1; + } else { + return range.data[codePoint - range.start]; + } + } + return 0; + } + + public static boolean isLowerCase(char ch) { + return isLowerCase((int)ch); + } + + public static boolean isLowerCase(int codePoint) { + return getType(codePoint) == LOWERCASE_LETTER; + } + + public static boolean isUpperCase(char ch) { + return isUpperCase((int)ch); + } + + public static boolean isUpperCase(int codePoint) { + return getType(codePoint) == UPPERCASE_LETTER; + } + + public static boolean isTitleCase(char ch) { + return isTitleCase((int)ch); + } + + public static boolean isTitleCase(int codePoint) { + return getType(codePoint) == TITLECASE_LETTER; + } + + public static boolean isDefined(char ch) { + return isDefined((int)ch); + } + + public static boolean isDefined(int codePoint) { + return getType(codePoint) != UNASSIGNED; + } + + public static boolean isLetter(char ch) { + return isLetter((int)ch); + } + + public static boolean isLetter(int codePoint) { + switch (getType(codePoint)) { + case UPPERCASE_LETTER: + case LOWERCASE_LETTER: + case TITLECASE_LETTER: + case MODIFIER_LETTER: + case OTHER_LETTER: + return true; + default: + return false; + } + } + + public static boolean isLetterOrDigit(char ch) { + return isLetterOrDigit((int)ch); + } + + public static boolean isLetterOrDigit(int codePoint) { + switch (getType(codePoint)) { + case UPPERCASE_LETTER: + case LOWERCASE_LETTER: + case TITLECASE_LETTER: + case MODIFIER_LETTER: + case OTHER_LETTER: + case DECIMAL_DIGIT_NUMBER: + return true; + default: + return false; + } + } + + @Deprecated + public static boolean isJavaLetter(char ch) { + return isJavaIdentifierStart(ch); + } + + public static boolean isJavaIdentifierStart(char ch) { + return isJavaIdentifierStart((int)ch); + } + + public static boolean isJavaIdentifierStart(int codePoint) { + switch (getType(codePoint)) { + case UPPERCASE_LETTER: + case LOWERCASE_LETTER: + case TITLECASE_LETTER: + case MODIFIER_LETTER: + case OTHER_LETTER: + case LETTER_NUMBER: + case CONNECTOR_PUNCTUATION: + case CURRENCY_SYMBOL: + return true; + default: + return isIdentifierIgnorable(codePoint); + } + } + + @Deprecated + public static boolean isJavaLetterOrDigit(char ch) { + return isJavaIdentifierPart(ch); + } + + public static boolean isJavaIdentifierPart(char ch) { + return isJavaIdentifierPart((int)ch); + } + + public static boolean isJavaIdentifierPart(int codePoint) { + switch (getType(codePoint)) { + case UPPERCASE_LETTER: + case LOWERCASE_LETTER: + case TITLECASE_LETTER: + case MODIFIER_LETTER: + case OTHER_LETTER: + case LETTER_NUMBER: + case DECIMAL_DIGIT_NUMBER: + case COMBINING_SPACING_MARK: + case NON_SPACING_MARK: + case CONNECTOR_PUNCTUATION: + case CURRENCY_SYMBOL: + return true; + default: + return isIdentifierIgnorable(codePoint); + } + } + + public static boolean isAlphabetic(int codePoint) { + switch (getType(codePoint)) { + case UPPERCASE_LETTER: + case LOWERCASE_LETTER: + case TITLECASE_LETTER: + case MODIFIER_LETTER: + case OTHER_LETTER: + case LETTER_NUMBER: + return true; + default: + return false; + } + } + + public static boolean isUnicodeIdentifierStart(char ch) { + return isUnicodeIdentifierStart((int)ch); + } + + public static boolean isUnicodeIdentifierStart(int codePoint) { + switch (getType(codePoint)) { + case UPPERCASE_LETTER: + case LOWERCASE_LETTER: + case TITLECASE_LETTER: + case MODIFIER_LETTER: + case OTHER_LETTER: + case LETTER_NUMBER: + return true; + default: + return isIdentifierIgnorable(codePoint); + } + } + + public static boolean isUnicodeIdentifierPart(int codePoint) { + switch (getType(codePoint)) { + case UPPERCASE_LETTER: + case LOWERCASE_LETTER: + case TITLECASE_LETTER: + case MODIFIER_LETTER: + case OTHER_LETTER: + case LETTER_NUMBER: + case CONNECTOR_PUNCTUATION: + case DECIMAL_DIGIT_NUMBER: + case COMBINING_SPACING_MARK: + case NON_SPACING_MARK: + return true; + default: + return isIdentifierIgnorable(codePoint); + } + } + + public static boolean isIdentifierIgnorable(char ch) { + return isIdentifierIgnorable((int)ch); + } + + public static boolean isIdentifierIgnorable(int codePoint) { + if (codePoint >= 0x00 && codePoint <= 0x08 || codePoint >= 0x0E && codePoint <= 0x1B || + codePoint >= 0x7F && codePoint <= 0x9F) { + return true; + } + return getType(codePoint) == FORMAT; + } + + @Deprecated + public static boolean isSpace(char ch) { + switch (ch) { + case '\t': + case '\n': + case '\f': + case '\r': + case ' ': + return true; + default: + return false; + } + } + + public static boolean isSpaceChar(char ch) { + return isSpaceChar((int)ch); + } + + public static boolean isSpaceChar(int codePoint) { + switch (getType(codePoint)) { + case SPACE_SEPARATOR: + case LINE_SEPARATOR: + case PARAGRAPH_SEPARATOR: + return true; + default: + return false; + } + } + + public static boolean isWhitespace(char ch) { + return isWhitespace((int)ch); + } + + public static boolean isWhitespace(int codePoint) { + switch (codePoint) { + case '\t': + case '\n': + case 0xB: + case '\f': + case '\r': + case 0x1C: + case 0x1D: + case 0x1E: + case 0x1F: + return true; + default: + return isWhitespace(codePoint); + } + } + + // TODO: public static byte getDirectionality(char ch) + // TODO: public static boolean isMirrored(char ch) + + @Override + public int compareTo(TCharacter anotherCharacter) { + return compare(value, anotherCharacter.value); + } + + public static int compare(char x, char y) { + return x - y; + } + + public static char reverseBytes(char ch) { + return (char)((ch >> 8) | (ch << 8)); } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TInteger.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TInteger.java index 3e2caaa36..236b78e15 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TInteger.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TInteger.java @@ -80,7 +80,7 @@ public class TInteger extends TNumber implements TComparable { } int value = 0; while (index < s.length()) { - int digit = TCharacter.digit(s.charAt(index++)); + int digit = TCharacter.getNumericValue(s.charAt(index++)); if (digit < 0) { throw new TNumberFormatException(TString.wrap("String contains invalid digits: " + s)); } diff --git a/teavm-core/src/main/java/org/teavm/common/DominatorTreeBuilder.java b/teavm-core/src/main/java/org/teavm/common/DominatorTreeBuilder.java index 5f603a4d1..06626673a 100644 --- a/teavm-core/src/main/java/org/teavm/common/DominatorTreeBuilder.java +++ b/teavm-core/src/main/java/org/teavm/common/DominatorTreeBuilder.java @@ -123,18 +123,16 @@ class DominatorTreeBuilder { private void dfs() { Arrays.fill(semidominators, -1); Arrays.fill(vertices, -1); - int[] stack = new int[graph.size() + 1]; - int size = 0; + IntegerStack stack = new IntegerStack(graph.size()); for (int i = graph.size() - 1; i >= 0; --i) { if (graph.incomingEdgesCount(i) == 0) { - stack[size++] = i; + stack.push(i); parents[i] = -1; } } - stack[size++] = 0; int i = 0; - while (size > 0) { - int v = stack[--size]; + while (!stack.isEmpty()) { + int v = stack.pop(); if (semidominators[v] >= 0) { continue; } @@ -144,7 +142,7 @@ class DominatorTreeBuilder { for (int w : graph.outgoingEdges(v)) { if (semidominators[w] < 0) { parents[w] = v; - stack[size++] = w; + stack.push(w); } } } diff --git a/teavm-core/src/main/java/org/teavm/common/IntegerStack.java b/teavm-core/src/main/java/org/teavm/common/IntegerStack.java new file mode 100644 index 000000000..24cbc528f --- /dev/null +++ b/teavm-core/src/main/java/org/teavm/common/IntegerStack.java @@ -0,0 +1,50 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.common; + +import java.util.Arrays; + +/** + * + * @author Alexey Andreev + */ +public class IntegerStack { + private int[] buffer; + private int head; + + public IntegerStack(int capacity) { + this.buffer = new int[capacity]; + } + + public void push(int value) { + if (head == buffer.length) { + buffer = Arrays.copyOf(buffer, buffer.length * 2); + } + buffer[head++] = value; + } + + public int pop() { + return buffer[--head]; + } + + public int peek() { + return buffer[head - 1]; + } + + public boolean isEmpty() { + return head == 0; + } +} diff --git a/teavm-core/src/main/java/org/teavm/javascript/JavascriptBuilder.java b/teavm-core/src/main/java/org/teavm/javascript/JavascriptBuilder.java index dd5046ec4..b888aef5e 100644 --- a/teavm-core/src/main/java/org/teavm/javascript/JavascriptBuilder.java +++ b/teavm-core/src/main/java/org/teavm/javascript/JavascriptBuilder.java @@ -142,13 +142,15 @@ public class JavascriptBuilder implements JavascriptBuilderHost { SourceWriterBuilder builder = new SourceWriterBuilder(naming); builder.setMinified(minifying); SourceWriter sourceWriter = builder.build(writer); - dependencyChecker.linkMethod(new MethodReference("java.lang.Class", new MethodDescriptor("createNew", - ValueType.object("java.lang.Class"))), DependencyStack.ROOT).use(); - dependencyChecker.linkMethod(new MethodReference("java.lang.String", new MethodDescriptor("", - ValueType.arrayOf(ValueType.CHARACTER), ValueType.VOID)), DependencyStack.ROOT).use(); - dependencyChecker.linkMethod(new MethodReference("java.lang.String", new MethodDescriptor("getChars", + dependencyChecker.linkMethod(new MethodReference("java.lang.Class", "createNew", + ValueType.object("java.lang.Class")), DependencyStack.ROOT).use(); + dependencyChecker.linkMethod(new MethodReference("java.lang.String", "", + ValueType.arrayOf(ValueType.CHARACTER), ValueType.VOID), DependencyStack.ROOT).use(); + dependencyChecker.linkMethod(new MethodReference("java.lang.String", "getChars", ValueType.INTEGER, ValueType.INTEGER, ValueType.arrayOf(ValueType.CHARACTER), ValueType.INTEGER, - ValueType.VOID)), DependencyStack.ROOT).use(); + ValueType.VOID), DependencyStack.ROOT).use(); + dependencyChecker.linkMethod(new MethodReference("java.lang.String", "length", ValueType.INTEGER), + DependencyStack.ROOT).use(); dependencyChecker.linkMethod(new MethodReference("java.lang.Object", new MethodDescriptor("clone", ValueType.object("java.lang.Object"))), DependencyStack.ROOT).use(); executor.complete(); diff --git a/teavm-core/src/main/java/org/teavm/optimization/UnreachableBasicBlockEliminator.java b/teavm-core/src/main/java/org/teavm/optimization/UnreachableBasicBlockEliminator.java index 90c9f8ceb..d859df66e 100644 --- a/teavm-core/src/main/java/org/teavm/optimization/UnreachableBasicBlockEliminator.java +++ b/teavm-core/src/main/java/org/teavm/optimization/UnreachableBasicBlockEliminator.java @@ -15,6 +15,7 @@ */ package org.teavm.optimization; +import org.teavm.common.IntegerStack; import org.teavm.model.BasicBlock; import org.teavm.model.Program; import org.teavm.model.TryCatchBlock; @@ -31,11 +32,10 @@ public class UnreachableBasicBlockEliminator { } InstructionTransitionExtractor transitionExtractor = new InstructionTransitionExtractor(); boolean[] reachable = new boolean[program.basicBlockCount()]; - int[] stack = new int[program.basicBlockCount()]; - int top = 0; - stack[top++] = 0; - while (top > 0) { - int i = stack[--top]; + IntegerStack stack = new IntegerStack(program.basicBlockCount()); + stack.push(0); + while (!stack.isEmpty()) { + int i = stack.pop(); if (reachable[i]) { continue; } @@ -43,10 +43,12 @@ public class UnreachableBasicBlockEliminator { BasicBlock block = program.basicBlockAt(i); block.getLastInstruction().acceptVisitor(transitionExtractor); for (BasicBlock successor : transitionExtractor.getTargets()) { - stack[top++] = successor.getIndex(); + if (!reachable[successor.getIndex()]) { + stack.push(successor.getIndex()); + } } for (TryCatchBlock tryCatch : block.getTryCatchBlocks()) { - stack[top++] = tryCatch.getHandler().getIndex(); + stack.push(tryCatch.getHandler().getIndex()); } } for (int i = 0; i < reachable.length; ++i) { diff --git a/teavm-core/src/main/resources/org/teavm/javascript/runtime.js b/teavm-core/src/main/resources/org/teavm/javascript/runtime.js index 69b4af673..1c7c7fd58 100644 --- a/teavm-core/src/main/resources/org/teavm/javascript/runtime.js +++ b/teavm-core/src/main/resources/org/teavm/javascript/runtime.js @@ -58,7 +58,7 @@ $rt_createLongArray = function(sz) { } return arr; } -if (false) { +if (ArrayBuffer) { $rt_createNumericArray = function(cls, nativeArray) { return new ($rt_arraycls(cls))(nativeArray); }