From d50e048ea492205214b0fb088243496cd58e0ef5 Mon Sep 17 00:00:00 2001 From: Alexey Andreev Date: Tue, 14 Apr 2020 18:31:19 +0300 Subject: [PATCH] Add Character.toTitleCase. More efficient implementation for Character.digit --- .../org/teavm/classlib/impl/JCLPlugin.java | 2 + .../classlib/impl/unicode/UnicodeHelper.java | 53 ++++++++++++++++--- .../classlib/impl/unicode/UnicodeSupport.java | 16 ++++++ .../teavm/classlib/java/io/TPrintStream.java | 10 ++++ .../java/lang/CharacterMetadataGenerator.java | 10 +++- .../teavm/classlib/java/lang/TCharacter.java | 34 +++++++++++- 6 files changed, 115 insertions(+), 10 deletions(-) diff --git a/classlib/src/main/java/org/teavm/classlib/impl/JCLPlugin.java b/classlib/src/main/java/org/teavm/classlib/impl/JCLPlugin.java index 7209a10dc..4c9a283a2 100644 --- a/classlib/src/main/java/org/teavm/classlib/impl/JCLPlugin.java +++ b/classlib/src/main/java/org/teavm/classlib/impl/JCLPlugin.java @@ -210,6 +210,8 @@ public class JCLPlugin implements TeaVMPlugin { new CharacterMetadataGenerator()); reg.register(new MethodReference(Character.class, "obtainClasses", StringResource.class), new CharacterMetadataGenerator()); + reg.register(new MethodReference(Character.class, "acquireTitleCaseMapping", StringResource.class), + new CharacterMetadataGenerator()); } @PlatformMarker diff --git a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java index 9ad874997..dcf38ca50 100644 --- a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java +++ b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java @@ -44,21 +44,58 @@ public final class UnicodeHelper { } } - public static String encodeIntByte(int[] data) { + public static String encodeIntPairsDiff(int[] data) { StringBuilder sb = new StringBuilder(); - Base46.encode(sb, data.length); - for (int i = 0; i < data.length; i++) { - Base46.encode(sb, data[i]); + 
Base46.encodeUnsigned(sb, data.length / 2); + int lastKey = 0; + int lastValue = 0; + for (int i = 0; i < data.length; i += 2) { + int key = data[i]; + int value = data[i + 1]; + Base46.encode(sb, key - lastKey); + Base46.encode(sb, value - lastValue); + lastKey = key; + lastValue = value; } return sb.toString(); } - public static int[] decodeIntByte(String text) { + public static int[] decodeIntPairsDiff(String text) { CharFlow flow = new CharFlow(text.toCharArray()); - int sz = Base46.decode(flow); - int[] data = new int[sz]; + int sz = Base46.decodeUnsigned(flow); + int[] data = new int[sz * 2]; + int j = 0; + int lastKey = 0; + int lastValue = 0; for (int i = 0; i < sz; i++) { - data[i] = Base46.decode(flow); + lastKey += Base46.decode(flow); + lastValue += Base46.decode(flow); + data[j++] = lastKey; + data[j++] = lastValue; + } + return data; + } + + public static String encodeIntDiff(int[] data) { + StringBuilder sb = new StringBuilder(); + Base46.encodeUnsigned(sb, data.length); + int last = 0; + for (int i = 0; i < data.length; i++) { + int v = data[i]; + Base46.encode(sb, v - last); + last = v; + } + return sb.toString(); + } + + public static int[] decodeIntDiff(String text) { + CharFlow flow = new CharFlow(text.toCharArray()); + int sz = Base46.decodeUnsigned(flow); + int[] data = new int[sz]; + int last = 0; + for (int i = 0; i < sz; i++) { + last += Base46.decode(flow); + data[i] = last; } return data; } diff --git a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeSupport.java b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeSupport.java index fb403d6bb..feda93cbd 100644 --- a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeSupport.java +++ b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeSupport.java @@ -31,6 +31,7 @@ public final class UnicodeSupport { private static volatile CountDownLatch latch = new CountDownLatch(1); private static int[] digitValues; private static byte[] classes; + 
private static int[] titleCaseMapping; private static Map classMap = new HashMap<>(); static { @@ -72,6 +73,7 @@ public final class UnicodeSupport { private static void parseUnicodeData() { IntegerArray digitValues = new IntegerArray(4096); IntegerArray classes = new IntegerArray(65536); + IntegerArray titleCaseMapping = new IntegerArray(256); try (BufferedReader reader = new BufferedReader(new InputStreamReader(UnicodeHelper.class .getResourceAsStream("UnicodeData.txt")))) { while (true) { @@ -94,6 +96,14 @@ public final class UnicodeSupport { } Byte charClass = classMap.get(fields[2]); classes.add(charClass != null ? charClass.intValue() : 0); + + if (!fields[14].isEmpty()) { + int titleCaseCode = parseHex(fields[14]); + if (fields[12].isEmpty() || parseHex(fields[12]) != titleCaseCode) { + titleCaseMapping.add(charCode); + titleCaseMapping.add(titleCaseCode); + } + } } } catch (IOException e) { throw new RuntimeException("Error reading unicode data", e); @@ -120,6 +130,7 @@ public final class UnicodeSupport { for (int i = 0; i < classes.size(); ++i) { UnicodeSupport.classes[i] = (byte) classes.get(i); } + UnicodeSupport.titleCaseMapping = titleCaseMapping.getAll(); } private static String[] splitLine(String line) { @@ -200,4 +211,9 @@ public final class UnicodeSupport { ensureUnicodeData(); return classes; } + + public static int[] getTitleCaseMapping() { + ensureUnicodeData(); + return titleCaseMapping; + } } diff --git a/classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java b/classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java index 248e6b543..02e80139c 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java +++ b/classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java @@ -202,6 +202,16 @@ public class TPrintStream extends TFilterOutputStream { printSB(); } + public void println(char c) { + sb.append(c).append('\n'); /* println must terminate the line; without '\n' it behaves like print(char) */ + printSB(); + } + + public void println(boolean b) { + sb.append(b).append('\n'); /* println must terminate the line; without '\n' it behaves like print(boolean) */ + printSB(); + } + 
public void println(String s) { sb.append(s).append('\n'); printSB(); diff --git a/classlib/src/main/java/org/teavm/classlib/java/lang/CharacterMetadataGenerator.java b/classlib/src/main/java/org/teavm/classlib/java/lang/CharacterMetadataGenerator.java index e75acc78f..be96a14bd 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/lang/CharacterMetadataGenerator.java +++ b/classlib/src/main/java/org/teavm/classlib/java/lang/CharacterMetadataGenerator.java @@ -31,6 +31,8 @@ public class CharacterMetadataGenerator implements MetadataGenerator { return generateObtainDigitMapping(context); case "obtainClasses": return generateObtainClasses(context); + case "acquireTitleCaseMapping": + return generateObtainTitleCaseMapping(context); default: return null; } @@ -38,7 +40,7 @@ public class CharacterMetadataGenerator implements MetadataGenerator { private Resource generateObtainDigitMapping(MetadataGeneratorContext context) { StringResource res = context.createResource(StringResource.class); - res.setValue(UnicodeHelper.encodeIntByte(UnicodeSupport.getDigitValues())); + res.setValue(UnicodeHelper.encodeIntPairsDiff(UnicodeSupport.getDigitValues())); return res; } @@ -47,4 +49,10 @@ public class CharacterMetadataGenerator implements MetadataGenerator { res.setValue(UnicodeHelper.compressRle(UnicodeSupport.getClasses())); return res; } + + private Resource generateObtainTitleCaseMapping(MetadataGeneratorContext context) { + StringResource res = context.createResource(StringResource.class); + res.setValue(UnicodeHelper.encodeIntDiff(UnicodeSupport.getTitleCaseMapping())); + return res; + } } diff --git a/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java b/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java index b5c8e2234..9ef3e9223 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java +++ b/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java @@ -91,6 +91,7 @@ public class TCharacter extends 
TObject implements TComparable { public static final int SIZE = 16; static final int ERROR = 0xFFFFFFFF; private static int[] digitMapping; + private static int[] titleCaseMapping; private static UnicodeHelper.Range[] classMapping; private char value; private static TCharacter[] characterCache = new TCharacter[128]; @@ -266,6 +267,37 @@ public class TCharacter extends TObject implements TComparable { @Unmanaged private static native int toUpperCaseSystem(int codePoint); + public static int toTitleCase(int codePoint) { + int[] mapping = getTitleCaseMapping(); + int l = 0; + int u = (mapping.length / 2) - 1; + while (u >= l) { + int idx = (l + u) / 2; + int val = mapping[idx * 2]; + if (codePoint > val) { + l = idx + 1; + } else if (codePoint < val) { + u = idx - 1; + } else { + return mapping[idx * 2 + 1]; + } + } + return toUpperCase(codePoint); + } + + public static char toTitleCase(char c) { + return (char) toTitleCase((int) c); + } + + private static int[] getTitleCaseMapping() { + if (titleCaseMapping == null) { + titleCaseMapping = UnicodeHelper.decodeIntDiff(acquireTitleCaseMapping().getValue()); + } + return titleCaseMapping; + } + + private static native StringResource acquireTitleCaseMapping(); + public static int digit(char ch, int radix) { return digit((int) ch, radix); } @@ -317,7 +349,7 @@ public class TCharacter extends TObject implements TComparable { private static int[] getDigitMapping() { if (digitMapping == null) { - digitMapping = UnicodeHelper.decodeIntByte(obtainDigitMapping().getValue()); + digitMapping = UnicodeHelper.decodeIntPairsDiff(obtainDigitMapping().getValue()); } return digitMapping; }