diff --git a/classlib/src/main/java/org/teavm/classlib/impl/JCLPlugin.java b/classlib/src/main/java/org/teavm/classlib/impl/JCLPlugin.java index eb003442e..742213cfd 100644 --- a/classlib/src/main/java/org/teavm/classlib/impl/JCLPlugin.java +++ b/classlib/src/main/java/org/teavm/classlib/impl/JCLPlugin.java @@ -237,6 +237,10 @@ public class JCLPlugin implements TeaVMPlugin { new CharacterMetadataGenerator()); reg.register(new MethodReference(Character.class, "acquireTitleCaseMapping", StringResource.class), new CharacterMetadataGenerator()); + reg.register(new MethodReference(Character.class, "acquireUpperCaseMapping", StringResource.class), + new CharacterMetadataGenerator()); + reg.register(new MethodReference(Character.class, "acquireLowerCaseMapping", StringResource.class), + new CharacterMetadataGenerator()); } @PlatformMarker diff --git a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java index 9c0a5efa6..1a0047940 100644 --- a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java +++ b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java @@ -87,6 +87,20 @@ public final class UnicodeHelper { return sb.toString(); } + public static String encodeCaseMapping(int[] data) { + StringBuilder sb = new StringBuilder(); + int sz = data.length / 2; + Base46.encodeUnsigned(sb, sz); + int last = 0; + for (int i = 0; i < sz; i++) { + int v = data[i * 2]; + Base46.encodeUnsigned(sb, v - last); + last = v; + Base46.encode(sb, data[i * 2 + 1]); + } + return sb.toString(); + } + public static int[] decodeIntDiff(String text) { CharFlow flow = new CharFlow(text.toCharArray()); int sz = Base46.decodeUnsigned(flow); @@ -99,6 +113,19 @@ public final class UnicodeHelper { return data; } + public static int[] decodeCaseMapping(String text) { + CharFlow flow = new CharFlow(text.toCharArray()); + int sz = Base46.decodeUnsigned(flow); + int[] data 
= new int[sz * 2]; + int last = 0; + for (int i = 0; i < sz; i++) { + last += Base46.decodeUnsigned(flow); + data[i * 2] = last; + data[i * 2 + 1] = Base46.decode(flow); + } + return data; + } + public static char encodeByte(byte b) { if (b < '\"' - ' ') { return (char) (b + ' '); diff --git a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeSupport.java b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeSupport.java index feda93cbd..7ee4571ce 100644 --- a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeSupport.java +++ b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeSupport.java @@ -32,6 +32,8 @@ public final class UnicodeSupport { private static int[] digitValues; private static byte[] classes; private static int[] titleCaseMapping; + private static int[] upperCaseMapping; + private static int[] lowerCaseMapping; private static Map classMap = new HashMap<>(); static { @@ -74,6 +76,8 @@ public final class UnicodeSupport { IntegerArray digitValues = new IntegerArray(4096); IntegerArray classes = new IntegerArray(65536); IntegerArray titleCaseMapping = new IntegerArray(256); + IntegerArray upperCaseMapping = new IntegerArray(256); + IntegerArray lowerCaseMapping = new IntegerArray(256); try (BufferedReader reader = new BufferedReader(new InputStreamReader(UnicodeHelper.class .getResourceAsStream("UnicodeData.txt")))) { while (true) { @@ -97,13 +101,15 @@ public final class UnicodeSupport { Byte charClass = classMap.get(fields[2]); classes.add(charClass != null ? charClass.intValue() : 0); - if (!fields[14].isEmpty()) { - int titleCaseCode = parseHex(fields[14]); - if (fields[12].isEmpty() || parseHex(fields[12]) != titleCaseCode) { - titleCaseMapping.add(charCode); - titleCaseMapping.add(titleCaseCode); - } + int upperCaseCode = !fields[12].isEmpty() ? parseHex(fields[12]) : charCode; + encodeCaseMapping(upperCaseMapping, charCode, upperCaseCode); + int lowerCaseCode = !fields[13].isEmpty() ? 
parseHex(fields[13]) : charCode; + encodeCaseMapping(lowerCaseMapping, charCode, lowerCaseCode); + int titleCaseCode = !fields[14].isEmpty() ? parseHex(fields[14]) : charCode; + if (titleCaseCode == upperCaseCode) { + titleCaseCode = charCode; } + encodeCaseMapping(titleCaseMapping, charCode, titleCaseCode); } } catch (IOException e) { throw new RuntimeException("Error reading unicode data", e); @@ -131,6 +137,16 @@ public final class UnicodeSupport { UnicodeSupport.classes[i] = (byte) classes.get(i); } UnicodeSupport.titleCaseMapping = titleCaseMapping.getAll(); + UnicodeSupport.upperCaseMapping = upperCaseMapping.getAll(); + UnicodeSupport.lowerCaseMapping = lowerCaseMapping.getAll(); + } + + private static void encodeCaseMapping(IntegerArray array, int codePoint, int mappedCodePoint) { + int diff = mappedCodePoint - codePoint; + if (array.size() == 0 || diff != array.get(array.size() - 1)) { + array.add(codePoint); + array.add(diff); + } } private static String[] splitLine(String line) { @@ -216,4 +232,14 @@ public final class UnicodeSupport { ensureUnicodeData(); return titleCaseMapping; } + + public static int[] getUpperCaseMapping() { + ensureUnicodeData(); + return upperCaseMapping; + } + + public static int[] getLowerCaseMapping() { + ensureUnicodeData(); + return lowerCaseMapping; + } } diff --git a/classlib/src/main/java/org/teavm/classlib/java/lang/CharacterMetadataGenerator.java b/classlib/src/main/java/org/teavm/classlib/java/lang/CharacterMetadataGenerator.java index be96a14bd..944c3e99a 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/lang/CharacterMetadataGenerator.java +++ b/classlib/src/main/java/org/teavm/classlib/java/lang/CharacterMetadataGenerator.java @@ -32,7 +32,11 @@ public class CharacterMetadataGenerator implements MetadataGenerator { case "obtainClasses": return generateObtainClasses(context); case "acquireTitleCaseMapping": - return generateObtainTitleCaseMapping(context); + return generateAcquireTitleCaseMapping(context); + 
case "acquireUpperCaseMapping": + return generateAcquireUpperCaseMapping(context); + case "acquireLowerCaseMapping": + return generateAcquireLowerCaseMapping(context); default: return null; } @@ -50,9 +54,21 @@ public class CharacterMetadataGenerator implements MetadataGenerator { return res; } - private Resource generateObtainTitleCaseMapping(MetadataGeneratorContext context) { + private Resource generateAcquireTitleCaseMapping(MetadataGeneratorContext context) { StringResource res = context.createResource(StringResource.class); - res.setValue(UnicodeHelper.encodeIntDiff(UnicodeSupport.getTitleCaseMapping())); + res.setValue(UnicodeHelper.encodeCaseMapping(UnicodeSupport.getTitleCaseMapping())); + return res; + } + + private Resource generateAcquireUpperCaseMapping(MetadataGeneratorContext context) { + StringResource res = context.createResource(StringResource.class); + res.setValue(UnicodeHelper.encodeCaseMapping(UnicodeSupport.getUpperCaseMapping())); + return res; + } + + private Resource generateAcquireLowerCaseMapping(MetadataGeneratorContext context) { + StringResource res = context.createResource(StringResource.class); + res.setValue(UnicodeHelper.encodeCaseMapping(UnicodeSupport.getLowerCaseMapping())); return res; } } diff --git a/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java b/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java index 9ef3e9223..591afa89e 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java +++ b/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java @@ -16,11 +16,6 @@ package org.teavm.classlib.java.lang; import org.teavm.classlib.impl.unicode.UnicodeHelper; -import org.teavm.interop.DelegateTo; -import org.teavm.interop.Import; -import org.teavm.interop.Unmanaged; -import org.teavm.interop.c.Include; -import org.teavm.platform.Platform; import org.teavm.platform.metadata.StringResource; public class TCharacter extends TObject implements TComparable { @@ -92,6 
+87,8 @@ public class TCharacter extends TObject implements TComparable { static final int ERROR = 0xFFFFFFFF; private static int[] digitMapping; private static int[] titleCaseMapping; + private static int[] upperCaseMapping; + private static int[] lowerCaseMapping; private static UnicodeHelper.Range[] classMapping; private char value; private static TCharacter[] characterCache = new TCharacter[128]; @@ -235,54 +232,43 @@ public class TCharacter extends TObject implements TComparable { return (char) toLowerCase((int) ch); } - @DelegateTo("toLowerCaseLowLevel") public static int toLowerCase(int ch) { - return Platform.stringFromCharCode(ch).toLowerCase().charCodeAt(0); + return mapChar(getLowerCaseMapping(), ch); } - private static int toLowerCaseLowLevel(int codePoint) { - return toLowerCaseSystem(codePoint); + private static int[] getLowerCaseMapping() { + if (lowerCaseMapping == null) { + lowerCaseMapping = UnicodeHelper.decodeCaseMapping(acquireLowerCaseMapping().getValue()); + } + return lowerCaseMapping; } - @Import(module = "teavm", name = "towlower") - @Include("wctype.h") - @Unmanaged - private static native int toLowerCaseSystem(int codePoint); + private static native StringResource acquireLowerCaseMapping(); + public static char toUpperCase(char ch) { return (char) toUpperCase((int) ch); } - @DelegateTo("toUpperCaseLowLevel") public static int toUpperCase(int codePoint) { - return Platform.stringFromCharCode(codePoint).toUpperCase().charCodeAt(0); + return mapChar(getUpperCaseMapping(), codePoint); } - private static int toUpperCaseLowLevel(int codePoint) { - return toUpperCaseSystem(codePoint); + private static int[] getUpperCaseMapping() { + if (upperCaseMapping == null) { + upperCaseMapping = UnicodeHelper.decodeCaseMapping(acquireUpperCaseMapping().getValue()); + } + return upperCaseMapping; } - @Import(module = "teavm", name = "towupper") - @Include("wctype.h") - @Unmanaged - private static native int toUpperCaseSystem(int codePoint); + private 
static native StringResource acquireUpperCaseMapping(); public static int toTitleCase(int codePoint) { - int[] mapping = getTitleCaseMapping(); - int l = 0; - int u = (mapping.length / 2) - 1; - while (u >= l) { - int idx = (l + u) / 2; - int val = mapping[idx * 2]; - if (codePoint > val) { - l = idx + 1; - } else if (codePoint < val) { - u = idx - 1; - } else { - return mapping[idx * 2 + 1]; - } + int mapped = mapChar(getTitleCaseMapping(), codePoint); /* keep the input intact so it can be compared below */ + if (mapped == codePoint) { /* title-case table recorded a zero delta: fall back to the upper-case mapping */ + mapped = toUpperCase(codePoint); } - return toUpperCase(codePoint); + return mapped; } public static char toTitleCase(char c) { @@ -291,13 +277,43 @@ public class TCharacter extends TObject implements TComparable { private static int[] getTitleCaseMapping() { if (titleCaseMapping == null) { - titleCaseMapping = UnicodeHelper.decodeIntDiff(acquireTitleCaseMapping().getValue()); + titleCaseMapping = UnicodeHelper.decodeCaseMapping(acquireTitleCaseMapping().getValue()); } return titleCaseMapping; } private static native StringResource acquireTitleCaseMapping(); + private static int mapChar(int[] table, int codePoint) { /* table holds (start code point, delta) pairs; the delta of the nearest start <= codePoint is applied */ + int index = binarySearchTable(table, codePoint); + if (index < 0 || index >= table.length / 2) { + return codePoint; /* no covering entry (e.g. negative input): leave the code point unchanged instead of returning 0 */ + } + return codePoint + table[index * 2 + 1]; + } + + private static int binarySearchTable(int[] data, int key) { /* returns the pair index whose start equals key, else the index of the preceding pair (-1 if key precedes the first start) */ + int l = 0; + int u = data.length / 2 - 1; + while (true) { + int i = (l + u) / 2; + int e = data[i * 2]; + if (e == key) { + return i; + } else if (e > key) { + u = i - 1; + if (u < l) { + return i - 1; + } + } else { + l = i + 1; + if (l > u) { + return i; + } + } + } + } + public static int digit(char ch, int radix) { return digit((int) ch, radix); } diff --git a/tests/src/test/java/org/teavm/classlib/java/lang/CharacterTest.java b/tests/src/test/java/org/teavm/classlib/java/lang/CharacterTest.java index a47fdf442..380e36b5a 100644 --- a/tests/src/test/java/org/teavm/classlib/java/lang/CharacterTest.java +++ 
b/tests/src/test/java/org/teavm/classlib/java/lang/CharacterTest.java @@ -39,4 +39,36 @@ public class CharacterTest { assertEquals(Character.NON_SPACING_MARK, Character.getType(0xFE25)); assertEquals(Character.DECIMAL_DIGIT_NUMBER, Character.getType(0x1D7D9)); } + + @Test + public void lowerCase() { + assertEquals('1', Character.toLowerCase('1')); + assertEquals('a', Character.toLowerCase('a')); + assertEquals('b', Character.toLowerCase('b')); + assertEquals('z', Character.toLowerCase('z')); + assertEquals('@', Character.toLowerCase('@')); + assertEquals('a', Character.toLowerCase('A')); + assertEquals('b', Character.toLowerCase('B')); + assertEquals('z', Character.toLowerCase('Z')); + assertEquals('щ', Character.toLowerCase('щ')); + assertEquals('щ', Character.toLowerCase('Щ')); + assertEquals('ü', Character.toLowerCase('ü')); + assertEquals('ü', Character.toLowerCase('Ü')); + } + + @Test + public void upperCase() { + assertEquals('1', Character.toUpperCase('1')); + assertEquals('A', Character.toUpperCase('a')); + assertEquals('B', Character.toUpperCase('b')); + assertEquals('Z', Character.toUpperCase('z')); + assertEquals('@', Character.toUpperCase('@')); + assertEquals('A', Character.toUpperCase('A')); + assertEquals('B', Character.toUpperCase('B')); + assertEquals('Z', Character.toUpperCase('Z')); + assertEquals('Щ', Character.toUpperCase('щ')); + assertEquals('Щ', Character.toUpperCase('Щ')); + assertEquals('Ü', Character.toUpperCase('ü')); + assertEquals('Ü', Character.toUpperCase('Ü')); + } }