mirror of
https://github.com/Eaglercraft-TeaVM-Fork/eagler-teavm.git
synced 2025-01-08 16:04:10 -08:00
classlib: use built-in unicode table to implement tolower/toupper methods.
This makes it possible to use TeaVM in environments where these functions aren't natively available, for example, WASI.
This commit is contained in:
parent
a49f47dac7
commit
292aa21aef
|
@ -237,6 +237,10 @@ public class JCLPlugin implements TeaVMPlugin {
|
||||||
new CharacterMetadataGenerator());
|
new CharacterMetadataGenerator());
|
||||||
reg.register(new MethodReference(Character.class, "acquireTitleCaseMapping", StringResource.class),
|
reg.register(new MethodReference(Character.class, "acquireTitleCaseMapping", StringResource.class),
|
||||||
new CharacterMetadataGenerator());
|
new CharacterMetadataGenerator());
|
||||||
|
reg.register(new MethodReference(Character.class, "acquireUpperCaseMapping", StringResource.class),
|
||||||
|
new CharacterMetadataGenerator());
|
||||||
|
reg.register(new MethodReference(Character.class, "acquireLowerCaseMapping", StringResource.class),
|
||||||
|
new CharacterMetadataGenerator());
|
||||||
}
|
}
|
||||||
|
|
||||||
@PlatformMarker
|
@PlatformMarker
|
||||||
|
|
|
@ -87,6 +87,20 @@ public final class UnicodeHelper {
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String encodeCaseMapping(int[] data) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
int sz = data.length / 2;
|
||||||
|
Base46.encodeUnsigned(sb, sz);
|
||||||
|
int last = 0;
|
||||||
|
for (int i = 0; i < sz; i++) {
|
||||||
|
int v = data[i * 2];
|
||||||
|
Base46.encodeUnsigned(sb, v - last);
|
||||||
|
last = v;
|
||||||
|
Base46.encode(sb, data[i * 2 + 1]);
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
public static int[] decodeIntDiff(String text) {
|
public static int[] decodeIntDiff(String text) {
|
||||||
CharFlow flow = new CharFlow(text.toCharArray());
|
CharFlow flow = new CharFlow(text.toCharArray());
|
||||||
int sz = Base46.decodeUnsigned(flow);
|
int sz = Base46.decodeUnsigned(flow);
|
||||||
|
@ -99,6 +113,19 @@ public final class UnicodeHelper {
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static int[] decodeCaseMapping(String text) {
|
||||||
|
CharFlow flow = new CharFlow(text.toCharArray());
|
||||||
|
int sz = Base46.decodeUnsigned(flow);
|
||||||
|
int[] data = new int[sz * 2];
|
||||||
|
int last = 0;
|
||||||
|
for (int i = 0; i < sz; i++) {
|
||||||
|
last += Base46.decodeUnsigned(flow);
|
||||||
|
data[i * 2] = last;
|
||||||
|
data[i * 2 + 1] = Base46.decode(flow);
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
public static char encodeByte(byte b) {
|
public static char encodeByte(byte b) {
|
||||||
if (b < '\"' - ' ') {
|
if (b < '\"' - ' ') {
|
||||||
return (char) (b + ' ');
|
return (char) (b + ' ');
|
||||||
|
|
|
@ -32,6 +32,8 @@ public final class UnicodeSupport {
|
||||||
private static int[] digitValues;
|
private static int[] digitValues;
|
||||||
private static byte[] classes;
|
private static byte[] classes;
|
||||||
private static int[] titleCaseMapping;
|
private static int[] titleCaseMapping;
|
||||||
|
private static int[] upperCaseMapping;
|
||||||
|
private static int[] lowerCaseMapping;
|
||||||
private static Map<String, Byte> classMap = new HashMap<>();
|
private static Map<String, Byte> classMap = new HashMap<>();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
|
@ -74,6 +76,8 @@ public final class UnicodeSupport {
|
||||||
IntegerArray digitValues = new IntegerArray(4096);
|
IntegerArray digitValues = new IntegerArray(4096);
|
||||||
IntegerArray classes = new IntegerArray(65536);
|
IntegerArray classes = new IntegerArray(65536);
|
||||||
IntegerArray titleCaseMapping = new IntegerArray(256);
|
IntegerArray titleCaseMapping = new IntegerArray(256);
|
||||||
|
IntegerArray upperCaseMapping = new IntegerArray(256);
|
||||||
|
IntegerArray lowerCaseMapping = new IntegerArray(256);
|
||||||
try (BufferedReader reader = new BufferedReader(new InputStreamReader(UnicodeHelper.class
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(UnicodeHelper.class
|
||||||
.getResourceAsStream("UnicodeData.txt")))) {
|
.getResourceAsStream("UnicodeData.txt")))) {
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -97,13 +101,15 @@ public final class UnicodeSupport {
|
||||||
Byte charClass = classMap.get(fields[2]);
|
Byte charClass = classMap.get(fields[2]);
|
||||||
classes.add(charClass != null ? charClass.intValue() : 0);
|
classes.add(charClass != null ? charClass.intValue() : 0);
|
||||||
|
|
||||||
if (!fields[14].isEmpty()) {
|
int upperCaseCode = !fields[12].isEmpty() ? parseHex(fields[12]) : charCode;
|
||||||
int titleCaseCode = parseHex(fields[14]);
|
encodeCaseMapping(upperCaseMapping, charCode, upperCaseCode);
|
||||||
if (fields[12].isEmpty() || parseHex(fields[12]) != titleCaseCode) {
|
int lowerCaseCode = !fields[13].isEmpty() ? parseHex(fields[13]) : charCode;
|
||||||
titleCaseMapping.add(charCode);
|
encodeCaseMapping(lowerCaseMapping, charCode, lowerCaseCode);
|
||||||
titleCaseMapping.add(titleCaseCode);
|
int titleCaseCode = !fields[14].isEmpty() ? parseHex(fields[14]) : charCode;
|
||||||
}
|
if (titleCaseCode == upperCaseCode) {
|
||||||
|
titleCaseCode = charCode;
|
||||||
}
|
}
|
||||||
|
encodeCaseMapping(titleCaseMapping, charCode, titleCaseCode);
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("Error reading unicode data", e);
|
throw new RuntimeException("Error reading unicode data", e);
|
||||||
|
@ -131,6 +137,16 @@ public final class UnicodeSupport {
|
||||||
UnicodeSupport.classes[i] = (byte) classes.get(i);
|
UnicodeSupport.classes[i] = (byte) classes.get(i);
|
||||||
}
|
}
|
||||||
UnicodeSupport.titleCaseMapping = titleCaseMapping.getAll();
|
UnicodeSupport.titleCaseMapping = titleCaseMapping.getAll();
|
||||||
|
UnicodeSupport.upperCaseMapping = upperCaseMapping.getAll();
|
||||||
|
UnicodeSupport.lowerCaseMapping = lowerCaseMapping.getAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void encodeCaseMapping(IntegerArray array, int codePoint, int mappedCodePoint) {
|
||||||
|
int diff = mappedCodePoint - codePoint;
|
||||||
|
if (array.size() == 0 || diff != array.get(array.size() - 1)) {
|
||||||
|
array.add(codePoint);
|
||||||
|
array.add(diff);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String[] splitLine(String line) {
|
private static String[] splitLine(String line) {
|
||||||
|
@ -216,4 +232,14 @@ public final class UnicodeSupport {
|
||||||
ensureUnicodeData();
|
ensureUnicodeData();
|
||||||
return titleCaseMapping;
|
return titleCaseMapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static int[] getUpperCaseMapping() {
|
||||||
|
ensureUnicodeData();
|
||||||
|
return upperCaseMapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int[] getLowerCaseMapping() {
|
||||||
|
ensureUnicodeData();
|
||||||
|
return lowerCaseMapping;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,7 +32,11 @@ public class CharacterMetadataGenerator implements MetadataGenerator {
|
||||||
case "obtainClasses":
|
case "obtainClasses":
|
||||||
return generateObtainClasses(context);
|
return generateObtainClasses(context);
|
||||||
case "acquireTitleCaseMapping":
|
case "acquireTitleCaseMapping":
|
||||||
return generateObtainTitleCaseMapping(context);
|
return generateAcquireTitleCaseMapping(context);
|
||||||
|
case "acquireUpperCaseMapping":
|
||||||
|
return generateAcquireUpperCaseMapping(context);
|
||||||
|
case "acquireLowerCaseMapping":
|
||||||
|
return generateAcquireLowerCaseMapping(context);
|
||||||
default:
|
default:
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -50,9 +54,21 @@ public class CharacterMetadataGenerator implements MetadataGenerator {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Resource generateObtainTitleCaseMapping(MetadataGeneratorContext context) {
|
private Resource generateAcquireTitleCaseMapping(MetadataGeneratorContext context) {
|
||||||
StringResource res = context.createResource(StringResource.class);
|
StringResource res = context.createResource(StringResource.class);
|
||||||
res.setValue(UnicodeHelper.encodeIntDiff(UnicodeSupport.getTitleCaseMapping()));
|
res.setValue(UnicodeHelper.encodeCaseMapping(UnicodeSupport.getTitleCaseMapping()));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Resource generateAcquireUpperCaseMapping(MetadataGeneratorContext context) {
|
||||||
|
StringResource res = context.createResource(StringResource.class);
|
||||||
|
res.setValue(UnicodeHelper.encodeCaseMapping(UnicodeSupport.getUpperCaseMapping()));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Resource generateAcquireLowerCaseMapping(MetadataGeneratorContext context) {
|
||||||
|
StringResource res = context.createResource(StringResource.class);
|
||||||
|
res.setValue(UnicodeHelper.encodeCaseMapping(UnicodeSupport.getLowerCaseMapping()));
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,11 +16,6 @@
|
||||||
package org.teavm.classlib.java.lang;
|
package org.teavm.classlib.java.lang;
|
||||||
|
|
||||||
import org.teavm.classlib.impl.unicode.UnicodeHelper;
|
import org.teavm.classlib.impl.unicode.UnicodeHelper;
|
||||||
import org.teavm.interop.DelegateTo;
|
|
||||||
import org.teavm.interop.Import;
|
|
||||||
import org.teavm.interop.Unmanaged;
|
|
||||||
import org.teavm.interop.c.Include;
|
|
||||||
import org.teavm.platform.Platform;
|
|
||||||
import org.teavm.platform.metadata.StringResource;
|
import org.teavm.platform.metadata.StringResource;
|
||||||
|
|
||||||
public class TCharacter extends TObject implements TComparable<TCharacter> {
|
public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
|
@ -92,6 +87,8 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
static final int ERROR = 0xFFFFFFFF;
|
static final int ERROR = 0xFFFFFFFF;
|
||||||
private static int[] digitMapping;
|
private static int[] digitMapping;
|
||||||
private static int[] titleCaseMapping;
|
private static int[] titleCaseMapping;
|
||||||
|
private static int[] upperCaseMapping;
|
||||||
|
private static int[] lowerCaseMapping;
|
||||||
private static UnicodeHelper.Range[] classMapping;
|
private static UnicodeHelper.Range[] classMapping;
|
||||||
private char value;
|
private char value;
|
||||||
private static TCharacter[] characterCache = new TCharacter[128];
|
private static TCharacter[] characterCache = new TCharacter[128];
|
||||||
|
@ -235,54 +232,43 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
return (char) toLowerCase((int) ch);
|
return (char) toLowerCase((int) ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
@DelegateTo("toLowerCaseLowLevel")
|
|
||||||
public static int toLowerCase(int ch) {
|
public static int toLowerCase(int ch) {
|
||||||
return Platform.stringFromCharCode(ch).toLowerCase().charCodeAt(0);
|
return mapChar(getLowerCaseMapping(), ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int toLowerCaseLowLevel(int codePoint) {
|
private static int[] getLowerCaseMapping() {
|
||||||
return toLowerCaseSystem(codePoint);
|
if (lowerCaseMapping == null) {
|
||||||
|
lowerCaseMapping = UnicodeHelper.decodeCaseMapping(acquireLowerCaseMapping().getValue());
|
||||||
|
}
|
||||||
|
return lowerCaseMapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Import(module = "teavm", name = "towlower")
|
private static native StringResource acquireLowerCaseMapping();
|
||||||
@Include("wctype.h")
|
|
||||||
@Unmanaged
|
|
||||||
private static native int toLowerCaseSystem(int codePoint);
|
|
||||||
|
|
||||||
public static char toUpperCase(char ch) {
|
public static char toUpperCase(char ch) {
|
||||||
return (char) toUpperCase((int) ch);
|
return (char) toUpperCase((int) ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
@DelegateTo("toUpperCaseLowLevel")
|
|
||||||
public static int toUpperCase(int codePoint) {
|
public static int toUpperCase(int codePoint) {
|
||||||
return Platform.stringFromCharCode(codePoint).toUpperCase().charCodeAt(0);
|
return mapChar(getUpperCaseMapping(), codePoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int toUpperCaseLowLevel(int codePoint) {
|
private static int[] getUpperCaseMapping() {
|
||||||
return toUpperCaseSystem(codePoint);
|
if (upperCaseMapping == null) {
|
||||||
|
upperCaseMapping = UnicodeHelper.decodeCaseMapping(acquireUpperCaseMapping().getValue());
|
||||||
|
}
|
||||||
|
return upperCaseMapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Import(module = "teavm", name = "towupper")
|
private static native StringResource acquireUpperCaseMapping();
|
||||||
@Include("wctype.h")
|
|
||||||
@Unmanaged
|
|
||||||
private static native int toUpperCaseSystem(int codePoint);
|
|
||||||
|
|
||||||
public static int toTitleCase(int codePoint) {
|
public static int toTitleCase(int codePoint) {
|
||||||
int[] mapping = getTitleCaseMapping();
|
codePoint = mapChar(getTitleCaseMapping(), codePoint);
|
||||||
int l = 0;
|
if (codePoint == codePoint) {
|
||||||
int u = (mapping.length / 2) - 1;
|
codePoint = toUpperCase(codePoint);
|
||||||
while (u >= l) {
|
|
||||||
int idx = (l + u) / 2;
|
|
||||||
int val = mapping[idx * 2];
|
|
||||||
if (codePoint > val) {
|
|
||||||
l = idx + 1;
|
|
||||||
} else if (codePoint < val) {
|
|
||||||
u = idx - 1;
|
|
||||||
} else {
|
|
||||||
return mapping[idx * 2 + 1];
|
|
||||||
}
|
}
|
||||||
}
|
return codePoint;
|
||||||
return toUpperCase(codePoint);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static char toTitleCase(char c) {
|
public static char toTitleCase(char c) {
|
||||||
|
@ -291,13 +277,43 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
|
|
||||||
private static int[] getTitleCaseMapping() {
|
private static int[] getTitleCaseMapping() {
|
||||||
if (titleCaseMapping == null) {
|
if (titleCaseMapping == null) {
|
||||||
titleCaseMapping = UnicodeHelper.decodeIntDiff(acquireTitleCaseMapping().getValue());
|
titleCaseMapping = UnicodeHelper.decodeCaseMapping(acquireTitleCaseMapping().getValue());
|
||||||
}
|
}
|
||||||
return titleCaseMapping;
|
return titleCaseMapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static native StringResource acquireTitleCaseMapping();
|
private static native StringResource acquireTitleCaseMapping();
|
||||||
|
|
||||||
|
private static int mapChar(int[] table, int codePoint) {
|
||||||
|
int index = binarySearchTable(table, codePoint);
|
||||||
|
if (index < 0 || index >= table.length / 2) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return codePoint + table[index * 2 + 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int binarySearchTable(int[] data, int key) {
|
||||||
|
int l = 0;
|
||||||
|
int u = data.length / 2 - 1;
|
||||||
|
while (true) {
|
||||||
|
int i = (l + u) / 2;
|
||||||
|
int e = data[i * 2];
|
||||||
|
if (e == key) {
|
||||||
|
return i;
|
||||||
|
} else if (e > key) {
|
||||||
|
u = i - 1;
|
||||||
|
if (u < l) {
|
||||||
|
return i - 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
l = i + 1;
|
||||||
|
if (l > u) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static int digit(char ch, int radix) {
|
public static int digit(char ch, int radix) {
|
||||||
return digit((int) ch, radix);
|
return digit((int) ch, radix);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,4 +39,36 @@ public class CharacterTest {
|
||||||
assertEquals(Character.NON_SPACING_MARK, Character.getType(0xFE25));
|
assertEquals(Character.NON_SPACING_MARK, Character.getType(0xFE25));
|
||||||
assertEquals(Character.DECIMAL_DIGIT_NUMBER, Character.getType(0x1D7D9));
|
assertEquals(Character.DECIMAL_DIGIT_NUMBER, Character.getType(0x1D7D9));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void lowerCase() {
|
||||||
|
assertEquals('1', Character.toLowerCase('1'));
|
||||||
|
assertEquals('a', Character.toLowerCase('a'));
|
||||||
|
assertEquals('b', Character.toLowerCase('b'));
|
||||||
|
assertEquals('z', Character.toLowerCase('z'));
|
||||||
|
assertEquals('@', Character.toLowerCase('@'));
|
||||||
|
assertEquals('a', Character.toLowerCase('A'));
|
||||||
|
assertEquals('b', Character.toLowerCase('B'));
|
||||||
|
assertEquals('z', Character.toLowerCase('Z'));
|
||||||
|
assertEquals('щ', Character.toLowerCase('щ'));
|
||||||
|
assertEquals('щ', Character.toLowerCase('Щ'));
|
||||||
|
assertEquals('ü', Character.toLowerCase('ü'));
|
||||||
|
assertEquals('ü', Character.toLowerCase('Ü'));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void upperCase() {
|
||||||
|
assertEquals('1', Character.toUpperCase('1'));
|
||||||
|
assertEquals('A', Character.toUpperCase('a'));
|
||||||
|
assertEquals('B', Character.toUpperCase('b'));
|
||||||
|
assertEquals('Z', Character.toUpperCase('z'));
|
||||||
|
assertEquals('@', Character.toUpperCase('@'));
|
||||||
|
assertEquals('A', Character.toUpperCase('A'));
|
||||||
|
assertEquals('B', Character.toUpperCase('B'));
|
||||||
|
assertEquals('Z', Character.toUpperCase('Z'));
|
||||||
|
assertEquals('Щ', Character.toUpperCase('щ'));
|
||||||
|
assertEquals('Щ', Character.toUpperCase('Щ'));
|
||||||
|
assertEquals('Ü', Character.toUpperCase('ü'));
|
||||||
|
assertEquals('Ü', Character.toUpperCase('Ü'));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user