mirror of
https://github.com/Eaglercraft-TeaVM-Fork/eagler-teavm.git
synced 2024-12-22 08:14:09 -08:00
classlib: use built-in unicode table to implement tolower/toupper methods.
This makes it possible to use TeaVM in environments where these functions aren't natively available, for example WASI.
This commit is contained in:
parent
a49f47dac7
commit
292aa21aef
|
@ -237,6 +237,10 @@ public class JCLPlugin implements TeaVMPlugin {
|
|||
new CharacterMetadataGenerator());
|
||||
reg.register(new MethodReference(Character.class, "acquireTitleCaseMapping", StringResource.class),
|
||||
new CharacterMetadataGenerator());
|
||||
reg.register(new MethodReference(Character.class, "acquireUpperCaseMapping", StringResource.class),
|
||||
new CharacterMetadataGenerator());
|
||||
reg.register(new MethodReference(Character.class, "acquireLowerCaseMapping", StringResource.class),
|
||||
new CharacterMetadataGenerator());
|
||||
}
|
||||
|
||||
@PlatformMarker
|
||||
|
|
|
@ -87,6 +87,20 @@ public final class UnicodeHelper {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String encodeCaseMapping(int[] data) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int sz = data.length / 2;
|
||||
Base46.encodeUnsigned(sb, sz);
|
||||
int last = 0;
|
||||
for (int i = 0; i < sz; i++) {
|
||||
int v = data[i * 2];
|
||||
Base46.encodeUnsigned(sb, v - last);
|
||||
last = v;
|
||||
Base46.encode(sb, data[i * 2 + 1]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static int[] decodeIntDiff(String text) {
|
||||
CharFlow flow = new CharFlow(text.toCharArray());
|
||||
int sz = Base46.decodeUnsigned(flow);
|
||||
|
@ -99,6 +113,19 @@ public final class UnicodeHelper {
|
|||
return data;
|
||||
}
|
||||
|
||||
public static int[] decodeCaseMapping(String text) {
|
||||
CharFlow flow = new CharFlow(text.toCharArray());
|
||||
int sz = Base46.decodeUnsigned(flow);
|
||||
int[] data = new int[sz * 2];
|
||||
int last = 0;
|
||||
for (int i = 0; i < sz; i++) {
|
||||
last += Base46.decodeUnsigned(flow);
|
||||
data[i * 2] = last;
|
||||
data[i * 2 + 1] = Base46.decode(flow);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
public static char encodeByte(byte b) {
|
||||
if (b < '\"' - ' ') {
|
||||
return (char) (b + ' ');
|
||||
|
|
|
@ -32,6 +32,8 @@ public final class UnicodeSupport {
|
|||
private static int[] digitValues;
|
||||
private static byte[] classes;
|
||||
private static int[] titleCaseMapping;
|
||||
private static int[] upperCaseMapping;
|
||||
private static int[] lowerCaseMapping;
|
||||
private static Map<String, Byte> classMap = new HashMap<>();
|
||||
|
||||
static {
|
||||
|
@ -74,6 +76,8 @@ public final class UnicodeSupport {
|
|||
IntegerArray digitValues = new IntegerArray(4096);
|
||||
IntegerArray classes = new IntegerArray(65536);
|
||||
IntegerArray titleCaseMapping = new IntegerArray(256);
|
||||
IntegerArray upperCaseMapping = new IntegerArray(256);
|
||||
IntegerArray lowerCaseMapping = new IntegerArray(256);
|
||||
try (BufferedReader reader = new BufferedReader(new InputStreamReader(UnicodeHelper.class
|
||||
.getResourceAsStream("UnicodeData.txt")))) {
|
||||
while (true) {
|
||||
|
@ -97,13 +101,15 @@ public final class UnicodeSupport {
|
|||
Byte charClass = classMap.get(fields[2]);
|
||||
classes.add(charClass != null ? charClass.intValue() : 0);
|
||||
|
||||
if (!fields[14].isEmpty()) {
|
||||
int titleCaseCode = parseHex(fields[14]);
|
||||
if (fields[12].isEmpty() || parseHex(fields[12]) != titleCaseCode) {
|
||||
titleCaseMapping.add(charCode);
|
||||
titleCaseMapping.add(titleCaseCode);
|
||||
}
|
||||
int upperCaseCode = !fields[12].isEmpty() ? parseHex(fields[12]) : charCode;
|
||||
encodeCaseMapping(upperCaseMapping, charCode, upperCaseCode);
|
||||
int lowerCaseCode = !fields[13].isEmpty() ? parseHex(fields[13]) : charCode;
|
||||
encodeCaseMapping(lowerCaseMapping, charCode, lowerCaseCode);
|
||||
int titleCaseCode = !fields[14].isEmpty() ? parseHex(fields[14]) : charCode;
|
||||
if (titleCaseCode == upperCaseCode) {
|
||||
titleCaseCode = charCode;
|
||||
}
|
||||
encodeCaseMapping(titleCaseMapping, charCode, titleCaseCode);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Error reading unicode data", e);
|
||||
|
@ -131,6 +137,16 @@ public final class UnicodeSupport {
|
|||
UnicodeSupport.classes[i] = (byte) classes.get(i);
|
||||
}
|
||||
UnicodeSupport.titleCaseMapping = titleCaseMapping.getAll();
|
||||
UnicodeSupport.upperCaseMapping = upperCaseMapping.getAll();
|
||||
UnicodeSupport.lowerCaseMapping = lowerCaseMapping.getAll();
|
||||
}
|
||||
|
||||
private static void encodeCaseMapping(IntegerArray array, int codePoint, int mappedCodePoint) {
|
||||
int diff = mappedCodePoint - codePoint;
|
||||
if (array.size() == 0 || diff != array.get(array.size() - 1)) {
|
||||
array.add(codePoint);
|
||||
array.add(diff);
|
||||
}
|
||||
}
|
||||
|
||||
private static String[] splitLine(String line) {
|
||||
|
@ -216,4 +232,14 @@ public final class UnicodeSupport {
|
|||
ensureUnicodeData();
|
||||
return titleCaseMapping;
|
||||
}
|
||||
|
||||
/**
 * Returns the upper-case mapping table, calling {@code ensureUnicodeData()}
 * first. The static field is returned directly, not a copy.
 *
 * @return the {@code upperCaseMapping} array
 */
public static int[] getUpperCaseMapping() {
|
||||
ensureUnicodeData();
|
||||
return upperCaseMapping;
|
||||
}
|
||||
|
||||
/**
 * Returns the lower-case mapping table, calling {@code ensureUnicodeData()}
 * first. The static field is returned directly, not a copy.
 *
 * @return the {@code lowerCaseMapping} array
 */
public static int[] getLowerCaseMapping() {
|
||||
ensureUnicodeData();
|
||||
return lowerCaseMapping;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,7 +32,11 @@ public class CharacterMetadataGenerator implements MetadataGenerator {
|
|||
case "obtainClasses":
|
||||
return generateObtainClasses(context);
|
||||
case "acquireTitleCaseMapping":
|
||||
return generateObtainTitleCaseMapping(context);
|
||||
return generateAcquireTitleCaseMapping(context);
|
||||
case "acquireUpperCaseMapping":
|
||||
return generateAcquireUpperCaseMapping(context);
|
||||
case "acquireLowerCaseMapping":
|
||||
return generateAcquireLowerCaseMapping(context);
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
|
@ -50,9 +54,21 @@ public class CharacterMetadataGenerator implements MetadataGenerator {
|
|||
return res;
|
||||
}
|
||||
|
||||
private Resource generateObtainTitleCaseMapping(MetadataGeneratorContext context) {
|
||||
private Resource generateAcquireTitleCaseMapping(MetadataGeneratorContext context) {
|
||||
StringResource res = context.createResource(StringResource.class);
|
||||
res.setValue(UnicodeHelper.encodeIntDiff(UnicodeSupport.getTitleCaseMapping()));
|
||||
res.setValue(UnicodeHelper.encodeCaseMapping(UnicodeSupport.getTitleCaseMapping()));
|
||||
return res;
|
||||
}
|
||||
|
||||
private Resource generateAcquireUpperCaseMapping(MetadataGeneratorContext context) {
|
||||
StringResource res = context.createResource(StringResource.class);
|
||||
res.setValue(UnicodeHelper.encodeCaseMapping(UnicodeSupport.getUpperCaseMapping()));
|
||||
return res;
|
||||
}
|
||||
|
||||
private Resource generateAcquireLowerCaseMapping(MetadataGeneratorContext context) {
|
||||
StringResource res = context.createResource(StringResource.class);
|
||||
res.setValue(UnicodeHelper.encodeCaseMapping(UnicodeSupport.getLowerCaseMapping()));
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,11 +16,6 @@
|
|||
package org.teavm.classlib.java.lang;
|
||||
|
||||
import org.teavm.classlib.impl.unicode.UnicodeHelper;
|
||||
import org.teavm.interop.DelegateTo;
|
||||
import org.teavm.interop.Import;
|
||||
import org.teavm.interop.Unmanaged;
|
||||
import org.teavm.interop.c.Include;
|
||||
import org.teavm.platform.Platform;
|
||||
import org.teavm.platform.metadata.StringResource;
|
||||
|
||||
public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
|
@ -92,6 +87,8 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
|||
static final int ERROR = 0xFFFFFFFF;
|
||||
private static int[] digitMapping;
|
||||
private static int[] titleCaseMapping;
|
||||
private static int[] upperCaseMapping;
|
||||
private static int[] lowerCaseMapping;
|
||||
private static UnicodeHelper.Range[] classMapping;
|
||||
private char value;
|
||||
private static TCharacter[] characterCache = new TCharacter[128];
|
||||
|
@ -235,54 +232,43 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
|||
return (char) toLowerCase((int) ch);
|
||||
}
|
||||
|
||||
@DelegateTo("toLowerCaseLowLevel")
|
||||
public static int toLowerCase(int ch) {
|
||||
return Platform.stringFromCharCode(ch).toLowerCase().charCodeAt(0);
|
||||
return mapChar(getLowerCaseMapping(), ch);
|
||||
}
|
||||
|
||||
private static int toLowerCaseLowLevel(int codePoint) {
|
||||
return toLowerCaseSystem(codePoint);
|
||||
private static int[] getLowerCaseMapping() {
|
||||
if (lowerCaseMapping == null) {
|
||||
lowerCaseMapping = UnicodeHelper.decodeCaseMapping(acquireLowerCaseMapping().getValue());
|
||||
}
|
||||
return lowerCaseMapping;
|
||||
}
|
||||
|
||||
@Import(module = "teavm", name = "towlower")
|
||||
@Include("wctype.h")
|
||||
@Unmanaged
|
||||
private static native int toLowerCaseSystem(int codePoint);
|
||||
private static native StringResource acquireLowerCaseMapping();
|
||||
|
||||
|
||||
/**
 * Upper-cases a single {@code char} by widening it to {@code int}, delegating
 * to {@code toUpperCase(int)} and narrowing the result back to {@code char}.
 *
 * @param ch the character to convert
 * @return the result of {@code toUpperCase(int)} cast to {@code char}
 */
public static char toUpperCase(char ch) {
|
||||
return (char) toUpperCase((int) ch);
|
||||
}
|
||||
|
||||
@DelegateTo("toUpperCaseLowLevel")
|
||||
public static int toUpperCase(int codePoint) {
|
||||
return Platform.stringFromCharCode(codePoint).toUpperCase().charCodeAt(0);
|
||||
return mapChar(getUpperCaseMapping(), codePoint);
|
||||
}
|
||||
|
||||
private static int toUpperCaseLowLevel(int codePoint) {
|
||||
return toUpperCaseSystem(codePoint);
|
||||
private static int[] getUpperCaseMapping() {
|
||||
if (upperCaseMapping == null) {
|
||||
upperCaseMapping = UnicodeHelper.decodeCaseMapping(acquireUpperCaseMapping().getValue());
|
||||
}
|
||||
return upperCaseMapping;
|
||||
}
|
||||
|
||||
@Import(module = "teavm", name = "towupper")
|
||||
@Include("wctype.h")
|
||||
@Unmanaged
|
||||
private static native int toUpperCaseSystem(int codePoint);
|
||||
private static native StringResource acquireUpperCaseMapping();
|
||||
|
||||
public static int toTitleCase(int codePoint) {
|
||||
int[] mapping = getTitleCaseMapping();
|
||||
int l = 0;
|
||||
int u = (mapping.length / 2) - 1;
|
||||
while (u >= l) {
|
||||
int idx = (l + u) / 2;
|
||||
int val = mapping[idx * 2];
|
||||
if (codePoint > val) {
|
||||
l = idx + 1;
|
||||
} else if (codePoint < val) {
|
||||
u = idx - 1;
|
||||
} else {
|
||||
return mapping[idx * 2 + 1];
|
||||
codePoint = mapChar(getTitleCaseMapping(), codePoint);
|
||||
if (codePoint == codePoint) {
|
||||
codePoint = toUpperCase(codePoint);
|
||||
}
|
||||
}
|
||||
return toUpperCase(codePoint);
|
||||
return codePoint;
|
||||
}
|
||||
|
||||
public static char toTitleCase(char c) {
|
||||
|
@ -291,13 +277,43 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
|||
|
||||
private static int[] getTitleCaseMapping() {
|
||||
if (titleCaseMapping == null) {
|
||||
titleCaseMapping = UnicodeHelper.decodeIntDiff(acquireTitleCaseMapping().getValue());
|
||||
titleCaseMapping = UnicodeHelper.decodeCaseMapping(acquireTitleCaseMapping().getValue());
|
||||
}
|
||||
return titleCaseMapping;
|
||||
}
|
||||
|
||||
private static native StringResource acquireTitleCaseMapping();
|
||||
|
||||
private static int mapChar(int[] table, int codePoint) {
|
||||
int index = binarySearchTable(table, codePoint);
|
||||
if (index < 0 || index >= table.length / 2) {
|
||||
return 0;
|
||||
}
|
||||
return codePoint + table[index * 2 + 1];
|
||||
}
|
||||
|
||||
private static int binarySearchTable(int[] data, int key) {
|
||||
int l = 0;
|
||||
int u = data.length / 2 - 1;
|
||||
while (true) {
|
||||
int i = (l + u) / 2;
|
||||
int e = data[i * 2];
|
||||
if (e == key) {
|
||||
return i;
|
||||
} else if (e > key) {
|
||||
u = i - 1;
|
||||
if (u < l) {
|
||||
return i - 1;
|
||||
}
|
||||
} else {
|
||||
l = i + 1;
|
||||
if (l > u) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * {@code char} overload that widens {@code ch} to {@code int} and delegates
 * to {@code digit(int, int)}.
 *
 * @param ch    the character to convert
 * @param radix the radix passed through to {@code digit(int, int)}
 * @return whatever {@code digit(int, int)} returns for the widened character
 */
public static int digit(char ch, int radix) {
|
||||
return digit((int) ch, radix);
|
||||
}
|
||||
|
|
|
@ -39,4 +39,36 @@ public class CharacterTest {
|
|||
assertEquals(Character.NON_SPACING_MARK, Character.getType(0xFE25));
|
||||
assertEquals(Character.DECIMAL_DIGIT_NUMBER, Character.getType(0x1D7D9));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void lowerCase() {
|
||||
assertEquals('1', Character.toLowerCase('1'));
|
||||
assertEquals('a', Character.toLowerCase('a'));
|
||||
assertEquals('b', Character.toLowerCase('b'));
|
||||
assertEquals('z', Character.toLowerCase('z'));
|
||||
assertEquals('@', Character.toLowerCase('@'));
|
||||
assertEquals('a', Character.toLowerCase('A'));
|
||||
assertEquals('b', Character.toLowerCase('B'));
|
||||
assertEquals('z', Character.toLowerCase('Z'));
|
||||
assertEquals('щ', Character.toLowerCase('щ'));
|
||||
assertEquals('щ', Character.toLowerCase('Щ'));
|
||||
assertEquals('ü', Character.toLowerCase('ü'));
|
||||
assertEquals('ü', Character.toLowerCase('Ü'));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void upperCase() {
|
||||
assertEquals('1', Character.toUpperCase('1'));
|
||||
assertEquals('A', Character.toUpperCase('a'));
|
||||
assertEquals('B', Character.toUpperCase('b'));
|
||||
assertEquals('Z', Character.toUpperCase('z'));
|
||||
assertEquals('@', Character.toUpperCase('@'));
|
||||
assertEquals('A', Character.toUpperCase('A'));
|
||||
assertEquals('B', Character.toUpperCase('B'));
|
||||
assertEquals('Z', Character.toUpperCase('Z'));
|
||||
assertEquals('Щ', Character.toUpperCase('щ'));
|
||||
assertEquals('Щ', Character.toUpperCase('Щ'));
|
||||
assertEquals('Ü', Character.toUpperCase('ü'));
|
||||
assertEquals('Ü', Character.toUpperCase('Ü'));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user