Add Character.toTitleCase. More efficient implementation for Character.digit

This commit is contained in:
Alexey Andreev 2020-04-14 18:31:19 +03:00
parent 0c8d93acbc
commit d50e048ea4
6 changed files with 115 additions and 10 deletions

View File

@ -210,6 +210,8 @@ public class JCLPlugin implements TeaVMPlugin {
new CharacterMetadataGenerator()); new CharacterMetadataGenerator());
reg.register(new MethodReference(Character.class, "obtainClasses", StringResource.class), reg.register(new MethodReference(Character.class, "obtainClasses", StringResource.class),
new CharacterMetadataGenerator()); new CharacterMetadataGenerator());
reg.register(new MethodReference(Character.class, "acquireTitleCaseMapping", StringResource.class),
new CharacterMetadataGenerator());
} }
@PlatformMarker @PlatformMarker

View File

@ -44,21 +44,58 @@ public final class UnicodeHelper {
} }
} }
public static String encodeIntByte(int[] data) { public static String encodeIntPairsDiff(int[] data) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
Base46.encode(sb, data.length); Base46.encodeUnsigned(sb, data.length / 2);
for (int i = 0; i < data.length; i++) { int lastKey = 0;
Base46.encode(sb, data[i]); int lastValue = 0;
for (int i = 0; i < data.length; i += 2) {
int key = data[i];
int value = data[i + 1];
Base46.encode(sb, key - lastKey);
Base46.encode(sb, value - lastValue);
lastKey = key;
lastValue = value;
} }
return sb.toString(); return sb.toString();
} }
public static int[] decodeIntByte(String text) { public static int[] decodeIntPairsDiff(String text) {
CharFlow flow = new CharFlow(text.toCharArray()); CharFlow flow = new CharFlow(text.toCharArray());
int sz = Base46.decode(flow); int sz = Base46.decodeUnsigned(flow);
int[] data = new int[sz]; int[] data = new int[sz * 2];
int j = 0;
int lastKey = 0;
int lastValue = 0;
for (int i = 0; i < sz; i++) { for (int i = 0; i < sz; i++) {
data[i] = Base46.decode(flow); lastKey += Base46.decode(flow);
lastValue += Base46.decode(flow);
data[j++] = lastKey;
data[j++] = lastValue;
}
return data;
}
/**
 * Encodes an int array as text using delta encoding.
 *
 * <p>The element count is written first with {@code Base46.encodeUnsigned}. Each element is
 * then written with {@code Base46.encode} as the difference from the element before it; the
 * first element is written relative to zero.
 *
 * @param data the values to encode
 * @return the encoded text
 */
public static String encodeIntDiff(int[] data) {
    StringBuilder out = new StringBuilder();
    Base46.encodeUnsigned(out, data.length);
    int previous = 0;
    for (int current : data) {
        // Store only the step from the preceding value.
        Base46.encode(out, current - previous);
        previous = current;
    }
    return out.toString();
}
public static int[] decodeIntDiff(String text) {
CharFlow flow = new CharFlow(text.toCharArray());
int sz = Base46.decodeUnsigned(flow);
int[] data = new int[sz];
int last = 0;
for (int i = 0; i < sz; i++) {
last += Base46.decode(flow);
data[i] = last;
} }
return data; return data;
} }

View File

@ -31,6 +31,7 @@ public final class UnicodeSupport {
private static volatile CountDownLatch latch = new CountDownLatch(1); private static volatile CountDownLatch latch = new CountDownLatch(1);
private static int[] digitValues; private static int[] digitValues;
private static byte[] classes; private static byte[] classes;
private static int[] titleCaseMapping;
private static Map<String, Byte> classMap = new HashMap<>(); private static Map<String, Byte> classMap = new HashMap<>();
static { static {
@ -72,6 +73,7 @@ public final class UnicodeSupport {
private static void parseUnicodeData() { private static void parseUnicodeData() {
IntegerArray digitValues = new IntegerArray(4096); IntegerArray digitValues = new IntegerArray(4096);
IntegerArray classes = new IntegerArray(65536); IntegerArray classes = new IntegerArray(65536);
IntegerArray titleCaseMapping = new IntegerArray(256);
try (BufferedReader reader = new BufferedReader(new InputStreamReader(UnicodeHelper.class try (BufferedReader reader = new BufferedReader(new InputStreamReader(UnicodeHelper.class
.getResourceAsStream("UnicodeData.txt")))) { .getResourceAsStream("UnicodeData.txt")))) {
while (true) { while (true) {
@ -94,6 +96,14 @@ public final class UnicodeSupport {
} }
Byte charClass = classMap.get(fields[2]); Byte charClass = classMap.get(fields[2]);
classes.add(charClass != null ? charClass.intValue() : 0); classes.add(charClass != null ? charClass.intValue() : 0);
if (!fields[14].isEmpty()) {
int titleCaseCode = parseHex(fields[14]);
if (fields[12].isEmpty() || parseHex(fields[12]) != titleCaseCode) {
titleCaseMapping.add(charCode);
titleCaseMapping.add(titleCaseCode);
}
}
} }
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException("Error reading unicode data", e); throw new RuntimeException("Error reading unicode data", e);
@ -120,6 +130,7 @@ public final class UnicodeSupport {
for (int i = 0; i < classes.size(); ++i) { for (int i = 0; i < classes.size(); ++i) {
UnicodeSupport.classes[i] = (byte) classes.get(i); UnicodeSupport.classes[i] = (byte) classes.get(i);
} }
UnicodeSupport.titleCaseMapping = titleCaseMapping.getAll();
} }
private static String[] splitLine(String line) { private static String[] splitLine(String line) {
@ -200,4 +211,9 @@ public final class UnicodeSupport {
ensureUnicodeData(); ensureUnicodeData();
return classes; return classes;
} }
/**
 * Returns the title-case table collected while parsing the Unicode data.
 *
 * <p>The array is flat: a character code followed by its title-case code, repeated. The parser
 * only records a character when its title-case field is present and differs from its
 * upper-case field (or the upper-case field is empty).
 *
 * @return the internal table itself, not a copy
 */
public static int[] getTitleCaseMapping() {
// Must run first: the table is only assigned once the Unicode data has been parsed.
ensureUnicodeData();
return titleCaseMapping;
}
} }

View File

@ -202,6 +202,16 @@ public class TPrintStream extends TFilterOutputStream {
printSB(); printSB();
} }
/**
 * Prints a character followed by a line terminator.
 *
 * <p>The newline is appended before flushing the buffer, matching {@code println(String)}.
 * Without it this method would behave like {@code print(char)}.
 *
 * @param c the character to print
 */
public void println(char c) {
    sb.append(c);
    sb.append('\n');
    printSB();
}
/**
 * Prints a boolean followed by a line terminator.
 *
 * <p>The newline is appended before flushing the buffer, matching {@code println(String)}.
 * Without it this method would behave like {@code print(boolean)}.
 *
 * @param b the value to print
 */
public void println(boolean b) {
    sb.append(b);
    sb.append('\n');
    printSB();
}
public void println(String s) { public void println(String s) {
sb.append(s).append('\n'); sb.append(s).append('\n');
printSB(); printSB();

View File

@ -31,6 +31,8 @@ public class CharacterMetadataGenerator implements MetadataGenerator {
return generateObtainDigitMapping(context); return generateObtainDigitMapping(context);
case "obtainClasses": case "obtainClasses":
return generateObtainClasses(context); return generateObtainClasses(context);
case "acquireTitleCaseMapping":
return generateObtainTitleCaseMapping(context);
default: default:
return null; return null;
} }
@ -38,7 +40,7 @@ public class CharacterMetadataGenerator implements MetadataGenerator {
private Resource generateObtainDigitMapping(MetadataGeneratorContext context) { private Resource generateObtainDigitMapping(MetadataGeneratorContext context) {
StringResource res = context.createResource(StringResource.class); StringResource res = context.createResource(StringResource.class);
res.setValue(UnicodeHelper.encodeIntByte(UnicodeSupport.getDigitValues())); res.setValue(UnicodeHelper.encodeIntPairsDiff(UnicodeSupport.getDigitValues()));
return res; return res;
} }
@ -47,4 +49,10 @@ public class CharacterMetadataGenerator implements MetadataGenerator {
res.setValue(UnicodeHelper.compressRle(UnicodeSupport.getClasses())); res.setValue(UnicodeHelper.compressRle(UnicodeSupport.getClasses()));
return res; return res;
} }
/**
 * Builds the string resource that carries the title-case table.
 *
 * <p>The table comes from {@code UnicodeSupport.getTitleCaseMapping()} and is serialized with
 * {@code UnicodeHelper.encodeIntDiff}.
 *
 * @param context used to create the resource
 * @return a string resource whose value is the encoded table
 */
private Resource generateObtainTitleCaseMapping(MetadataGeneratorContext context) {
    StringResource resource = context.createResource(StringResource.class);
    int[] table = UnicodeSupport.getTitleCaseMapping();
    String encoded = UnicodeHelper.encodeIntDiff(table);
    resource.setValue(encoded);
    return resource;
}
} }

View File

@ -91,6 +91,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
public static final int SIZE = 16; public static final int SIZE = 16;
static final int ERROR = 0xFFFFFFFF; static final int ERROR = 0xFFFFFFFF;
private static int[] digitMapping; private static int[] digitMapping;
private static int[] titleCaseMapping;
private static UnicodeHelper.Range[] classMapping; private static UnicodeHelper.Range[] classMapping;
private char value; private char value;
private static TCharacter[] characterCache = new TCharacter[128]; private static TCharacter[] characterCache = new TCharacter[128];
@ -266,6 +267,37 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
@Unmanaged @Unmanaged
private static native int toUpperCaseSystem(int codePoint); private static native int toUpperCaseSystem(int codePoint);
/**
 * Converts a code point to title case.
 *
 * <p>The table from {@code getTitleCaseMapping()} is read as pairs: a key at each even index
 * and its mapped value at the following odd index. The key is located by binary search, which
 * relies on the keys being in ascending order. When the code point has no entry, the result of
 * {@code toUpperCase(codePoint)} is returned instead.
 *
 * @param codePoint the code point to convert
 * @return the mapped value from the table, or the upper-case form when there is no entry
 */
public static int toTitleCase(int codePoint) {
    int[] pairs = getTitleCaseMapping();
    int low = 0;
    int high = pairs.length / 2 - 1;
    while (low <= high) {
        int mid = (low + high) / 2;
        int key = pairs[mid * 2];
        if (key == codePoint) {
            return pairs[mid * 2 + 1];
        }
        if (key < codePoint) {
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }
    // No dedicated title-case entry: fall back to the upper-case conversion.
    return toUpperCase(codePoint);
}
/**
 * Converts a character to title case by delegating to {@code toTitleCase(int)} and narrowing
 * the result back to {@code char}.
 *
 * @param c the character to convert
 * @return the converted character
 */
public static char toTitleCase(char c) {
    int mapped = toTitleCase((int) c);
    return (char) mapped;
}
/**
 * Returns the decoded title-case table, decoding it on first use.
 *
 * <p>The encoded text comes from {@code acquireTitleCaseMapping()} and is decoded with
 * {@code UnicodeHelper.decodeIntDiff}; the result is kept in a static field for later calls.
 *
 * @return the cached table
 */
private static int[] getTitleCaseMapping() {
    if (titleCaseMapping != null) {
        return titleCaseMapping;
    }
    String encoded = acquireTitleCaseMapping().getValue();
    titleCaseMapping = UnicodeHelper.decodeIntDiff(encoded);
    return titleCaseMapping;
}
/**
 * Returns the encoded title-case table as a string resource.
 *
 * <p>There is no body here: {@code JCLPlugin} registers {@code CharacterMetadataGenerator} for
 * this method, and that generator fills the resource with the table encoded by
 * {@code UnicodeHelper.encodeIntDiff}.
 */
private static native StringResource acquireTitleCaseMapping();
public static int digit(char ch, int radix) { public static int digit(char ch, int radix) {
return digit((int) ch, radix); return digit((int) ch, radix);
} }
@ -317,7 +349,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
private static int[] getDigitMapping() { private static int[] getDigitMapping() {
if (digitMapping == null) { if (digitMapping == null) {
digitMapping = UnicodeHelper.decodeIntByte(obtainDigitMapping().getValue()); digitMapping = UnicodeHelper.decodeIntPairsDiff(obtainDigitMapping().getValue());
} }
return digitMapping; return digitMapping;
} }