classlib: improve performance of String.toLower/toUpper

This commit is contained in:
Alexey Andreev 2023-09-30 07:17:22 +02:00
parent ee0ebd4c2b
commit fc3d92f58d
4 changed files with 122 additions and 13 deletions

View File

@ -0,0 +1,26 @@
/*
* Copyright 2023 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.impl.unicode;
/**
 * Pairs the two lookup tables that describe a single character mapping.
 *
 * <p>The class only keeps the array references it is handed; it neither copies
 * nor validates them.
 */
public class CharMapping {
    /** Table intended for binary-search lookups. */
    public final int[] binarySearchTable;

    /** Table intended for direct lookups by index. */
    public final int[] fastTable;

    /**
     * Stores both tables as-is.
     *
     * @param binarySearchTable table to expose as {@link #binarySearchTable}
     * @param fastTable table to expose as {@link #fastTable}
     */
    public CharMapping(int[] binarySearchTable, int[] fastTable) {
        this.fastTable = fastTable;
        this.binarySearchTable = binarySearchTable;
    }
}

View File

@ -126,6 +126,26 @@ public final class UnicodeHelper {
return data; return data;
} }
/**
 * Builds a {@link CharMapping} from a flat table of {@code (key, value)} pairs.
 *
 * <p>Each pair opens a run: every index from its key up to (but excluding) the
 * next pair's key receives its value. Indices below the first key keep the
 * value 0. The runs are expanded into a 65536-element direct lookup table; the
 * {@code data} array itself is handed over unchanged as the mapping's
 * binary-search table.
 *
 * <p>The run opened by the last pair extends to the end of the direct table, so
 * that the direct table agrees with a floor-style search over {@code data} even
 * when {@code data} contains no key at or beyond 65536.
 *
 * @param data flat array of pairs: key at each even index, value at the
 *     following odd index; keys are expected in ascending order (a key smaller
 *     than its predecessor makes {@code Arrays.fill} throw)
 * @return mapping holding {@code data} and the expanded direct table
 */
public static CharMapping createCharMapping(int[] data) {
    var result = new int[65536];
    var last = 0;
    var lastValue = 0;
    // Stop as soon as the direct table is full; later pairs only concern the binary-search table.
    for (var i = 0; i < data.length && last < result.length; i += 2) {
        // Keys beyond the table are clamped so the run in progress is filled up to the end.
        var key = Math.min(data[i], result.length);
        Arrays.fill(result, last, key, lastValue);
        last = key;
        lastValue = data[i + 1];
    }
    // The final run has no following key to close it: extend it to the end of the table.
    Arrays.fill(result, last, result.length, lastValue);
    return new CharMapping(data, result);
}
public static char encodeByte(byte b) { public static char encodeByte(byte b) {
if (b < '\"' - ' ') { if (b < '\"' - ' ') {
return (char) (b + ' '); return (char) (b + ' ');

View File

@ -15,6 +15,7 @@
*/ */
package org.teavm.classlib.java.lang; package org.teavm.classlib.java.lang;
import org.teavm.classlib.impl.unicode.CharMapping;
import org.teavm.classlib.impl.unicode.UnicodeHelper; import org.teavm.classlib.impl.unicode.UnicodeHelper;
import org.teavm.platform.metadata.StringResource; import org.teavm.platform.metadata.StringResource;
@ -87,9 +88,9 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
public static final int BYTES = SIZE / Byte.SIZE; public static final int BYTES = SIZE / Byte.SIZE;
static final int ERROR = 0xFFFFFFFF; static final int ERROR = 0xFFFFFFFF;
private static int[] digitMapping; private static int[] digitMapping;
private static int[] titleCaseMapping; private static CharMapping titleCaseMapping;
private static int[] upperCaseMapping; private static CharMapping upperCaseMapping;
private static int[] lowerCaseMapping; private static CharMapping lowerCaseMapping;
private static UnicodeHelper.Range[] classMapping; private static UnicodeHelper.Range[] classMapping;
private final char value; private final char value;
private static TCharacter[] characterCache = new TCharacter[128]; private static TCharacter[] characterCache = new TCharacter[128];
@ -241,9 +242,10 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
return mapChar(getLowerCaseMapping(), ch); return mapChar(getLowerCaseMapping(), ch);
} }
private static int[] getLowerCaseMapping() { private static CharMapping getLowerCaseMapping() {
if (lowerCaseMapping == null) { if (lowerCaseMapping == null) {
lowerCaseMapping = UnicodeHelper.decodeCaseMapping(acquireLowerCaseMapping().getValue()); lowerCaseMapping = UnicodeHelper.createCharMapping(
UnicodeHelper.decodeCaseMapping(acquireLowerCaseMapping().getValue()));
} }
return lowerCaseMapping; return lowerCaseMapping;
} }
@ -259,9 +261,10 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
return mapChar(getUpperCaseMapping(), codePoint); return mapChar(getUpperCaseMapping(), codePoint);
} }
private static int[] getUpperCaseMapping() { private static CharMapping getUpperCaseMapping() {
if (upperCaseMapping == null) { if (upperCaseMapping == null) {
upperCaseMapping = UnicodeHelper.decodeCaseMapping(acquireUpperCaseMapping().getValue()); upperCaseMapping = UnicodeHelper.createCharMapping(
UnicodeHelper.decodeCaseMapping(acquireUpperCaseMapping().getValue()));
} }
return upperCaseMapping; return upperCaseMapping;
} }
@ -280,21 +283,27 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
return (char) toTitleCase((int) c); return (char) toTitleCase((int) c);
} }
private static int[] getTitleCaseMapping() { private static CharMapping getTitleCaseMapping() {
if (titleCaseMapping == null) { if (titleCaseMapping == null) {
titleCaseMapping = UnicodeHelper.decodeCaseMapping(acquireTitleCaseMapping().getValue()); titleCaseMapping = UnicodeHelper.createCharMapping(
UnicodeHelper.decodeCaseMapping(acquireTitleCaseMapping().getValue()));
} }
return titleCaseMapping; return titleCaseMapping;
} }
private static native StringResource acquireTitleCaseMapping(); private static native StringResource acquireTitleCaseMapping();
private static int mapChar(int[] table, int codePoint) { private static int mapChar(CharMapping table, int codePoint) {
int index = binarySearchTable(table, codePoint); if (codePoint < table.fastTable.length) {
if (index < 0 || index >= table.length / 2) { return codePoint + table.fastTable[codePoint];
}
var binSearchTable = table.binarySearchTable;
int index = binarySearchTable(binSearchTable, codePoint);
if (index < 0 || index * 2 >= binSearchTable.length) {
return 0; return 0;
} }
return codePoint + table[index * 2 + 1]; return codePoint + binSearchTable[index * 2 + 1];
} }
private static int binarySearchTable(int[] data, int key) { private static int binarySearchTable(int[] data, int key) {

View File

@ -640,6 +640,33 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
if (isEmpty()) { if (isEmpty()) {
return this; return this;
} }
var hasCharsToTransform = false;
var hasSurrogates = false;
for (var c : characters) {
if (Character.toLowerCase(c) != c) {
hasCharsToTransform = true;
break;
}
if (Character.isSurrogate(c)) {
hasSurrogates = true;
}
}
if (!hasCharsToTransform) {
return this;
}
return hasSurrogates ? toLowerCaseCodePoints() : toLowerCaseChars();
}
/**
 * Lower-cases this string one {@code char} at a time.
 *
 * <p>Every element of {@code characters} is passed through
 * {@code TCharacter.toLowerCase} independently, so the result always has the
 * same length as the source.
 *
 * @return a new string built from the converted chars
 */
private TString toLowerCaseChars() {
    var lowered = new char[characters.length];
    var position = 0;
    for (var c : characters) {
        lowered[position++] = TCharacter.toLowerCase(c);
    }
    return new TString(lowered);
}
private TString toLowerCaseCodePoints() {
int[] codePoints = new int[characters.length]; int[] codePoints = new int[characters.length];
int codePointCount = 0; int codePointCount = 0;
for (int i = 0; i < characters.length; ++i) { for (int i = 0; i < characters.length; ++i) {
@ -663,6 +690,33 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
if (isEmpty()) { if (isEmpty()) {
return this; return this;
} }
var hasCharsToTransform = false;
var hasSurrogates = false;
for (var c : characters) {
if (Character.toUpperCase(c) != c) {
hasCharsToTransform = true;
break;
}
if (Character.isSurrogate(c)) {
hasSurrogates = true;
}
}
if (!hasCharsToTransform) {
return this;
}
return hasSurrogates ? toUpperCaseCodePoints() : toUpperCaseChars();
}
/**
 * Upper-cases this string one {@code char} at a time.
 *
 * <p>Every element of {@code characters} is passed through
 * {@code TCharacter.toUpperCase} independently, so the result always has the
 * same length as the source.
 *
 * @return a new string built from the converted chars
 */
private TString toUpperCaseChars() {
    var raised = new char[characters.length];
    var position = 0;
    for (var c : characters) {
        raised[position++] = TCharacter.toUpperCase(c);
    }
    return new TString(raised);
}
private TString toUpperCaseCodePoints() {
int[] codePoints = new int[characters.length]; int[] codePoints = new int[characters.length];
int codePointCount = 0; int codePointCount = 0;
for (int i = 0; i < characters.length; ++i) { for (int i = 0; i < characters.length; ++i) {