classlib: improve performance of String.toLower/toUpper

This commit is contained in:
Alexey Andreev 2023-09-30 07:17:22 +02:00
parent ee0ebd4c2b
commit fc3d92f58d
4 changed files with 122 additions and 13 deletions

View File

@ -0,0 +1,26 @@
/*
* Copyright 2023 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.impl.unicode;
/**
 * Plain holder that pairs two {@code int[]} lookup tables of a character mapping.
 *
 * <p>Both arrays are kept by reference (no copy is made) and are exposed through
 * public final fields.</p>
 */
public class CharMapping {
    /** First constructor argument, stored as given. */
    public final int[] binarySearchTable;

    /** Second constructor argument, stored as given. */
    public final int[] fastTable;

    /**
     * Creates a mapping over the two supplied tables.
     *
     * @param binarySearchTable table stored in {@link #binarySearchTable}
     * @param fastTable table stored in {@link #fastTable}
     */
    public CharMapping(int[] binarySearchTable, int[] fastTable) {
        this.fastTable = fastTable;
        this.binarySearchTable = binarySearchTable;
    }
}

View File

@ -126,6 +126,26 @@ public final class UnicodeHelper {
return data;
}
/**
 * Builds a {@link CharMapping} from a flat array of (key, value) pairs.
 *
 * <p>{@code data} is read pairwise: {@code data[i]} is a key and {@code data[i + 1]} the value
 * that applies from that key up to (but excluding) the next key. The values are expanded into
 * a 65536-entry table indexed directly by key; entries below the first key stay 0.
 * Keys are expected in ascending order ({@code Arrays.fill} rejects a descending range).</p>
 *
 * @param data flat key/value pairs; stored by reference as the binary-search table of the result
 * @return a mapping holding {@code data} itself plus the freshly built direct-lookup table
 */
public static CharMapping createCharMapping(int[] data) {
    var result = new int[65536];
    var last = 0;
    var lastValue = 0;
    for (var i = 0; i < data.length; i += 2) {
        var key = data[i];
        if (key >= result.length) {
            // This and all later ranges start beyond the direct-lookup table,
            // so there is nothing more to expand; the tail is filled below.
            break;
        }
        Arrays.fill(result, last, key, lastValue);
        last = key;
        lastValue = data[i + 1];
    }
    // Close the final range. Without this, a table whose keys all lie below 65536
    // would leave every entry from the last key onwards at 0 instead of the last value.
    Arrays.fill(result, last, result.length, lastValue);
    return new CharMapping(data, result);
}
public static char encodeByte(byte b) {
if (b < '\"' - ' ') {
return (char) (b + ' ');

View File

@ -15,6 +15,7 @@
*/
package org.teavm.classlib.java.lang;
import org.teavm.classlib.impl.unicode.CharMapping;
import org.teavm.classlib.impl.unicode.UnicodeHelper;
import org.teavm.platform.metadata.StringResource;
@ -87,9 +88,9 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
public static final int BYTES = SIZE / Byte.SIZE;
static final int ERROR = 0xFFFFFFFF;
private static int[] digitMapping;
private static int[] titleCaseMapping;
private static int[] upperCaseMapping;
private static int[] lowerCaseMapping;
private static CharMapping titleCaseMapping;
private static CharMapping upperCaseMapping;
private static CharMapping lowerCaseMapping;
private static UnicodeHelper.Range[] classMapping;
private final char value;
private static TCharacter[] characterCache = new TCharacter[128];
@ -241,9 +242,10 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
return mapChar(getLowerCaseMapping(), ch);
}
/**
 * Returns the lower-case mapping, building it on the first call and caching it in the
 * static {@code lowerCaseMapping} field for later calls.
 *
 * NOTE(review): this span looks like a diff hunk printed without +/- markers, so each old
 * {@code int[]} line sits directly above the {@code CharMapping} line that replaces it - confirm.
 */
private static int[] getLowerCaseMapping() {
private static CharMapping getLowerCaseMapping() {
if (lowerCaseMapping == null) {
lowerCaseMapping = UnicodeHelper.decodeCaseMapping(acquireLowerCaseMapping().getValue());
// Newer form: the decoded table is additionally wrapped by createCharMapping.
lowerCaseMapping = UnicodeHelper.createCharMapping(
UnicodeHelper.decodeCaseMapping(acquireLowerCaseMapping().getValue()));
}
return lowerCaseMapping;
}
@ -259,9 +261,10 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
return mapChar(getUpperCaseMapping(), codePoint);
}
/**
 * Returns the upper-case mapping, building it on the first call and caching it in the
 * static {@code upperCaseMapping} field for later calls.
 *
 * NOTE(review): this span looks like a diff hunk printed without +/- markers, so each old
 * {@code int[]} line sits directly above the {@code CharMapping} line that replaces it - confirm.
 */
private static int[] getUpperCaseMapping() {
private static CharMapping getUpperCaseMapping() {
if (upperCaseMapping == null) {
upperCaseMapping = UnicodeHelper.decodeCaseMapping(acquireUpperCaseMapping().getValue());
// Newer form: the decoded table is additionally wrapped by createCharMapping.
upperCaseMapping = UnicodeHelper.createCharMapping(
UnicodeHelper.decodeCaseMapping(acquireUpperCaseMapping().getValue()));
}
return upperCaseMapping;
}
@ -280,21 +283,27 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
return (char) toTitleCase((int) c);
}
/**
 * Returns the title-case mapping, building it on the first call and caching it in the
 * static {@code titleCaseMapping} field for later calls.
 *
 * NOTE(review): this span looks like a diff hunk printed without +/- markers, so each old
 * {@code int[]} line sits directly above the {@code CharMapping} line that replaces it - confirm.
 */
private static int[] getTitleCaseMapping() {
private static CharMapping getTitleCaseMapping() {
if (titleCaseMapping == null) {
titleCaseMapping = UnicodeHelper.decodeCaseMapping(acquireTitleCaseMapping().getValue());
// Newer form: the decoded table is additionally wrapped by createCharMapping.
titleCaseMapping = UnicodeHelper.createCharMapping(
UnicodeHelper.decodeCaseMapping(acquireTitleCaseMapping().getValue()));
}
return titleCaseMapping;
}
private static native StringResource acquireTitleCaseMapping();
/**
 * Maps a code point through a case-mapping table by adding the table's stored value to it.
 *
 * NOTE(review): this span looks like a diff hunk printed without +/- markers, so old lines
 * (taking {@code int[] table}) are interleaved with the new lines (taking
 * {@code CharMapping table}) that replace them - confirm.
 *
 * In the {@code CharMapping} form, code points below {@code table.fastTable.length} are answered
 * by direct array lookup; all others go through {@code binarySearchTable(...)} over
 * {@code table.binarySearchTable}, and 0 is returned when the found index is negative or
 * lies past the end of that array.
 */
private static int mapChar(int[] table, int codePoint) {
int index = binarySearchTable(table, codePoint);
if (index < 0 || index >= table.length / 2) {
private static int mapChar(CharMapping table, int codePoint) {
// Fast path: direct lookup.
// NOTE(review): there is no lower-bound check, so a negative codePoint would index
// fastTable out of range - verify that callers never pass one.
if (codePoint < table.fastTable.length) {
return codePoint + table.fastTable[codePoint];
}
var binSearchTable = table.binarySearchTable;
int index = binarySearchTable(binSearchTable, codePoint);
// The array holds two ints per entry, hence the index * 2 bound.
if (index < 0 || index * 2 >= binSearchTable.length) {
return 0;
}
return codePoint + table[index * 2 + 1];
// Value of the found entry is stored right after its key.
return codePoint + binSearchTable[index * 2 + 1];
}
private static int binarySearchTable(int[] data, int key) {

View File

@ -640,6 +640,33 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
if (isEmpty()) {
return this;
}
var hasCharsToTransform = false;
var hasSurrogates = false;
for (var c : characters) {
if (Character.toLowerCase(c) != c) {
hasCharsToTransform = true;
break;
}
if (Character.isSurrogate(c)) {
hasSurrogates = true;
}
}
if (!hasCharsToTransform) {
return this;
}
return hasSurrogates ? toLowerCaseCodePoints() : toLowerCaseChars();
}
/**
 * Lower-cases this string one UTF-16 code unit at a time.
 *
 * <p>A surrogate is only half of a supplementary code point, so mapping it on its own cannot
 * change the character's case. The scan that selects this method stops at the first
 * character needing conversion and may therefore not have seen surrogates located further
 * on; when one is met here the whole conversion is delegated to
 * {@link #toLowerCaseCodePoints()} instead of silently leaving it unconverted.</p>
 *
 * @return a new string with every character lower-cased
 */
private TString toLowerCaseChars() {
    var chars = new char[characters.length];
    for (int i = 0; i < characters.length; ++i) {
        char c = characters[i];
        if (Character.isSurrogate(c)) {
            // Per-unit mapping is not applicable; redo the job on code points.
            return toLowerCaseCodePoints();
        }
        chars[i] = TCharacter.toLowerCase(c);
    }
    return new TString(chars);
}
private TString toLowerCaseCodePoints() {
int[] codePoints = new int[characters.length];
int codePointCount = 0;
for (int i = 0; i < characters.length; ++i) {
@ -663,6 +690,33 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
if (isEmpty()) {
return this;
}
var hasCharsToTransform = false;
var hasSurrogates = false;
for (var c : characters) {
if (Character.toUpperCase(c) != c) {
hasCharsToTransform = true;
break;
}
if (Character.isSurrogate(c)) {
hasSurrogates = true;
}
}
if (!hasCharsToTransform) {
return this;
}
return hasSurrogates ? toUpperCaseCodePoints() : toUpperCaseChars();
}
/**
 * Upper-cases this string one UTF-16 code unit at a time.
 *
 * <p>A surrogate is only half of a supplementary code point, so mapping it on its own cannot
 * change the character's case. The scan that selects this method stops at the first
 * character needing conversion and may therefore not have seen surrogates located further
 * on; when one is met here the whole conversion is delegated to
 * {@link #toUpperCaseCodePoints()} instead of silently leaving it unconverted.</p>
 *
 * @return a new string with every character upper-cased
 */
private TString toUpperCaseChars() {
    var chars = new char[characters.length];
    for (int i = 0; i < characters.length; ++i) {
        char c = characters[i];
        if (Character.isSurrogate(c)) {
            // Per-unit mapping is not applicable; redo the job on code points.
            return toUpperCaseCodePoints();
        }
        chars[i] = TCharacter.toUpperCase(c);
    }
    return new TString(chars);
}
private TString toUpperCaseCodePoints() {
int[] codePoints = new int[characters.length];
int codePointCount = 0;
for (int i = 0; i < characters.length; ++i) {