From fc3d92f58dd6a0c22dcbbf99829a92c7b164b5de Mon Sep 17 00:00:00 2001
From: Alexey Andreev
Date: Sat, 30 Sep 2023 07:17:22 +0200
Subject: [PATCH] classlib: improve performance of String.toLower/toUpper

---
 .../classlib/impl/unicode/CharMapping.java    | 26 +++++++++
 .../classlib/impl/unicode/UnicodeHelper.java  | 20 +++++++
 .../teavm/classlib/java/lang/TCharacter.java  | 35 +++++++-----
 .../org/teavm/classlib/java/lang/TString.java | 54 +++++++++++++++++++
 4 files changed, 122 insertions(+), 13 deletions(-)
 create mode 100644 classlib/src/main/java/org/teavm/classlib/impl/unicode/CharMapping.java

diff --git a/classlib/src/main/java/org/teavm/classlib/impl/unicode/CharMapping.java b/classlib/src/main/java/org/teavm/classlib/impl/unicode/CharMapping.java
new file mode 100644
index 000000000..e620255b5
--- /dev/null
+++ b/classlib/src/main/java/org/teavm/classlib/impl/unicode/CharMapping.java
@@ -0,0 +1,26 @@
+/*
+ *  Copyright 2023 Alexey Andreev.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.teavm.classlib.impl.unicode;
+
+public class CharMapping {
+    public final int[] binarySearchTable;
+    public final int[] fastTable;
+
+    public CharMapping(int[] binarySearchTable, int[] fastTable) {
+        this.binarySearchTable = binarySearchTable;
+        this.fastTable = fastTable;
+    }
+}
diff --git a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java
index 1a0047940..4ef63a720 100644
--- a/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java
+++ b/classlib/src/main/java/org/teavm/classlib/impl/unicode/UnicodeHelper.java
@@ -126,6 +126,26 @@ public final class UnicodeHelper {
         return data;
     }
 
+    public static CharMapping createCharMapping(int[] data) {
+        var result = new int[65536];
+        var last = 0;
+        var lastValue = 0;
+        for (var i = 0; i < data.length; i += 2) {
+            var key = data[i];
+            var value = data[i + 1];
+            if (key >= result.length) {
+                if (key == last) {
+                    break;
+                }
+                key = result.length;
+            }
+            Arrays.fill(result, last, key, lastValue);
+            last = key;
+            lastValue = value;
+        }
+        return new CharMapping(data, result);
+    }
+
     public static char encodeByte(byte b) {
         if (b < '\"' - ' ') {
             return (char) (b + ' ');
diff --git a/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java b/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java
index 6df537812..b96d20191 100644
--- a/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java
+++ b/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java
@@ -15,6 +15,7 @@
  */
 package org.teavm.classlib.java.lang;
 
+import org.teavm.classlib.impl.unicode.CharMapping;
 import org.teavm.classlib.impl.unicode.UnicodeHelper;
 import org.teavm.platform.metadata.StringResource;
 
@@ -87,9 +88,9 @@ public class TCharacter extends TObject implements TComparable {
     public static final int BYTES = SIZE / Byte.SIZE;
     static final int ERROR = 0xFFFFFFFF;
     private static int[] digitMapping;
-    private static int[] titleCaseMapping;
-    private static int[] upperCaseMapping;
-    private static int[] lowerCaseMapping;
+    private static CharMapping titleCaseMapping;
+    private static CharMapping upperCaseMapping;
+    private static CharMapping lowerCaseMapping;
     private static UnicodeHelper.Range[] classMapping;
     private final char value;
     private static TCharacter[] characterCache = new TCharacter[128];
@@ -241,9 +242,10 @@ public class TCharacter extends TObject implements TComparable {
         return mapChar(getLowerCaseMapping(), ch);
     }
 
-    private static int[] getLowerCaseMapping() {
+    private static CharMapping getLowerCaseMapping() {
         if (lowerCaseMapping == null) {
-            lowerCaseMapping = UnicodeHelper.decodeCaseMapping(acquireLowerCaseMapping().getValue());
+            lowerCaseMapping = UnicodeHelper.createCharMapping(
+                    UnicodeHelper.decodeCaseMapping(acquireLowerCaseMapping().getValue()));
         }
         return lowerCaseMapping;
     }
@@ -259,9 +261,10 @@ public class TCharacter extends TObject implements TComparable {
         return mapChar(getUpperCaseMapping(), codePoint);
     }
 
-    private static int[] getUpperCaseMapping() {
+    private static CharMapping getUpperCaseMapping() {
         if (upperCaseMapping == null) {
-            upperCaseMapping = UnicodeHelper.decodeCaseMapping(acquireUpperCaseMapping().getValue());
+            upperCaseMapping = UnicodeHelper.createCharMapping(
+                    UnicodeHelper.decodeCaseMapping(acquireUpperCaseMapping().getValue()));
         }
         return upperCaseMapping;
     }
@@ -280,21 +283,27 @@ public class TCharacter extends TObject implements TComparable {
         return (char) toTitleCase((int) c);
     }
 
-    private static int[] getTitleCaseMapping() {
+    private static CharMapping getTitleCaseMapping() {
         if (titleCaseMapping == null) {
-            titleCaseMapping = UnicodeHelper.decodeCaseMapping(acquireTitleCaseMapping().getValue());
+            titleCaseMapping = UnicodeHelper.createCharMapping(
+                    UnicodeHelper.decodeCaseMapping(acquireTitleCaseMapping().getValue()));
         }
         return titleCaseMapping;
     }
 
     private static native StringResource acquireTitleCaseMapping();
 
-    private static int mapChar(int[] table, int codePoint) {
-        int index = binarySearchTable(table, codePoint);
-        if (index < 0 || index >= table.length / 2) {
+    private static int mapChar(CharMapping table, int codePoint) {
+        if (codePoint < table.fastTable.length) {
+            return codePoint + table.fastTable[codePoint];
+        }
+
+        var binSearchTable = table.binarySearchTable;
+        int index = binarySearchTable(binSearchTable, codePoint);
+        if (index < 0 || index * 2 >= binSearchTable.length) {
             return 0;
         }
-        return codePoint + table[index * 2 + 1];
+        return codePoint + binSearchTable[index * 2 + 1];
     }
 
     private static int binarySearchTable(int[] data, int key) {
diff --git a/classlib/src/main/java/org/teavm/classlib/java/lang/TString.java b/classlib/src/main/java/org/teavm/classlib/java/lang/TString.java
index 272145bf2..69ae16c20 100644
--- a/classlib/src/main/java/org/teavm/classlib/java/lang/TString.java
+++ b/classlib/src/main/java/org/teavm/classlib/java/lang/TString.java
@@ -640,6 +640,33 @@ public class TString extends TObject implements TSerializable, TComparable