classlib: various fixes in Character class (#831)

* fix bug in `codePointBefore`
* validate arguments in some methods
* support negative `codePointOffset` in `offsetByCodePoints`
* add tests
This commit is contained in:
Ivan Hetman 2023-10-27 18:25:32 +03:00 committed by GitHub
parent fc49094d96
commit b006cbb206
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 212 additions and 13 deletions

View File

@ -15,6 +15,7 @@
*/
package org.teavm.classlib.java.lang;
import java.util.Objects;
import org.teavm.classlib.impl.unicode.CharMapping;
import org.teavm.classlib.impl.unicode.UnicodeHelper;
import org.teavm.platform.metadata.StringResource;
@ -177,7 +178,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
}
/**
 * Returns the number of {@code char} values needed to represent the given code point.
 *
 * <p>The argument is not validated here; any value at or above
 * {@code MIN_SUPPLEMENTARY_CODE_POINT} is reported as needing two chars.
 *
 * @param codePoint the code point to measure
 * @return 2 if {@code codePoint} is at or above {@code MIN_SUPPLEMENTARY_CODE_POINT}, otherwise 1
 */
public static int charCount(int codePoint) {
    return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
}
public static int toCodePoint(char high, char low) {
@ -199,6 +200,9 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
}
public static int codePointAt(char[] a, int index, int limit) {
if (index >= limit || index < 0 || limit > a.length) {
throw new IndexOutOfBoundsException();
}
if (index >= limit - 1 || !isHighSurrogate(a[index]) || !isLowSurrogate(a[index + 1])) {
return a[index];
} else {
@ -218,8 +222,11 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
}
public static int codePointBefore(char[] a, int index, int start) {
if (index > a.length || index <= start || start < 0) {
throw new IndexOutOfBoundsException();
}
if (index <= start + 1 || !isLowSurrogate(a[index - 1]) || !isHighSurrogate(a[index - 2])) {
return a[index];
return a[index - 1];
} else {
return toCodePoint(a[index - 2], a[index - 1]);
}
@ -396,6 +403,9 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
private static native StringResource obtainClasses();
public static int toChars(int codePoint, char[] dst, int dstIndex) {
if (!isValidCodePoint(codePoint)) {
throw new IllegalArgumentException();
}
if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
dst[dstIndex] = highSurrogate(codePoint);
dst[dstIndex + 1] = lowSurrogate(codePoint);
@ -407,6 +417,9 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
}
public static char[] toChars(int codePoint) {
if (!isValidCodePoint(codePoint)) {
throw new IllegalArgumentException();
}
if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
return new char[] { highSurrogate(codePoint), lowSurrogate(codePoint) };
} else {
@ -415,6 +428,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
}
public static int codePointCount(TCharSequence seq, int beginIndex, int endIndex) {
Objects.checkFromToIndex(beginIndex, endIndex, seq.length());
int count = endIndex - beginIndex;
--endIndex;
for (int i = beginIndex; i < endIndex; ++i) {
@ -427,6 +441,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
}
public static int codePointCount(char[] a, int offset, int count) {
Objects.checkFromIndexSize(offset, count, a.length);
int r = count;
--count;
for (int i = 0; i < count; ++i) {
@ -439,23 +454,66 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
}
public static int offsetByCodePoints(TCharSequence seq, int index, int codePointOffset) {
for (int i = 0; i < codePointOffset; ++i) {
if (index < seq.length() - 1 && isHighSurrogate(seq.charAt(index))
&& isLowSurrogate(seq.charAt(index + 1))) {
index += 2;
} else {
index++;
if (codePointOffset >= 0) {
int i;
for (i = 0; i < codePointOffset && index < seq.length(); ++i) {
if (index < seq.length() - 1 && isHighSurrogate(seq.charAt(index))
&& isLowSurrogate(seq.charAt(index + 1))) {
index += 2;
} else {
index++;
}
}
if (i < codePointOffset) {
throw new IndexOutOfBoundsException();
}
} else {
int i;
for (i = codePointOffset; i < 0 && index > 0; ++i) {
if (index > 0 && isLowSurrogate(seq.charAt(index - 1))
&& isHighSurrogate(seq.charAt(index - 2))) {
index -= 2;
} else {
index--;
}
}
if (i < 0) {
throw new IndexOutOfBoundsException();
}
}
return index;
}
public static int offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset) {
for (int i = 0; i < codePointOffset; ++i) {
if (index < count - 1 && isHighSurrogate(a[index + start]) && isLowSurrogate(a[index + start + 1])) {
index += 2;
} else {
index++;
if (count > a.length - start || start < 0 || count < 0
|| index < start || index > start + count) {
throw new IndexOutOfBoundsException();
}
if (codePointOffset >= 0) {
int i;
for (i = 0; i < codePointOffset && index < start + count; ++i) {
if (index < count - 1 && isHighSurrogate(a[index])
&& isLowSurrogate(a[index + 1])) {
index += 2;
} else {
index++;
}
}
if (i < codePointOffset) {
throw new IndexOutOfBoundsException();
}
} else {
int i;
for (i = codePointOffset; i < 0 && index > start; ++i) {
if (index > start && isLowSurrogate(a[index - 1])
&& isHighSurrogate(a[index - 2])) {
index -= 2;
} else {
index--;
}
}
if (i < 0) {
throw new IndexOutOfBoundsException();
}
}
return index;

View File

@ -16,6 +16,7 @@
package org.teavm.classlib.java.lang;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.teavm.junit.TeaVMTestRunner;
@ -88,4 +89,144 @@ public class CharacterTest {
assertEquals('\0', Character.forDigit(10, 10));
assertEquals('5', Character.forDigit(5, 6));
}
@Test
public void offsetByCodePointsCharSequence() {
    // Fixture: a surrogate pair sandwiched between two BMP chars.
    String pairInMiddle = "a\uD800\uDC00b";

    // Forward and backward moves, including across the surrogate pair.
    assertEquals(3, Character.offsetByCodePoints(pairInMiddle, 0, 2));
    assertEquals(2, Character.offsetByCodePoints("abcd", 3, -1));
    assertEquals(4, Character.offsetByCodePoints(pairInMiddle, 0, 3));
    assertEquals(1, Character.offsetByCodePoints(pairInMiddle, 3, -1));

    // A zero offset leaves the index untouched.
    assertEquals(3, Character.offsetByCodePoints(pairInMiddle, 3, 0));
    assertEquals(3, Character.offsetByCodePoints("\uD800\uDC00bc", 3, 0));

    // Unpaired surrogates are stepped over as single code points.
    assertEquals(2, Character.offsetByCodePoints("a\uDC00bc", 3, -1));
    assertEquals(2, Character.offsetByCodePoints("a\uD800bc", 3, -1));

    // A null sequence is rejected.
    try {
        Character.offsetByCodePoints(null, 0, 1);
        fail();
    } catch (NullPointerException expected) {
        // expected
    }

    // Starting index below zero.
    try {
        Character.offsetByCodePoints("abc", -1, 1);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // Starting index past the end.
    try {
        Character.offsetByCodePoints("abc", 4, 1);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // Not enough code points ahead of the index.
    try {
        Character.offsetByCodePoints("abc", 1, 3);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // Not enough code points behind the index.
    try {
        Character.offsetByCodePoints("abc", 1, -2);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }
}
@Test
public void offsetByCodePointsCharArray() {
    // Shared fixtures; the method under test only reads the arrays.
    char[] pairInMiddle = "a\uD800\uDC00b".toCharArray();
    char[] plain = "abcd".toCharArray();
    char[] shortPlain = "abc".toCharArray();

    // Forward moves, including across a surrogate pair and past a lone high surrogate.
    assertEquals(3, Character.offsetByCodePoints(pairInMiddle, 0, 4, 0, 2));
    assertEquals(4, Character.offsetByCodePoints(pairInMiddle, 0, 4, 0, 3));
    assertEquals(4, Character.offsetByCodePoints("a\uD800\uDC00b\uD800c".toCharArray(), 0, 5, 0, 3));

    // Backward moves, with and without a non-zero start.
    assertEquals(2, Character.offsetByCodePoints(plain, 0, 4, 3, -1));
    assertEquals(1, Character.offsetByCodePoints(plain, 1, 2, 3, -2));
    assertEquals(1, Character.offsetByCodePoints(pairInMiddle, 0, 4, 3, -1));
    assertEquals(1, Character.offsetByCodePoints(pairInMiddle, 0, 2, 2, -1));

    // A zero offset leaves the index untouched.
    assertEquals(3, Character.offsetByCodePoints(pairInMiddle, 0, 4, 3, 0));
    assertEquals(3, Character.offsetByCodePoints("\uD800\uDC00bc".toCharArray(), 0, 4, 3, 0));

    // Unpaired surrogates are stepped over as single code points.
    assertEquals(2, Character.offsetByCodePoints("a\uDC00bc".toCharArray(), 0, 4, 3, -1));
    assertEquals(2, Character.offsetByCodePoints("a\uD800bc".toCharArray(), 0, 4, 3, -1));

    // A null array is rejected.
    try {
        Character.offsetByCodePoints(null, 0, 4, 1, 1);
        fail();
    } catch (NullPointerException expected) {
        // expected
    }

    // Negative start.
    try {
        Character.offsetByCodePoints(plain, -1, 4, 1, 1);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // Negative count.
    try {
        Character.offsetByCodePoints(plain, 0, -1, 1, 1);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // start + count runs past the end of the array.
    try {
        Character.offsetByCodePoints(plain, 2, 4, 1, 1);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // Index before the start of the subarray.
    try {
        Character.offsetByCodePoints(plain, 1, 3, 0, 1);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // Index past the end of the subarray.
    try {
        Character.offsetByCodePoints(plain, 1, 1, 3, 1);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // Not enough code points ahead of the index.
    try {
        Character.offsetByCodePoints(shortPlain, 0, 3, 1, 3);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // Not enough code points ahead of the index within a shortened subarray.
    try {
        Character.offsetByCodePoints(shortPlain, 0, 2, 1, 2);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }

    // The final case passes a subarray that runs past the end of the array together with a negative offset.
    try {
        Character.offsetByCodePoints(shortPlain, 1, 3, 1, -2);
        fail();
    } catch (IndexOutOfBoundsException expected) {
        // expected
    }
}
}