From b006cbb2063135afdb3b2fe157d934b5407377c7 Mon Sep 17 00:00:00 2001 From: Ivan Hetman Date: Fri, 27 Oct 2023 18:25:32 +0300 Subject: [PATCH] classlib: various fixes in Character class (#831) * fix bug in `codePointBefore` * validate arguments in some methods * support negative `codePointOffset` in `offsetByCodePoints` * add tests --- .../teavm/classlib/java/lang/TCharacter.java | 84 +++++++++-- .../classlib/java/lang/CharacterTest.java | 141 ++++++++++++++++++ 2 files changed, 212 insertions(+), 13 deletions(-) diff --git a/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java b/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java index 87ac35aac..f66b0da1e 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java +++ b/classlib/src/main/java/org/teavm/classlib/java/lang/TCharacter.java @@ -15,6 +15,7 @@ */ package org.teavm.classlib.java.lang; +import java.util.Objects; import org.teavm.classlib.impl.unicode.CharMapping; import org.teavm.classlib.impl.unicode.UnicodeHelper; import org.teavm.platform.metadata.StringResource; @@ -177,7 +178,7 @@ public class TCharacter extends TObject implements TComparable { } public static int charCount(int codePoint) { - return isSupplementaryCodePoint(codePoint) ? 2 : 1; + return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; } public static int toCodePoint(char high, char low) { @@ -199,6 +200,9 @@ public class TCharacter extends TObject implements TComparable { } public static int codePointAt(char[] a, int index, int limit) { + if (index >= limit || index < 0 || limit > a.length) { + throw new IndexOutOfBoundsException(); + } if (index >= limit - 1 || !isHighSurrogate(a[index]) || !isLowSurrogate(a[index + 1])) { return a[index]; } else { @@ -218,8 +222,11 @@ public class TCharacter extends TObject implements TComparable { } public static int codePointBefore(char[] a, int index, int start) { + if (index > a.length || index <= start || start < 0) { + throw new IndexOutOfBoundsException(); + } if (index <= start + 1 || !isLowSurrogate(a[index - 1]) || !isHighSurrogate(a[index - 2])) { - return a[index]; + return a[index - 1]; } else { return toCodePoint(a[index - 2], a[index - 1]); } @@ -396,6 +403,9 @@ public class TCharacter extends TObject implements TComparable { private static native StringResource obtainClasses(); public static int toChars(int codePoint, char[] dst, int dstIndex) { + if (!isValidCodePoint(codePoint)) { + throw new IllegalArgumentException(); + } if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { dst[dstIndex] = highSurrogate(codePoint); dst[dstIndex + 1] = lowSurrogate(codePoint); @@ -407,6 +417,9 @@ public class TCharacter extends TObject implements TComparable { } public static char[] toChars(int codePoint) { + if (!isValidCodePoint(codePoint)) { + throw new IllegalArgumentException(); + } if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { return new char[] { highSurrogate(codePoint), lowSurrogate(codePoint) }; } else { @@ -415,6 +428,7 @@ public class TCharacter extends TObject implements TComparable { } public static int codePointCount(TCharSequence seq, int beginIndex, int endIndex) { + Objects.checkFromToIndex(beginIndex, endIndex, seq.length()); int count = endIndex - beginIndex; --endIndex; for (int i = beginIndex; i < endIndex; ++i) { @@ -427,6 +441,7 @@ public class TCharacter extends TObject implements TComparable { } public static int codePointCount(char[] a, int offset, int count) { + Objects.checkFromIndexSize(offset, count, a.length); int r = count; --count; for (int i = 0; i < count; ++i) { @@ -439,23 +454,66 @@ public class TCharacter extends TObject implements TComparable { } public static int offsetByCodePoints(TCharSequence seq, int index, int codePointOffset) { - for (int i = 0; i < codePointOffset; ++i) { - if (index < seq.length() - 1 && isHighSurrogate(seq.charAt(index)) - && isLowSurrogate(seq.charAt(index + 1))) { - index += 2; - } else { - index++; + if (codePointOffset >= 0) { + int i; + for (i = 0; i < codePointOffset && index < seq.length(); ++i) { + if (index < seq.length() - 1 && isHighSurrogate(seq.charAt(index)) + && isLowSurrogate(seq.charAt(index + 1))) { + index += 2; + } else { + index++; + } + } + if (i < codePointOffset) { + throw new IndexOutOfBoundsException(); + } + } else { + int i; + for (i = codePointOffset; i < 0 && index > 0; ++i) { + if (index > 0 && isLowSurrogate(seq.charAt(index - 1)) + && isHighSurrogate(seq.charAt(index - 2))) { + index -= 2; + } else { + index--; + } + } + if (i < 0) { + throw new IndexOutOfBoundsException(); } } return index; } public static int offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset) { - for (int i = 0; i < codePointOffset; ++i) { - if (index < count - 1 && isHighSurrogate(a[index + start]) && isLowSurrogate(a[index + start + 1])) { - index += 2; - } else { - index++; + if (count > a.length - start || start < 0 || count < 0 + || index < start || index > start + count) { + throw new IndexOutOfBoundsException(); + } + if (codePointOffset >= 0) { + int i; + for (i = 0; i < codePointOffset && index < start + count; ++i) { + if (index < count - 1 && isHighSurrogate(a[index]) + && isLowSurrogate(a[index + 1])) { + index += 2; + } else { + index++; + } + } + if (i < codePointOffset) { + throw new IndexOutOfBoundsException(); + } + } else { + int i; + for (i = codePointOffset; i < 0 && index > start; ++i) { + if (index > start && isLowSurrogate(a[index - 1]) + && isHighSurrogate(a[index - 2])) { + index -= 2; + } else { + index--; + } + } + if (i < 0) { + throw new IndexOutOfBoundsException(); } } return index; diff --git a/tests/src/test/java/org/teavm/classlib/java/lang/CharacterTest.java b/tests/src/test/java/org/teavm/classlib/java/lang/CharacterTest.java index 82d8637e2..a0a31563d 100644 --- a/tests/src/test/java/org/teavm/classlib/java/lang/CharacterTest.java +++ b/tests/src/test/java/org/teavm/classlib/java/lang/CharacterTest.java @@ -16,6 +16,7 @@ package org.teavm.classlib.java.lang; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; import org.junit.Test; import org.junit.runner.RunWith; import org.teavm.junit.TeaVMTestRunner; @@ -88,4 +89,144 @@ public class CharacterTest { assertEquals('\0', Character.forDigit(10, 10)); assertEquals('5', Character.forDigit(5, 6)); } + + @Test + public void offsetByCodePointsCharSequence() { + int result = Character.offsetByCodePoints("a\uD800\uDC00b", 0, 2); + assertEquals(3, result); + result = Character.offsetByCodePoints("abcd", 3, -1); + assertEquals(2, result); + result = Character.offsetByCodePoints("a\uD800\uDC00b", 0, 3); + assertEquals(4, result); + result = Character.offsetByCodePoints("a\uD800\uDC00b", 3, -1); + assertEquals(1, result); + result = Character.offsetByCodePoints("a\uD800\uDC00b", 3, 0); + assertEquals(3, result); + result = Character.offsetByCodePoints("\uD800\uDC00bc", 3, 0); + assertEquals(3, result); + result = Character.offsetByCodePoints("a\uDC00bc", 3, -1); + assertEquals(2, result); + result = Character.offsetByCodePoints("a\uD800bc", 3, -1); + assertEquals(2, result); + try { + Character.offsetByCodePoints(null, 0, 1); + fail(); + } catch (NullPointerException e) { + // ok + } + try { + Character.offsetByCodePoints("abc", -1, 1); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abc", 4, 1); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abc", 1, 3); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abc", 1, -2); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + } + @Test + public void offsetByCodePointsCharArray() { + int result = Character.offsetByCodePoints("a\uD800\uDC00b" + .toCharArray(), 0, 4, 0, 2); + assertEquals(3, result); + result = Character.offsetByCodePoints("a\uD800\uDC00b".toCharArray(), + 0, 4, 0, 3); + assertEquals(4, result); + result = Character.offsetByCodePoints("a\uD800\uDC00b\uD800c" + .toCharArray(), 0, 5, 0, 3); + assertEquals(4, result); + result = Character + .offsetByCodePoints("abcd".toCharArray(), 0, 4, 3, -1); + assertEquals(2, result); + result = Character + .offsetByCodePoints("abcd".toCharArray(), 1, 2, 3, -2); + assertEquals(1, result); + result = Character.offsetByCodePoints("a\uD800\uDC00b".toCharArray(), + 0, 4, 3, -1); + assertEquals(1, result); + result = Character.offsetByCodePoints("a\uD800\uDC00b".toCharArray(), + 0, 2, 2, -1); + assertEquals(1, result); + result = Character.offsetByCodePoints("a\uD800\uDC00b".toCharArray(), + 0, 4, 3, 0); + assertEquals(3, result); + result = Character.offsetByCodePoints("\uD800\uDC00bc".toCharArray(), + 0, 4, 3, 0); + assertEquals(3, result); + result = Character.offsetByCodePoints("a\uDC00bc".toCharArray(), 0, 4, + 3, -1); + assertEquals(2, result); + result = Character.offsetByCodePoints("a\uD800bc".toCharArray(), 0, 4, + 3, -1); + assertEquals(2, result); + try { + Character.offsetByCodePoints(null, 0, 4, 1, 1); + fail(); + } catch (NullPointerException e) { + // ok + } + try { + Character.offsetByCodePoints("abcd".toCharArray(), -1, 4, 1, 1); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abcd".toCharArray(), 0, -1, 1, 1); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abcd".toCharArray(), 2, 4, 1, 1); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abcd".toCharArray(), 1, 3, 0, 1); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abcd".toCharArray(), 1, 1, 3, 1); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abc".toCharArray(), 0, 3, 1, 3); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abc".toCharArray(), 0, 2, 1, 2); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + try { + Character.offsetByCodePoints("abc".toCharArray(), 1, 3, 1, -2); + fail(); + } catch (IndexOutOfBoundsException e) { + // ok + } + } }