mirror of
https://github.com/Eaglercraft-TeaVM-Fork/eagler-teavm.git
synced 2024-12-22 16:14:10 -08:00
Refactoring of UTF-16 encoding/decoding for further implementation of
encoding/decoding charsets
This commit is contained in:
parent
79e2eca91d
commit
e595ecc5db
|
@ -1,10 +0,0 @@
|
||||||
package org.teavm.classlib.impl.charset;
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @author Alexey Andreev
|
|
||||||
*/
|
|
||||||
@CharsetName("UTF-16")
|
|
||||||
public class UTF16Charset {
|
|
||||||
|
|
||||||
}
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
package org.teavm.classlib.impl.charset;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author Alexey Andreev
|
||||||
|
*/
|
||||||
|
/**
 * Static helpers for splitting supplementary code points into UTF-16
 * surrogate pairs and for recombining surrogate pairs into code points.
 *
 * <p>A surrogate code unit carries a 6-bit tag in its upper bits
 * ({@link #SURROGATE_BIT_MASK}) and 10 payload bits
 * ({@link #SURROGATE_BIT_INV_MASK}). High (leading) surrogates are tagged
 * {@code 0xD800}, low (trailing) surrogates are tagged {@code 0xDC00}.</p>
 */
public class UTF16Helper {
    /** Selects the 6 tag bits that distinguish high surrogates from low surrogates. */
    public static final int SURROGATE_BIT_MASK = 0xFC00;

    /** Selects the 10 payload bits carried by a single surrogate. */
    public static final int SURROGATE_BIT_INV_MASK = 0x03FF;

    /** Tag bits of a high (leading) surrogate: the range 0xD800..0xDBFF. */
    public static final int HIGH_SURROGATE_BITS = 0xD800;

    /** Tag bits of a low (trailing) surrogate: the range 0xDC00..0xDFFF. */
    public static final int LOW_SURROGATE_BITS = 0xDC00;

    /** Number of payload bits stored in each surrogate. */
    public static final int MEANINGFUL_SURROGATE_BITS = 10;

    /** First code point that cannot be represented by a single UTF-16 code unit. */
    public static final int SUPPLEMENTARY_PLANE = 0x10000;

    /**
     * Builds the high (leading) surrogate from bits 10..19 of the argument.
     *
     * @param codePoint the supplementary code point with
     *        {@link #SUPPLEMENTARY_PLANE} already subtracted, i.e. the 20-bit
     *        offset that {@link #buildCodePoint(char, char)} adds the plane
     *        base back onto. Passing a raw code point gives a wrong result,
     *        because the plane bits end up in the 10-bit payload.
     * @return the high surrogate, in the range 0xD800..0xDBFF
     */
    public static char highSurrogate(int codePoint) {
        int payload = (codePoint >> MEANINGFUL_SURROGATE_BITS) & SURROGATE_BIT_INV_MASK;
        return (char) (HIGH_SURROGATE_BITS | payload);
    }

    /**
     * Builds the low (trailing) surrogate from the lowest 10 bits of the argument.
     *
     * <p>The lowest 10 bits of {@link #SUPPLEMENTARY_PLANE} are zero, so the
     * result is the same whether or not the plane base has been subtracted.</p>
     *
     * @param codePoint the supplementary code point or its 20-bit offset
     * @return the low surrogate, in the range 0xDC00..0xDFFF
     */
    public static char lowSurrogate(int codePoint) {
        int payload = codePoint & SURROGATE_BIT_INV_MASK;
        return (char) (LOW_SURROGATE_BITS | payload);
    }

    /**
     * Tells whether the given code unit is a high (leading) surrogate.
     *
     * @param c the UTF-16 code unit to test
     * @return {@code true} if {@code c} lies in 0xD800..0xDBFF
     */
    public static boolean isHighSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS;
    }

    /**
     * Tells whether the given code unit is a low (trailing) surrogate.
     *
     * @param c the UTF-16 code unit to test
     * @return {@code true} if {@code c} lies in 0xDC00..0xDFFF
     */
    public static boolean isLowSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS;
    }

    /**
     * Tells whether two consecutive code units form a valid surrogate pair.
     *
     * @param a the first code unit
     * @param b the code unit that follows {@code a}
     * @return {@code true} if {@code a} is a high surrogate and {@code b} is a low surrogate
     */
    public static boolean isSurrogatePair(char a, char b) {
        return isHighSurrogate(a) && isLowSurrogate(b);
    }

    /**
     * Combines a surrogate pair into the supplementary code point it encodes.
     * The arguments are not validated; only their payload bits are used.
     *
     * @param a the high (leading) surrogate
     * @param b the low (trailing) surrogate
     * @return the code point, in the range 0x10000..0x10FFFF
     */
    public static int buildCodePoint(char a, char b) {
        int highPayload = (a & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS;
        int lowPayload = b & SURROGATE_BIT_INV_MASK;
        // Merge both halves first, then add the plane base: '+' binds tighter
        // than '|', so the grouping has to be explicit.
        return (highPayload | lowPayload) + SUPPLEMENTARY_PLANE;
    }
}
|
|
@ -1,5 +1,6 @@
|
||||||
package org.teavm.classlib.java.lang;
|
package org.teavm.classlib.java.lang;
|
||||||
|
|
||||||
|
import org.teavm.classlib.impl.charset.UTF16Helper;
|
||||||
import org.teavm.classlib.java.lang.io.TSerializable;
|
import org.teavm.classlib.java.lang.io.TSerializable;
|
||||||
import org.teavm.classlib.java.util.TArrays;
|
import org.teavm.classlib.java.util.TArrays;
|
||||||
import org.teavm.javascript.ni.Remove;
|
import org.teavm.javascript.ni.Remove;
|
||||||
|
@ -289,13 +290,13 @@ class TAbstractStringBuilder extends TObject implements TSerializable, TCharSequ
|
||||||
}
|
}
|
||||||
|
|
||||||
protected TAbstractStringBuilder appendCodePoint(int codePoint) {
|
protected TAbstractStringBuilder appendCodePoint(int codePoint) {
|
||||||
if (codePoint < TString.SUPPLEMENTARY_PLANE) {
|
if (codePoint < UTF16Helper.SUPPLEMENTARY_PLANE) {
|
||||||
return append((char)codePoint);
|
return append((char)codePoint);
|
||||||
}
|
}
|
||||||
ensureCapacity(length + 2);
|
ensureCapacity(length + 2);
|
||||||
codePoint -= TString.SUPPLEMENTARY_PLANE;
|
codePoint -= UTF16Helper.SUPPLEMENTARY_PLANE;
|
||||||
buffer[length++] = TString.highSurrogate(codePoint);
|
buffer[length++] = UTF16Helper.highSurrogate(codePoint);
|
||||||
buffer[length++] = TString.lowSurrogate(codePoint);
|
buffer[length++] = UTF16Helper.lowSurrogate(codePoint);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -362,6 +363,6 @@ class TAbstractStringBuilder extends TObject implements TSerializable, TCharSequ
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLength(int newLength) {
|
public void setLength(int newLength) {
|
||||||
length = 0;
|
length = newLength;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package org.teavm.classlib.java.lang;
|
package org.teavm.classlib.java.lang;
|
||||||
|
|
||||||
|
import org.teavm.classlib.impl.charset.UTF16Helper;
|
||||||
import org.teavm.classlib.java.lang.io.TSerializable;
|
import org.teavm.classlib.java.lang.io.TSerializable;
|
||||||
import org.teavm.javascript.ni.GeneratedBy;
|
import org.teavm.javascript.ni.GeneratedBy;
|
||||||
import org.teavm.javascript.ni.Rename;
|
import org.teavm.javascript.ni.Rename;
|
||||||
|
@ -10,12 +11,6 @@ import org.teavm.javascript.ni.Rename;
|
||||||
*/
|
*/
|
||||||
public class TString extends TObject implements TSerializable, TComparable<TString>,
|
public class TString extends TObject implements TSerializable, TComparable<TString>,
|
||||||
TCharSequence {
|
TCharSequence {
|
||||||
static int SURROGATE_BIT_MASK = 0xFC00;
|
|
||||||
static int SURROGATE_BIT_INV_MASK = 0x03FF;
|
|
||||||
static int HIGH_SURROGATE_BITS = 0xF800;
|
|
||||||
static int LOW_SURROGATE_BITS = 0xF800;
|
|
||||||
static int MEANINGFUL_SURROGATE_BITS = 10;
|
|
||||||
static int SUPPLEMENTARY_PLANE = 0x10000;
|
|
||||||
private char[] characters;
|
private char[] characters;
|
||||||
private transient int hashCode;
|
private transient int hashCode;
|
||||||
|
|
||||||
|
@ -54,29 +49,26 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
|
|
||||||
public int codePointAt(int index) {
|
public int codePointAt(int index) {
|
||||||
if (index == characters.length - 1 || (characters[index] & SURROGATE_BIT_MASK) != HIGH_SURROGATE_BITS ||
|
if (index == characters.length - 1 || !UTF16Helper.isHighSurrogate(characters[index]) ||
|
||||||
(characters[index + 1] & SURROGATE_BIT_MASK) != LOW_SURROGATE_BITS) {
|
!UTF16Helper.isLowSurrogate(characters[index + 1])) {
|
||||||
return characters[index];
|
return characters[index];
|
||||||
}
|
}
|
||||||
return ((characters[index] & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS) |
|
return UTF16Helper.buildCodePoint(characters[index], characters[index + 1]);
|
||||||
(characters[index + 1] & SURROGATE_BIT_INV_MASK) + SUPPLEMENTARY_PLANE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int codePointBefore(int index) {
|
public int codePointBefore(int index) {
|
||||||
if (index == 1 || (characters[index] & SURROGATE_BIT_MASK) != LOW_SURROGATE_BITS ||
|
if (index == 1 || !UTF16Helper.isHighSurrogate(characters[index]) ||
|
||||||
(characters[index - 1] & SURROGATE_BIT_MASK) != HIGH_SURROGATE_BITS) {
|
UTF16Helper.isLowSurrogate(characters[index - 1])) {
|
||||||
return characters[index - 1];
|
return characters[index - 1];
|
||||||
}
|
}
|
||||||
return ((characters[index - 1] & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS) |
|
return UTF16Helper.buildCodePoint(characters[index - 1], characters[index]);
|
||||||
(characters[index] & SURROGATE_BIT_INV_MASK) + SUPPLEMENTARY_PLANE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int codePointCount(int beginIndex, int endIndex) {
|
public int codePointCount(int beginIndex, int endIndex) {
|
||||||
int count = endIndex;
|
int count = endIndex;
|
||||||
--endIndex;
|
--endIndex;
|
||||||
for (int i = beginIndex; i < endIndex; ++i) {
|
for (int i = beginIndex; i < endIndex; ++i) {
|
||||||
if ((characters[i] & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS &&
|
if (UTF16Helper.isHighSurrogate(characters[i]) && UTF16Helper.isLowSurrogate(characters[i + 1])) {
|
||||||
(characters[i + 1] & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS) {
|
|
||||||
--count;
|
--count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -85,8 +77,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
|
|
||||||
public int offsetByCodePoints(int index, int codePointOffset) {
|
public int offsetByCodePoints(int index, int codePointOffset) {
|
||||||
for (int i = 0; i < codePointOffset; ++i) {
|
for (int i = 0; i < codePointOffset; ++i) {
|
||||||
if (index < characters.length - 1 && (characters[index] & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS &&
|
if (index < characters.length - 1 && UTF16Helper.isHighSurrogate(characters[index]) &&
|
||||||
(characters[index + 1] & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS) {
|
UTF16Helper.isLowSurrogate(characters[index + 1])) {
|
||||||
index += 2;
|
index += 2;
|
||||||
} else {
|
} else {
|
||||||
index++;
|
index++;
|
||||||
|
@ -192,17 +184,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char highSurrogate(int codePoint) {
|
|
||||||
return (char)(TString.HIGH_SURROGATE_BITS | (codePoint >> TString.MEANINGFUL_SURROGATE_BITS) &
|
|
||||||
TString.SURROGATE_BIT_INV_MASK);
|
|
||||||
}
|
|
||||||
|
|
||||||
static char lowSurrogate(int codePoint) {
|
|
||||||
return (char)(TString.HIGH_SURROGATE_BITS | codePoint & TString.SURROGATE_BIT_INV_MASK);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int indexOf(int ch, int fromIndex) {
|
public int indexOf(int ch, int fromIndex) {
|
||||||
if (ch < SUPPLEMENTARY_PLANE) {
|
if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
|
||||||
char bmpChar = (char)ch;
|
char bmpChar = (char)ch;
|
||||||
for (int i = fromIndex; i < characters.length; ++i) {
|
for (int i = fromIndex; i < characters.length; ++i) {
|
||||||
if (characters[i] == bmpChar) {
|
if (characters[i] == bmpChar) {
|
||||||
|
@ -211,8 +194,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
} else {
|
} else {
|
||||||
char hi = highSurrogate(ch);
|
char hi = UTF16Helper.highSurrogate(ch);
|
||||||
char lo = lowSurrogate(ch);
|
char lo = UTF16Helper.lowSurrogate(ch);
|
||||||
for (int i = fromIndex; i < characters.length - 1; ++i) {
|
for (int i = fromIndex; i < characters.length - 1; ++i) {
|
||||||
if (characters[i] == hi && characters[i + 1] == lo) {
|
if (characters[i] == hi && characters[i + 1] == lo) {
|
||||||
return i;
|
return i;
|
||||||
|
@ -227,7 +210,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
|
|
||||||
public int lastIndexOf(int ch, int fromIndex) {
|
public int lastIndexOf(int ch, int fromIndex) {
|
||||||
if (ch < SUPPLEMENTARY_PLANE) {
|
if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
|
||||||
char bmpChar = (char)ch;
|
char bmpChar = (char)ch;
|
||||||
for (int i = fromIndex; i >= 0; --i) {
|
for (int i = fromIndex; i >= 0; --i) {
|
||||||
if (characters[i] == bmpChar) {
|
if (characters[i] == bmpChar) {
|
||||||
|
@ -236,8 +219,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
} else {
|
} else {
|
||||||
char hi = highSurrogate(ch);
|
char hi = UTF16Helper.highSurrogate(ch);
|
||||||
char lo = lowSurrogate(ch);
|
char lo = UTF16Helper.lowSurrogate(ch);
|
||||||
for (int i = fromIndex; i >= 1; --i) {
|
for (int i = fromIndex; i >= 1; --i) {
|
||||||
if (characters[i] == lo && characters[i - 1] == hi) {
|
if (characters[i] == lo && characters[i - 1] == hi) {
|
||||||
return i;
|
return i;
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package org.teavm.classlib.java.lang.io;
|
package org.teavm.classlib.java.lang.io;
|
||||||
|
|
||||||
|
import org.teavm.classlib.java.lang.TMath;
|
||||||
import org.teavm.classlib.java.lang.TStringBuilder;
|
import org.teavm.classlib.java.lang.TStringBuilder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -95,7 +96,16 @@ public class TPrintStream extends TFilterOutputStream {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void print(char[] s) {
|
public void print(char[] s) {
|
||||||
|
print(s, 0, s.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void print(char[] s, int begin, int end) {
|
||||||
|
int[] codePoints = new int[TMath.min(s.length, 4096)];
|
||||||
|
}
|
||||||
|
|
||||||
|
public void print(char c) {
|
||||||
|
buffer[0] = c;
|
||||||
|
print(buffer, 0, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void print(int i) {
|
public void print(int i) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user