Refactoring of UTF-16 encoding/decoding for further implementation of encoding/decoding charsets
This commit is contained in:
konsoletyper 2013-12-17 17:23:59 +04:00
parent 79e2eca91d
commit e595ecc5db
5 changed files with 71 additions and 48 deletions

View File

@ -1,10 +0,0 @@
package org.teavm.classlib.impl.charset;
/**
* Empty class tagged with the charset name {@code "UTF-16"} through the
* {@code CharsetName} annotation; it declares no fields or methods of its own.
*
* @author Alexey Andreev
*/
@CharsetName("UTF-16")
public class UTF16Charset {
}

View File

@ -0,0 +1,39 @@
package org.teavm.classlib.impl.charset;
/**
 * Constants and bit-level helpers for UTF-16 surrogate pairs.
 *
 * <p>A supplementary code point (U+10000 and above) is represented by a high
 * surrogate (U+D800..U+DBFF) followed by a low surrogate (U+DC00..U+DFFF). Each
 * surrogate carries ten bits of {@code codePoint - SUPPLEMENTARY_PLANE}.</p>
 *
 * @author Alexey Andreev
 */
public class UTF16Helper {
    /** Selects the six leading bits that tag a code unit as a high or low surrogate. */
    public static final int SURROGATE_BIT_MASK = 0xFC00;
    /** Selects the ten payload bits carried by a surrogate code unit. */
    public static final int SURROGATE_BIT_INV_MASK = 0x03FF;
    /** Tag bits of a high (leading) surrogate, i.e. the range U+D800..U+DBFF. */
    public static final int HIGH_SURROGATE_BITS = 0xD800;
    /** Tag bits of a low (trailing) surrogate, i.e. the range U+DC00..U+DFFF. */
    public static final int LOW_SURROGATE_BITS = 0xDC00;
    /** Number of payload bits stored in each surrogate. */
    public static final int MEANINGFUL_SURROGATE_BITS = 10;
    /** First code point that needs a surrogate pair. */
    public static final int SUPPLEMENTARY_PLANE = 0x10000;

    /**
     * Builds the high surrogate from bits 10..19 of the argument.
     *
     * @param codePoint supplementary code point with {@link #SUPPLEMENTARY_PLANE}
     *        already subtracted by the caller; passing the raw code point would
     *        leak bit 16 into the payload
     * @return the high (leading) surrogate
     */
    public static char highSurrogate(int codePoint) {
        int payload = (codePoint >> MEANINGFUL_SURROGATE_BITS) & SURROGATE_BIT_INV_MASK;
        return (char) (HIGH_SURROGATE_BITS | payload);
    }

    /**
     * Builds the low surrogate from the ten lowest bits of the argument.
     *
     * @param codePoint supplementary code point; the result is the same whether or
     *        not {@link #SUPPLEMENTARY_PLANE} was subtracted, because that constant
     *        has no bits set in the low ten positions
     * @return the low (trailing) surrogate
     */
    public static char lowSurrogate(int codePoint) {
        // Must carry the LOW tag; using the high tag would emit two leading surrogates.
        return (char) (LOW_SURROGATE_BITS | (codePoint & SURROGATE_BIT_INV_MASK));
    }

    /**
     * @param c code unit to test
     * @return {@code true} if {@code c} lies in U+D800..U+DBFF
     */
    public static boolean isHighSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS;
    }

    /**
     * @param c code unit to test
     * @return {@code true} if {@code c} lies in U+DC00..U+DFFF
     */
    public static boolean isLowSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS;
    }

    /**
     * @param a candidate high surrogate
     * @param b candidate low surrogate
     * @return {@code true} if {@code a} followed by {@code b} forms a valid pair
     */
    public static boolean isSurrogatePair(char a, char b) {
        return isHighSurrogate(a) && isLowSurrogate(b);
    }

    /**
     * Combines a surrogate pair into the code point it encodes. The arguments are
     * not validated; only their ten payload bits are used.
     *
     * @param a high surrogate
     * @param b low surrogate
     * @return the supplementary code point in U+10000..U+10FFFF
     */
    public static int buildCodePoint(char a, char b) {
        int high = (a & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS;
        int low = b & SURROGATE_BIT_INV_MASK;
        // Parenthesised on purpose: '+' binds tighter than '|', and the plane
        // offset has to be added to the combined 20-bit value, not OR-ed into it.
        return (high | low) + SUPPLEMENTARY_PLANE;
    }
}

View File

@ -1,5 +1,6 @@
package org.teavm.classlib.java.lang;
import org.teavm.classlib.impl.charset.UTF16Helper;
import org.teavm.classlib.java.lang.io.TSerializable;
import org.teavm.classlib.java.util.TArrays;
import org.teavm.javascript.ni.Remove;
@ -289,13 +290,13 @@ class TAbstractStringBuilder extends TObject implements TSerializable, TCharSequ
}
protected TAbstractStringBuilder appendCodePoint(int codePoint) {
if (codePoint < TString.SUPPLEMENTARY_PLANE) {
if (codePoint < UTF16Helper.SUPPLEMENTARY_PLANE) {
return append((char)codePoint);
}
ensureCapacity(length + 2);
codePoint -= TString.SUPPLEMENTARY_PLANE;
buffer[length++] = TString.highSurrogate(codePoint);
buffer[length++] = TString.lowSurrogate(codePoint);
codePoint -= UTF16Helper.SUPPLEMENTARY_PLANE;
buffer[length++] = UTF16Helper.highSurrogate(codePoint);
buffer[length++] = UTF16Helper.lowSurrogate(codePoint);
return this;
}
@ -362,6 +363,6 @@ class TAbstractStringBuilder extends TObject implements TSerializable, TCharSequ
}
/**
* Sets the stored length of this builder to {@code newLength}.
* The body performs no range check on {@code newLength} and does not touch the
* backing buffer.
*
* @param newLength value assigned to the {@code length} field
*/
public void setLength(int newLength) {
// NOTE(review): this store is overwritten by the next statement, so it has no
// lasting effect; in this diff rendering it is presumably the removed
// pre-change line - confirm against the real file.
length = 0;
length = newLength;
}
}

View File

@ -1,5 +1,6 @@
package org.teavm.classlib.java.lang;
import org.teavm.classlib.impl.charset.UTF16Helper;
import org.teavm.classlib.java.lang.io.TSerializable;
import org.teavm.javascript.ni.GeneratedBy;
import org.teavm.javascript.ni.Rename;
@ -10,12 +11,6 @@ import org.teavm.javascript.ni.Rename;
*/
public class TString extends TObject implements TSerializable, TComparable<TString>,
TCharSequence {
static int SURROGATE_BIT_MASK = 0xFC00;
static int SURROGATE_BIT_INV_MASK = 0x03FF;
static int HIGH_SURROGATE_BITS = 0xF800;
static int LOW_SURROGATE_BITS = 0xF800;
static int MEANINGFUL_SURROGATE_BITS = 10;
static int SUPPLEMENTARY_PLANE = 0x10000;
private char[] characters;
private transient int hashCode;
@ -54,29 +49,26 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
}
public int codePointAt(int index) {
if (index == characters.length - 1 || (characters[index] & SURROGATE_BIT_MASK) != HIGH_SURROGATE_BITS ||
(characters[index + 1] & SURROGATE_BIT_MASK) != LOW_SURROGATE_BITS) {
if (index == characters.length - 1 || !UTF16Helper.isHighSurrogate(characters[index]) ||
!UTF16Helper.isLowSurrogate(characters[index + 1])) {
return characters[index];
}
return ((characters[index] & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS) |
(characters[index + 1] & SURROGATE_BIT_INV_MASK) + SUPPLEMENTARY_PLANE;
return UTF16Helper.buildCodePoint(characters[index], characters[index + 1]);
}
public int codePointBefore(int index) {
if (index == 1 || (characters[index] & SURROGATE_BIT_MASK) != LOW_SURROGATE_BITS ||
(characters[index - 1] & SURROGATE_BIT_MASK) != HIGH_SURROGATE_BITS) {
if (index == 1 || !UTF16Helper.isHighSurrogate(characters[index]) ||
UTF16Helper.isLowSurrogate(characters[index - 1])) {
return characters[index - 1];
}
return ((characters[index - 1] & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS) |
(characters[index] & SURROGATE_BIT_INV_MASK) + SUPPLEMENTARY_PLANE;
return UTF16Helper.buildCodePoint(characters[index - 1], characters[index]);
}
public int codePointCount(int beginIndex, int endIndex) {
int count = endIndex;
--endIndex;
for (int i = beginIndex; i < endIndex; ++i) {
if ((characters[i] & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS &&
(characters[i + 1] & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS) {
if (UTF16Helper.isHighSurrogate(characters[i]) && UTF16Helper.isLowSurrogate(characters[i + 1])) {
--count;
}
}
@ -85,8 +77,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
public int offsetByCodePoints(int index, int codePointOffset) {
for (int i = 0; i < codePointOffset; ++i) {
if (index < characters.length - 1 && (characters[index] & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS &&
(characters[index + 1] & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS) {
if (index < characters.length - 1 && UTF16Helper.isHighSurrogate(characters[index]) &&
UTF16Helper.isLowSurrogate(characters[index + 1])) {
index += 2;
} else {
index++;
@ -192,17 +184,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
return true;
}
/**
* Builds a surrogate code unit from bits 10..19 of {@code codePoint}, OR-ed with
* {@code HIGH_SURROGATE_BITS}.
* NOTE(review): nothing is subtracted from {@code codePoint} here; appendCodePoint
* subtracts SUPPLEMENTARY_PLANE before calling, while indexOf/lastIndexOf pass
* {@code ch} unchanged - confirm which contract is intended.
*/
static char highSurrogate(int codePoint) {
return (char)(TString.HIGH_SURROGATE_BITS | (codePoint >> TString.MEANINGFUL_SURROGATE_BITS) &
TString.SURROGATE_BIT_INV_MASK);
}
/**
* Builds a surrogate code unit from the bits of {@code codePoint} selected by
* {@code SURROGATE_BIT_INV_MASK}.
* NOTE(review): the tag bits come from HIGH_SURROGATE_BITS rather than
* LOW_SURROGATE_BITS - looks like a copy-paste slip; confirm.
*/
static char lowSurrogate(int codePoint) {
return (char)(TString.HIGH_SURROGATE_BITS | codePoint & TString.SURROGATE_BIT_INV_MASK);
}
public int indexOf(int ch, int fromIndex) {
if (ch < SUPPLEMENTARY_PLANE) {
if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
char bmpChar = (char)ch;
for (int i = fromIndex; i < characters.length; ++i) {
if (characters[i] == bmpChar) {
@ -211,8 +194,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
}
return -1;
} else {
char hi = highSurrogate(ch);
char lo = lowSurrogate(ch);
char hi = UTF16Helper.highSurrogate(ch);
char lo = UTF16Helper.lowSurrogate(ch);
for (int i = fromIndex; i < characters.length - 1; ++i) {
if (characters[i] == hi && characters[i + 1] == lo) {
return i;
@ -227,7 +210,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
}
public int lastIndexOf(int ch, int fromIndex) {
if (ch < SUPPLEMENTARY_PLANE) {
if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
char bmpChar = (char)ch;
for (int i = fromIndex; i >= 0; --i) {
if (characters[i] == bmpChar) {
@ -236,8 +219,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
}
return -1;
} else {
char hi = highSurrogate(ch);
char lo = lowSurrogate(ch);
char hi = UTF16Helper.highSurrogate(ch);
char lo = UTF16Helper.lowSurrogate(ch);
for (int i = fromIndex; i >= 1; --i) {
if (characters[i] == lo && characters[i - 1] == hi) {
return i;

View File

@ -1,5 +1,6 @@
package org.teavm.classlib.java.lang.io;
import org.teavm.classlib.java.lang.TMath;
import org.teavm.classlib.java.lang.TStringBuilder;
/**
@ -95,7 +96,16 @@ public class TPrintStream extends TFilterOutputStream {
}
/**
* Prints a whole character array by delegating to
* {@code print(s, 0, s.length)}.
*
* @param s characters to print; dereferenced without a null check
*/
public void print(char[] s) {
print(s, 0, s.length);
}
/**
* Range-printing helper. As written it only allocates a scratch array and writes
* nothing; {@code begin} and {@code end} are never read.
* NOTE(review): looks like an unfinished stub - confirm before relying on it.
*
* @param s source characters
* @param begin currently unused
* @param end currently unused
*/
private void print(char[] s, int begin, int end) {
// Scratch array sized to the input length, capped at 4096 entries; not used afterwards.
int[] codePoints = new int[TMath.min(s.length, 4096)];
}
/**
* Prints a single character by storing it in slot 0 of {@code buffer} and
* delegating to {@code print(buffer, 0, 1)}.
* NOTE(review): {@code buffer} is declared outside this view - presumably a
* reusable char[] field; confirm.
*
* @param c character to print
*/
public void print(char c) {
buffer[0] = c;
print(buffer, 0, 1);
}
public void print(int i) {