Refactor UTF-16 encoding/decoding in preparation for implementing encoding/decoding charsets
This commit is contained in:
konsoletyper 2013-12-17 17:23:59 +04:00
parent 79e2eca91d
commit e595ecc5db
5 changed files with 71 additions and 48 deletions

View File

@ -1,10 +0,0 @@
package org.teavm.classlib.impl.charset;
/**
* Empty class tagged with the {@code @CharsetName("UTF-16")} annotation. It declares no
* fields or methods of its own.
*
* @author Alexey Andreev
*/
@CharsetName("UTF-16")
public class UTF16Charset {
}

View File

@ -0,0 +1,39 @@
package org.teavm.classlib.impl.charset;
/**
 * Static helpers for splitting supplementary code points into UTF-16 surrogate pairs
 * and for recombining surrogate pairs into code points.
 *
 * <p>A high (leading) surrogate lies in U+D800..U+DBFF and a low (trailing) surrogate in
 * U+DC00..U+DFFF; each carries ten payload bits in its low-order bits.</p>
 *
 * @author Alexey Andreev
 */
public class UTF16Helper {
    /** Mask selecting the six leading bits that tell a high surrogate from a low one. */
    public static final int SURROGATE_BIT_MASK = 0xFC00;
    /** Mask selecting the ten payload bits carried by a single surrogate. */
    public static final int SURROGATE_BIT_INV_MASK = 0x03FF;
    /** Leading bits shared by all high surrogates (U+D800..U+DBFF). */
    public static final int HIGH_SURROGATE_BITS = 0xD800;
    /** Leading bits shared by all low surrogates (U+DC00..U+DFFF). */
    public static final int LOW_SURROGATE_BITS = 0xDC00;
    /** Number of payload bits stored in each surrogate. */
    public static final int MEANINGFUL_SURROGATE_BITS = 10;
    /** First code point outside the Basic Multilingual Plane. */
    public static final int SUPPLEMENTARY_PLANE = 0x10000;

    /**
     * Builds the high surrogate for a supplementary code point.
     *
     * <p>NOTE(review): the value is masked to ten bits after the shift, so the argument
     * must already have {@link #SUPPLEMENTARY_PLANE} subtracted from it. Passing a full
     * code point yields a wrong surrogate; verify that every caller subtracts first.</p>
     *
     * @param codePoint the code point minus {@link #SUPPLEMENTARY_PLANE}
     * @return the high surrogate holding bits 10..19 of the argument
     */
    public static char highSurrogate(int codePoint) {
        int payload = (codePoint >> MEANINGFUL_SURROGATE_BITS) & SURROGATE_BIT_INV_MASK;
        return (char) (HIGH_SURROGATE_BITS | payload);
    }

    /**
     * Builds the low surrogate for a supplementary code point.
     *
     * <p>Only the ten low-order bits are used, so the result is the same whether or not
     * {@link #SUPPLEMENTARY_PLANE} has been subtracted from the argument.</p>
     *
     * @param codePoint the code point, with or without the supplementary plane offset
     * @return the low surrogate holding bits 0..9 of the argument
     */
    public static char lowSurrogate(int codePoint) {
        // The low surrogate must carry the low-surrogate tag, not the high one.
        return (char) (LOW_SURROGATE_BITS | (codePoint & SURROGATE_BIT_INV_MASK));
    }

    /**
     * Tells whether a char is a high (leading) surrogate.
     *
     * @param c the char to test
     * @return {@code true} if {@code c} is in U+D800..U+DBFF
     */
    public static boolean isHighSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS;
    }

    /**
     * Tells whether a char is a low (trailing) surrogate.
     *
     * @param c the char to test
     * @return {@code true} if {@code c} is in U+DC00..U+DFFF
     */
    public static boolean isLowSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS;
    }

    /**
     * Tells whether two consecutive chars form a valid surrogate pair.
     *
     * @param a the first char
     * @param b the char that follows {@code a}
     * @return {@code true} if {@code a} is a high surrogate and {@code b} a low surrogate
     */
    public static boolean isSurrogatePair(char a, char b) {
        return isHighSurrogate(a) && isLowSurrogate(b);
    }

    /**
     * Combines a surrogate pair into the supplementary code point it encodes.
     *
     * <p>The arguments are not validated; only their ten payload bits are used.</p>
     *
     * @param a the high surrogate
     * @param b the low surrogate
     * @return the code point, in the range 0x10000..0x10FFFF
     */
    public static int buildCodePoint(char a, char b) {
        int highPayload = (a & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS;
        int lowPayload = b & SURROGATE_BIT_INV_MASK;
        // Join the payloads first, then add the offset: '+' binds tighter than '|',
        // so without the parentheses the offset would be OR-ed in and lost when bit 16 is set.
        return (highPayload | lowPayload) + SUPPLEMENTARY_PLANE;
    }
}

View File

@ -1,5 +1,6 @@
package org.teavm.classlib.java.lang; package org.teavm.classlib.java.lang;
import org.teavm.classlib.impl.charset.UTF16Helper;
import org.teavm.classlib.java.lang.io.TSerializable; import org.teavm.classlib.java.lang.io.TSerializable;
import org.teavm.classlib.java.util.TArrays; import org.teavm.classlib.java.util.TArrays;
import org.teavm.javascript.ni.Remove; import org.teavm.javascript.ni.Remove;
@ -289,13 +290,13 @@ class TAbstractStringBuilder extends TObject implements TSerializable, TCharSequ
} }
protected TAbstractStringBuilder appendCodePoint(int codePoint) { protected TAbstractStringBuilder appendCodePoint(int codePoint) {
if (codePoint < TString.SUPPLEMENTARY_PLANE) { if (codePoint < UTF16Helper.SUPPLEMENTARY_PLANE) {
return append((char)codePoint); return append((char)codePoint);
} }
ensureCapacity(length + 2); ensureCapacity(length + 2);
codePoint -= TString.SUPPLEMENTARY_PLANE; codePoint -= UTF16Helper.SUPPLEMENTARY_PLANE;
buffer[length++] = TString.highSurrogate(codePoint); buffer[length++] = UTF16Helper.highSurrogate(codePoint);
buffer[length++] = TString.lowSurrogate(codePoint); buffer[length++] = UTF16Helper.lowSurrogate(codePoint);
return this; return this;
} }
@ -362,6 +363,6 @@ class TAbstractStringBuilder extends TObject implements TSerializable, TCharSequ
} }
public void setLength(int newLength) { public void setLength(int newLength) {
length = 0; length = newLength;
} }
} }

View File

@ -1,5 +1,6 @@
package org.teavm.classlib.java.lang; package org.teavm.classlib.java.lang;
import org.teavm.classlib.impl.charset.UTF16Helper;
import org.teavm.classlib.java.lang.io.TSerializable; import org.teavm.classlib.java.lang.io.TSerializable;
import org.teavm.javascript.ni.GeneratedBy; import org.teavm.javascript.ni.GeneratedBy;
import org.teavm.javascript.ni.Rename; import org.teavm.javascript.ni.Rename;
@ -10,12 +11,6 @@ import org.teavm.javascript.ni.Rename;
*/ */
public class TString extends TObject implements TSerializable, TComparable<TString>, public class TString extends TObject implements TSerializable, TComparable<TString>,
TCharSequence { TCharSequence {
static int SURROGATE_BIT_MASK = 0xFC00;
static int SURROGATE_BIT_INV_MASK = 0x03FF;
static int HIGH_SURROGATE_BITS = 0xF800;
static int LOW_SURROGATE_BITS = 0xF800;
static int MEANINGFUL_SURROGATE_BITS = 10;
static int SUPPLEMENTARY_PLANE = 0x10000;
private char[] characters; private char[] characters;
private transient int hashCode; private transient int hashCode;
@ -54,29 +49,26 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
} }
public int codePointAt(int index) { public int codePointAt(int index) {
if (index == characters.length - 1 || (characters[index] & SURROGATE_BIT_MASK) != HIGH_SURROGATE_BITS || if (index == characters.length - 1 || !UTF16Helper.isHighSurrogate(characters[index]) ||
(characters[index + 1] & SURROGATE_BIT_MASK) != LOW_SURROGATE_BITS) { !UTF16Helper.isLowSurrogate(characters[index + 1])) {
return characters[index]; return characters[index];
} }
return ((characters[index] & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS) | return UTF16Helper.buildCodePoint(characters[index], characters[index + 1]);
(characters[index + 1] & SURROGATE_BIT_INV_MASK) + SUPPLEMENTARY_PLANE;
} }
public int codePointBefore(int index) { public int codePointBefore(int index) {
if (index == 1 || (characters[index] & SURROGATE_BIT_MASK) != LOW_SURROGATE_BITS || if (index == 1 || !UTF16Helper.isHighSurrogate(characters[index]) ||
(characters[index - 1] & SURROGATE_BIT_MASK) != HIGH_SURROGATE_BITS) { UTF16Helper.isLowSurrogate(characters[index - 1])) {
return characters[index - 1]; return characters[index - 1];
} }
return ((characters[index - 1] & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS) | return UTF16Helper.buildCodePoint(characters[index - 1], characters[index]);
(characters[index] & SURROGATE_BIT_INV_MASK) + SUPPLEMENTARY_PLANE;
} }
public int codePointCount(int beginIndex, int endIndex) { public int codePointCount(int beginIndex, int endIndex) {
int count = endIndex; int count = endIndex;
--endIndex; --endIndex;
for (int i = beginIndex; i < endIndex; ++i) { for (int i = beginIndex; i < endIndex; ++i) {
if ((characters[i] & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS && if (UTF16Helper.isHighSurrogate(characters[i]) && UTF16Helper.isLowSurrogate(characters[i + 1])) {
(characters[i + 1] & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS) {
--count; --count;
} }
} }
@ -85,8 +77,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
public int offsetByCodePoints(int index, int codePointOffset) { public int offsetByCodePoints(int index, int codePointOffset) {
for (int i = 0; i < codePointOffset; ++i) { for (int i = 0; i < codePointOffset; ++i) {
if (index < characters.length - 1 && (characters[index] & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS && if (index < characters.length - 1 && UTF16Helper.isHighSurrogate(characters[index]) &&
(characters[index + 1] & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS) { UTF16Helper.isLowSurrogate(characters[index + 1])) {
index += 2; index += 2;
} else { } else {
index++; index++;
@ -192,17 +184,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
return true; return true;
} }
static char highSurrogate(int codePoint) {
return (char)(TString.HIGH_SURROGATE_BITS | (codePoint >> TString.MEANINGFUL_SURROGATE_BITS) &
TString.SURROGATE_BIT_INV_MASK);
}
static char lowSurrogate(int codePoint) {
return (char)(TString.HIGH_SURROGATE_BITS | codePoint & TString.SURROGATE_BIT_INV_MASK);
}
public int indexOf(int ch, int fromIndex) { public int indexOf(int ch, int fromIndex) {
if (ch < SUPPLEMENTARY_PLANE) { if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
char bmpChar = (char)ch; char bmpChar = (char)ch;
for (int i = fromIndex; i < characters.length; ++i) { for (int i = fromIndex; i < characters.length; ++i) {
if (characters[i] == bmpChar) { if (characters[i] == bmpChar) {
@ -211,8 +194,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
} }
return -1; return -1;
} else { } else {
char hi = highSurrogate(ch); char hi = UTF16Helper.highSurrogate(ch);
char lo = lowSurrogate(ch); char lo = UTF16Helper.lowSurrogate(ch);
for (int i = fromIndex; i < characters.length - 1; ++i) { for (int i = fromIndex; i < characters.length - 1; ++i) {
if (characters[i] == hi && characters[i + 1] == lo) { if (characters[i] == hi && characters[i + 1] == lo) {
return i; return i;
@ -227,7 +210,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
} }
public int lastIndexOf(int ch, int fromIndex) { public int lastIndexOf(int ch, int fromIndex) {
if (ch < SUPPLEMENTARY_PLANE) { if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
char bmpChar = (char)ch; char bmpChar = (char)ch;
for (int i = fromIndex; i >= 0; --i) { for (int i = fromIndex; i >= 0; --i) {
if (characters[i] == bmpChar) { if (characters[i] == bmpChar) {
@ -236,8 +219,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
} }
return -1; return -1;
} else { } else {
char hi = highSurrogate(ch); char hi = UTF16Helper.highSurrogate(ch);
char lo = lowSurrogate(ch); char lo = UTF16Helper.lowSurrogate(ch);
for (int i = fromIndex; i >= 1; --i) { for (int i = fromIndex; i >= 1; --i) {
if (characters[i] == lo && characters[i - 1] == hi) { if (characters[i] == lo && characters[i - 1] == hi) {
return i; return i;

View File

@ -1,5 +1,6 @@
package org.teavm.classlib.java.lang.io; package org.teavm.classlib.java.lang.io;
import org.teavm.classlib.java.lang.TMath;
import org.teavm.classlib.java.lang.TStringBuilder; import org.teavm.classlib.java.lang.TStringBuilder;
/** /**
@ -95,7 +96,16 @@ public class TPrintStream extends TFilterOutputStream {
} }
public void print(char[] s) { public void print(char[] s) {
print(s, 0, s.length);
}
private void print(char[] s, int begin, int end) {
int[] codePoints = new int[TMath.min(s.length, 4096)];
}
public void print(char c) {
buffer[0] = c;
print(buffer, 0, 1);
} }
public void print(int i) { public void print(int i) {