mirror of
https://github.com/Eaglercraft-TeaVM-Fork/eagler-teavm.git
synced 2025-01-10 08:54:11 -08:00
Replacing old TeaVM-based charsets with NIO charsets
This commit is contained in:
parent
cc5225a2a6
commit
aa2451c3e3
|
@ -1,61 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2013 Alexey Andreev.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.teavm.classlib.impl.charset;
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @author Alexey Andreev
|
|
||||||
*/
|
|
||||||
/**
 * Static helpers for splitting supplementary code points into UTF-16
 * surrogate pairs, recombining pairs into code points, and classifying
 * individual {@code char} values as surrogates.
 *
 * <p>None of the methods validate their arguments: the conversion methods
 * simply mask and shift, so callers are expected to pass supplementary code
 * points (for the split methods) or a genuine high/low pair (for
 * {@link #buildCodePoint(char, char)}).
 */
public class UTF16Helper {
    /** Mask keeping the top five bits, which are identical for high and low surrogates. */
    public static final int SURROGATE_NEUTRAL_BIT_MASK = 0xF800;
    /** Value of the top five bits ({@link #SURROGATE_NEUTRAL_BIT_MASK}) for any surrogate. */
    public static final int SURROGATE_BITS = 0xD800;
    /** Mask keeping the top six bits, which distinguish high from low surrogates. */
    public static final int SURROGATE_BIT_MASK = 0xFC00;
    /** Mask keeping the ten payload bits carried by a single surrogate. */
    public static final int SURROGATE_BIT_INV_MASK = 0x03FF;
    /** Value of the top six bits ({@link #SURROGATE_BIT_MASK}) for a high surrogate. */
    public static final int HIGH_SURROGATE_BITS = 0xD800;
    /** Value of the top six bits ({@link #SURROGATE_BIT_MASK}) for a low surrogate. */
    public static final int LOW_SURROGATE_BITS = 0xDC00;
    /** Number of payload bits carried by each surrogate. */
    public static final int MEANINGFUL_SURROGATE_BITS = 10;
    /** First code point that needs a surrogate pair; subtracted before splitting. */
    public static final int SUPPLEMENTARY_PLANE = 0x10000;

    /**
     * Computes the leading (high) surrogate for a code point.
     *
     * @param codePoint the code point to split; not validated
     * @return {@link #HIGH_SURROGATE_BITS} combined with bits 10..19 of
     *         {@code codePoint - SUPPLEMENTARY_PLANE}
     */
    public static char highSurrogate(int codePoint) {
        int offset = codePoint - SUPPLEMENTARY_PLANE;
        // Parenthesised explicitly: '&' binds tighter than '|', which is easy to misread.
        return (char) (HIGH_SURROGATE_BITS | ((offset >> MEANINGFUL_SURROGATE_BITS) & SURROGATE_BIT_INV_MASK));
    }

    /**
     * Computes the trailing (low) surrogate for a code point.
     *
     * @param codePoint the code point to split; not validated
     * @return {@link #LOW_SURROGATE_BITS} combined with the low ten bits of {@code codePoint}
     */
    public static char lowSurrogate(int codePoint) {
        // No subtraction needed: SUPPLEMENTARY_PLANE has no bits set in the low ten positions.
        return (char) (LOW_SURROGATE_BITS | (codePoint & SURROGATE_BIT_INV_MASK));
    }

    /**
     * Tests whether a char is a high (leading) surrogate.
     *
     * @param c the char to test
     * @return {@code true} if the top six bits of {@code c} equal {@link #HIGH_SURROGATE_BITS}
     */
    public static boolean isHighSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS;
    }

    /**
     * Tests whether a char is a low (trailing) surrogate.
     *
     * @param c the char to test
     * @return {@code true} if the top six bits of {@code c} equal {@link #LOW_SURROGATE_BITS}
     */
    public static boolean isLowSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS;
    }

    /**
     * Tests whether two consecutive chars form a surrogate pair.
     *
     * @param a the first char
     * @param b the second char
     * @return {@code true} if {@code a} is a high surrogate and {@code b} is a low surrogate
     */
    public static boolean isSurrogatePair(char a, char b) {
        return isHighSurrogate(a) && isLowSurrogate(b);
    }

    /**
     * Combines the payload bits of two chars into a supplementary code point.
     *
     * @param a the high surrogate; not validated
     * @param b the low surrogate; not validated
     * @return the ten payload bits of {@code a} followed by the ten payload bits
     *         of {@code b}, plus {@link #SUPPLEMENTARY_PLANE}
     */
    public static int buildCodePoint(char a, char b) {
        int highBits = (a & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS;
        int lowBits = b & SURROGATE_BIT_INV_MASK;
        return (highBits | lowBits) + SUPPLEMENTARY_PLANE;
    }

    /**
     * Tests whether a char is a surrogate of either kind.
     *
     * @param c the char to test
     * @return {@code true} if the top five bits of {@code c} equal {@link #SURROGATE_BITS}
     */
    public static boolean isSurrogate(char c) {
        return (c & SURROGATE_NEUTRAL_BIT_MASK) == SURROGATE_BITS;
    }
}
|
|
|
@ -29,17 +29,17 @@ public class UTF8Charset extends Charset {
|
||||||
} else if (ch < 0x400) {
|
} else if (ch < 0x400) {
|
||||||
dest.put((byte)(0xC0 | (ch >> 6)));
|
dest.put((byte)(0xC0 | (ch >> 6)));
|
||||||
dest.put((byte)(0x80 | (ch & 0x3F)));
|
dest.put((byte)(0x80 | (ch & 0x3F)));
|
||||||
} else if (!UTF16Helper.isSurrogate(ch)) {
|
} else if (!Character.isSurrogate(ch)) {
|
||||||
dest.put((byte)(0xE0 | (ch >> 12)));
|
dest.put((byte)(0xE0 | (ch >> 12)));
|
||||||
dest.put((byte)(0x80 | ((ch >> 6) & 0x3F)));
|
dest.put((byte)(0x80 | ((ch >> 6) & 0x3F)));
|
||||||
dest.put((byte)(0x80 | (ch & 0x3F)));
|
dest.put((byte)(0x80 | (ch & 0x3F)));
|
||||||
} else if (UTF16Helper.isHighSurrogate(ch)) {
|
} else if (Character.isHighSurrogate(ch)) {
|
||||||
char low = source.get();
|
char low = source.get();
|
||||||
if (!UTF16Helper.isLowSurrogate(low)) {
|
if (!Character.isLowSurrogate(low)) {
|
||||||
source.back(1);
|
source.back(1);
|
||||||
dest.put((byte)'?');
|
dest.put((byte)'?');
|
||||||
} else {
|
} else {
|
||||||
int codePoint = UTF16Helper.buildCodePoint(ch, low);
|
int codePoint = Character.toCodePoint(ch, low);
|
||||||
dest.put((byte)(0xF0 | (codePoint >> 18)));
|
dest.put((byte)(0xF0 | (codePoint >> 18)));
|
||||||
dest.put((byte)(0x80 | ((codePoint >> 12) & 0x3F)));
|
dest.put((byte)(0x80 | ((codePoint >> 12) & 0x3F)));
|
||||||
dest.put((byte)(0x80 | ((codePoint >> 6) & 0x3F)));
|
dest.put((byte)(0x80 | ((codePoint >> 6) & 0x3F)));
|
||||||
|
@ -72,7 +72,7 @@ public class UTF8Charset extends Charset {
|
||||||
byte b2 = source.get();
|
byte b2 = source.get();
|
||||||
byte b3 = source.get();
|
byte b3 = source.get();
|
||||||
char c = (char)(((b & 0x0F) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3F));
|
char c = (char)(((b & 0x0F) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3F));
|
||||||
dest.put(!UTF16Helper.isHighSurrogate(c) ? c : '?');
|
dest.put(!Character.isHighSurrogate(c) ? c : '?');
|
||||||
} else if ((b & 0xF8) == 0xF0) {
|
} else if ((b & 0xF8) == 0xF0) {
|
||||||
if (source.available() < 3) {
|
if (source.available() < 3) {
|
||||||
source.skip(source.available());
|
source.skip(source.available());
|
||||||
|
@ -83,8 +83,8 @@ public class UTF8Charset extends Charset {
|
||||||
byte b3 = source.get();
|
byte b3 = source.get();
|
||||||
byte b4 = source.get();
|
byte b4 = source.get();
|
||||||
int code = ((b & 0x07) << 18) | ((b2 & 0x3f) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F);
|
int code = ((b & 0x07) << 18) | ((b2 & 0x3f) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F);
|
||||||
dest.put(UTF16Helper.highSurrogate(code));
|
dest.put(Character.highSurrogate(code));
|
||||||
dest.put(UTF16Helper.lowSurrogate(code));
|
dest.put(Character.lowSurrogate(code));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,11 +15,13 @@
|
||||||
*/
|
*/
|
||||||
package org.teavm.classlib.java.io;
|
package org.teavm.classlib.java.io;
|
||||||
|
|
||||||
import org.teavm.classlib.impl.charset.ByteBuffer;
|
|
||||||
import org.teavm.classlib.impl.charset.CharBuffer;
|
|
||||||
import org.teavm.classlib.impl.charset.Charset;
|
|
||||||
import org.teavm.classlib.impl.charset.UTF8Charset;
|
|
||||||
import org.teavm.classlib.java.lang.TString;
|
import org.teavm.classlib.java.lang.TString;
|
||||||
|
import org.teavm.classlib.java.nio.TByteBuffer;
|
||||||
|
import org.teavm.classlib.java.nio.TCharBuffer;
|
||||||
|
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||||
|
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
|
||||||
|
import org.teavm.classlib.java.nio.charset.TCodingErrorAction;
|
||||||
|
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -27,30 +29,30 @@ import org.teavm.classlib.java.lang.TString;
|
||||||
*/
|
*/
|
||||||
public class TInputStreamReader extends TReader {
|
public class TInputStreamReader extends TReader {
|
||||||
private TInputStream stream;
|
private TInputStream stream;
|
||||||
private Charset charset;
|
private TCharset charset;
|
||||||
private TString charsetName;
|
private TString charsetName;
|
||||||
private byte[] inData = new byte[8192];
|
private byte[] inData = new byte[8192];
|
||||||
private ByteBuffer inBuffer = new ByteBuffer(inData);
|
private TByteBuffer inBuffer = TByteBuffer.wrap(inData);
|
||||||
private char[] outData = new char[1024];
|
private char[] outData = new char[1024];
|
||||||
private CharBuffer outBuffer = new CharBuffer(outData);
|
private TCharBuffer outBuffer = TCharBuffer.wrap(outData);
|
||||||
private boolean streamEof;
|
private boolean streamEof;
|
||||||
private boolean eof;
|
private boolean eof;
|
||||||
|
|
||||||
public TInputStreamReader(TInputStream in, TString charsetName) {
|
public TInputStreamReader(TInputStream in, TString charsetName) {
|
||||||
this(in, Charset.get(charsetName.toString()));
|
this(in, TCharset.forName(charsetName.toString()));
|
||||||
this.charsetName = charsetName;
|
this.charsetName = charsetName;
|
||||||
}
|
}
|
||||||
|
|
||||||
public TInputStreamReader(TInputStream in) {
|
public TInputStreamReader(TInputStream in) {
|
||||||
this(in, new UTF8Charset());
|
this(in, new TUTF8Charset());
|
||||||
charsetName = TString.wrap("UTF-8");
|
charsetName = TString.wrap("UTF-8");
|
||||||
}
|
}
|
||||||
|
|
||||||
private TInputStreamReader(TInputStream in, Charset charset) {
|
public TInputStreamReader(TInputStream in, TCharset charset) {
|
||||||
this.stream = in;
|
this.stream = in;
|
||||||
this.charset = charset;
|
this.charset = charset;
|
||||||
outBuffer.skip(outBuffer.available());
|
outBuffer.position(outBuffer.limit());
|
||||||
inBuffer.skip(inBuffer.available());
|
inBuffer.position(inBuffer.limit());
|
||||||
}
|
}
|
||||||
|
|
||||||
public TString getEncoding() {
|
public TString getEncoding() {
|
||||||
|
@ -64,10 +66,10 @@ public class TInputStreamReader extends TReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int read() throws TIOException {
|
public int read() throws TIOException {
|
||||||
if (eof && outBuffer.end()) {
|
if (eof && !outBuffer.hasRemaining()) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (!outBuffer.end()) {
|
if (outBuffer.hasRemaining()) {
|
||||||
return outBuffer.get();
|
return outBuffer.get();
|
||||||
}
|
}
|
||||||
return fillBuffer() ? outBuffer.get() : -1;
|
return fillBuffer() ? outBuffer.get() : -1;
|
||||||
|
@ -75,37 +77,40 @@ public class TInputStreamReader extends TReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int read(char[] cbuf, int off, int len) throws TIOException {
|
public int read(char[] cbuf, int off, int len) throws TIOException {
|
||||||
if (eof && outBuffer.end()) {
|
if (eof && !outBuffer.hasRemaining()) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
CharBuffer wrapBuffer = new CharBuffer(cbuf, off, off + len);
|
int bytesRead = 0;
|
||||||
while (!wrapBuffer.end()) {
|
while (len > 0) {
|
||||||
wrapBuffer.put(outBuffer);
|
int sz = Math.min(len, outBuffer.remaining());
|
||||||
if (outBuffer.end() && !fillBuffer()) {
|
outBuffer.get(cbuf, off + bytesRead, sz);
|
||||||
|
len -= sz;
|
||||||
|
bytesRead += sz;
|
||||||
|
if (!outBuffer.hasRemaining() && !fillBuffer()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return wrapBuffer.position() - off;
|
return bytesRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean fillBuffer() throws TIOException {
|
private boolean fillBuffer() throws TIOException {
|
||||||
if (eof) {
|
if (eof) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
CharBuffer newBuffer = new CharBuffer(outData);
|
outBuffer.compact();
|
||||||
newBuffer.put(outBuffer);
|
TCharsetDecoder decoder = charset.newDecoder()
|
||||||
|
.onMalformedInput(TCodingErrorAction.REPLACE)
|
||||||
|
.onUnmappableCharacter(TCodingErrorAction.IGNORE);
|
||||||
while (true) {
|
while (true) {
|
||||||
if (inBuffer.end() && !fillReadBuffer()) {
|
if (!inBuffer.hasRemaining() && !fillReadBuffer()) {
|
||||||
eof = true;
|
eof = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
int oldAvail = newBuffer.available();
|
if (decoder.decode(inBuffer, outBuffer, eof).isOverflow()) {
|
||||||
charset.decode(inBuffer, newBuffer);
|
|
||||||
if (oldAvail == newBuffer.available()) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
outBuffer = new CharBuffer(outData, 0, newBuffer.position());
|
outBuffer.flip();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -113,30 +118,25 @@ public class TInputStreamReader extends TReader {
|
||||||
if (streamEof) {
|
if (streamEof) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int off = 0;
|
inBuffer.compact();
|
||||||
while (!inBuffer.end()) {
|
while (inBuffer.hasRemaining()) {
|
||||||
inData[off] = inBuffer.get();
|
int bytesRead = stream.read(inBuffer.array(), inBuffer.position(), inBuffer.remaining());
|
||||||
}
|
|
||||||
inBuffer.rewind(0);
|
|
||||||
while (off < inData.length) {
|
|
||||||
int bytesRead = stream.read(inData, off, inData.length - off);
|
|
||||||
if (bytesRead == -1) {
|
if (bytesRead == -1) {
|
||||||
streamEof = true;
|
streamEof = true;
|
||||||
inBuffer = new ByteBuffer(inData, 0, inBuffer.position());
|
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
off += bytesRead;
|
inBuffer.position(inBuffer.position() + bytesRead);
|
||||||
if (bytesRead == 0) {
|
if (bytesRead == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inBuffer = new ByteBuffer(inData, 0, off);
|
inBuffer.flip();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean ready() throws TIOException {
|
public boolean ready() throws TIOException {
|
||||||
return !outBuffer.end() || inBuffer.end();
|
return outBuffer.hasRemaining() || inBuffer.hasRemaining();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
*/
|
*/
|
||||||
package org.teavm.classlib.java.lang;
|
package org.teavm.classlib.java.lang;
|
||||||
|
|
||||||
import org.teavm.classlib.impl.charset.UTF16Helper;
|
|
||||||
import org.teavm.classlib.java.io.TSerializable;
|
import org.teavm.classlib.java.io.TSerializable;
|
||||||
import org.teavm.classlib.java.util.TArrays;
|
import org.teavm.classlib.java.util.TArrays;
|
||||||
|
|
||||||
|
@ -553,12 +552,12 @@ class TAbstractStringBuilder extends TObject implements TSerializable, TCharSequ
|
||||||
}
|
}
|
||||||
|
|
||||||
protected TAbstractStringBuilder appendCodePoint(int codePoint) {
|
protected TAbstractStringBuilder appendCodePoint(int codePoint) {
|
||||||
if (codePoint < UTF16Helper.SUPPLEMENTARY_PLANE) {
|
if (codePoint < TCharacter.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||||
return append((char)codePoint);
|
return append((char)codePoint);
|
||||||
}
|
}
|
||||||
ensureCapacity(length + 2);
|
ensureCapacity(length + 2);
|
||||||
buffer[length++] = UTF16Helper.highSurrogate(codePoint);
|
buffer[length++] = TCharacter.highSurrogate(codePoint);
|
||||||
buffer[length++] = UTF16Helper.lowSurrogate(codePoint);
|
buffer[length++] = TCharacter.lowSurrogate(codePoint);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
*/
|
*/
|
||||||
package org.teavm.classlib.java.lang;
|
package org.teavm.classlib.java.lang;
|
||||||
|
|
||||||
import org.teavm.classlib.impl.charset.UTF16Helper;
|
|
||||||
import org.teavm.classlib.impl.unicode.UnicodeHelper;
|
import org.teavm.classlib.impl.unicode.UnicodeHelper;
|
||||||
import org.teavm.platform.Platform;
|
import org.teavm.platform.Platform;
|
||||||
import org.teavm.platform.metadata.MetadataProvider;
|
import org.teavm.platform.metadata.MetadataProvider;
|
||||||
|
@ -96,6 +95,13 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
private static UnicodeHelper.Range[] classMapping;
|
private static UnicodeHelper.Range[] classMapping;
|
||||||
private char value;
|
private char value;
|
||||||
private static TCharacter[] characterCache = new TCharacter[128];
|
private static TCharacter[] characterCache = new TCharacter[128];
|
||||||
|
private static final int SURROGATE_NEUTRAL_BIT_MASK = 0xF800;
|
||||||
|
private static final int SURROGATE_BITS = 0xD800;
|
||||||
|
private static final int SURROGATE_BIT_MASK = 0xFC00;
|
||||||
|
private static final int SURROGATE_BIT_INV_MASK = 0x03FF;
|
||||||
|
private static final int HIGH_SURROGATE_BITS = 0xD800;
|
||||||
|
private static final int LOW_SURROGATE_BITS = 0xDC00;
|
||||||
|
private static final int MEANINGFUL_SURROGATE_BITS = 10;
|
||||||
|
|
||||||
public TCharacter(char value) {
|
public TCharacter(char value) {
|
||||||
this.value = value;
|
this.value = value;
|
||||||
|
@ -152,11 +158,11 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isHighSurrogate(char ch) {
|
public static boolean isHighSurrogate(char ch) {
|
||||||
return UTF16Helper.isHighSurrogate(ch);
|
return (ch & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isLowSurrogate(char ch) {
|
public static boolean isLowSurrogate(char ch) {
|
||||||
return UTF16Helper.isLowSurrogate(ch);
|
return (ch & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isSurrogate(char ch) {
|
public static boolean isSurrogate(char ch) {
|
||||||
|
@ -172,7 +178,8 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int toCodePoint(char high, char low) {
|
public static int toCodePoint(char high, char low) {
|
||||||
return UTF16Helper.buildCodePoint(high, low);
|
return (((high & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS) | (low & SURROGATE_BIT_INV_MASK)) +
|
||||||
|
MIN_SUPPLEMENTARY_CODE_POINT;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int codePointAt(TCharSequence seq, int index) {
|
public static int codePointAt(TCharSequence seq, int index) {
|
||||||
|
@ -216,11 +223,12 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static char highSurrogate(int codePoint) {
|
public static char highSurrogate(int codePoint) {
|
||||||
return UTF16Helper.highSurrogate(codePoint);
|
codePoint -= MIN_SUPPLEMENTARY_CODE_POINT;
|
||||||
|
return (char)(HIGH_SURROGATE_BITS | (codePoint >> MEANINGFUL_SURROGATE_BITS) & SURROGATE_BIT_INV_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static char lowSurrogate(int codePoint) {
|
public static char lowSurrogate(int codePoint) {
|
||||||
return UTF16Helper.lowSurrogate(codePoint);
|
return (char)(LOW_SURROGATE_BITS | codePoint & SURROGATE_BIT_INV_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static char toLowerCase(char ch) {
|
public static char toLowerCase(char ch) {
|
||||||
|
@ -309,9 +317,9 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
private static native StringResource obtainClasses();
|
private static native StringResource obtainClasses();
|
||||||
|
|
||||||
public static int toChars(int codePoint, char[] dst, int dstIndex) {
|
public static int toChars(int codePoint, char[] dst, int dstIndex) {
|
||||||
if (codePoint >= UTF16Helper.SUPPLEMENTARY_PLANE) {
|
if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||||
dst[dstIndex] = UTF16Helper.highSurrogate(codePoint);
|
dst[dstIndex] = highSurrogate(codePoint);
|
||||||
dst[dstIndex + 1] = UTF16Helper.lowSurrogate(codePoint);
|
dst[dstIndex + 1] = lowSurrogate(codePoint);
|
||||||
return 2;
|
return 2;
|
||||||
} else {
|
} else {
|
||||||
dst[dstIndex] = (char)codePoint;
|
dst[dstIndex] = (char)codePoint;
|
||||||
|
@ -320,8 +328,8 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static char[] toChars(int codePoint) {
|
public static char[] toChars(int codePoint) {
|
||||||
if (codePoint >= UTF16Helper.SUPPLEMENTARY_PLANE) {
|
if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||||
return new char[] { UTF16Helper.highSurrogate(codePoint), UTF16Helper.lowSurrogate(codePoint) };
|
return new char[] { highSurrogate(codePoint), lowSurrogate(codePoint) };
|
||||||
} else {
|
} else {
|
||||||
return new char[] { (char)codePoint };
|
return new char[] { (char)codePoint };
|
||||||
}
|
}
|
||||||
|
@ -331,7 +339,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
int count = endIndex - beginIndex;
|
int count = endIndex - beginIndex;
|
||||||
--endIndex;
|
--endIndex;
|
||||||
for (int i = beginIndex; i < endIndex; ++i) {
|
for (int i = beginIndex; i < endIndex; ++i) {
|
||||||
if (UTF16Helper.isHighSurrogate(seq.charAt(i)) && UTF16Helper.isLowSurrogate(seq.charAt(i + 1))) {
|
if (isHighSurrogate(seq.charAt(i)) && isLowSurrogate(seq.charAt(i + 1))) {
|
||||||
--count;
|
--count;
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
|
@ -343,7 +351,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
int r = count;
|
int r = count;
|
||||||
--count;
|
--count;
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
if (UTF16Helper.isHighSurrogate(a[offset]) && UTF16Helper.isLowSurrogate(a[offset + i + 1])) {
|
if (isHighSurrogate(a[offset]) && isLowSurrogate(a[offset + i + 1])) {
|
||||||
--r;
|
--r;
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
|
@ -353,8 +361,8 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
|
|
||||||
public static int offsetByCodePoints(TCharSequence seq, int index, int codePointOffset) {
|
public static int offsetByCodePoints(TCharSequence seq, int index, int codePointOffset) {
|
||||||
for (int i = 0; i < codePointOffset; ++i) {
|
for (int i = 0; i < codePointOffset; ++i) {
|
||||||
if (index < seq.length() - 1 && UTF16Helper.isHighSurrogate(seq.charAt(index)) &&
|
if (index < seq.length() - 1 && isHighSurrogate(seq.charAt(index)) &&
|
||||||
UTF16Helper.isLowSurrogate(seq.charAt(index + 1))) {
|
isLowSurrogate(seq.charAt(index + 1))) {
|
||||||
index += 2;
|
index += 2;
|
||||||
} else {
|
} else {
|
||||||
index++;
|
index++;
|
||||||
|
@ -365,8 +373,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||||
|
|
||||||
public static int offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset) {
|
public static int offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset) {
|
||||||
for (int i = 0; i < codePointOffset; ++i) {
|
for (int i = 0; i < codePointOffset; ++i) {
|
||||||
if (index < count - 1 && UTF16Helper.isHighSurrogate(a[index + start]) &&
|
if (index < count - 1 && isHighSurrogate(a[index + start]) && isLowSurrogate(a[index + start + 1])) {
|
||||||
UTF16Helper.isLowSurrogate(a[index + start + 1])) {
|
|
||||||
index += 2;
|
index += 2;
|
||||||
} else {
|
} else {
|
||||||
index++;
|
index++;
|
||||||
|
|
|
@ -15,9 +15,12 @@
|
||||||
*/
|
*/
|
||||||
package org.teavm.classlib.java.lang;
|
package org.teavm.classlib.java.lang;
|
||||||
|
|
||||||
import org.teavm.classlib.impl.charset.*;
|
|
||||||
import org.teavm.classlib.java.io.TSerializable;
|
import org.teavm.classlib.java.io.TSerializable;
|
||||||
import org.teavm.classlib.java.io.TUnsupportedEncodingException;
|
import org.teavm.classlib.java.io.TUnsupportedEncodingException;
|
||||||
|
import org.teavm.classlib.java.nio.TByteBuffer;
|
||||||
|
import org.teavm.classlib.java.nio.TCharBuffer;
|
||||||
|
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||||
|
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
|
||||||
import org.teavm.classlib.java.util.TArrays;
|
import org.teavm.classlib.java.util.TArrays;
|
||||||
import org.teavm.classlib.java.util.TComparator;
|
import org.teavm.classlib.java.util.TComparator;
|
||||||
import org.teavm.classlib.java.util.THashMap;
|
import org.teavm.classlib.java.util.THashMap;
|
||||||
|
@ -61,15 +64,15 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
|
|
||||||
public TString(byte[] bytes, int offset, int length, TString charsetName) throws TUnsupportedEncodingException {
|
public TString(byte[] bytes, int offset, int length, TString charsetName) throws TUnsupportedEncodingException {
|
||||||
Charset charset = Charset.get(charsetName.toString());
|
this(bytes, offset, length, TCharset.forName(charsetName.toString()));
|
||||||
if (charset == null) {
|
}
|
||||||
throw new TUnsupportedEncodingException(TString.wrap("Unknown encoding:" + charsetName));
|
|
||||||
}
|
public TString(byte[] bytes, int offset, int length, TCharset charset) {
|
||||||
initWithBytes(bytes, offset, length, charset);
|
initWithBytes(bytes, offset, length, charset);
|
||||||
}
|
}
|
||||||
|
|
||||||
public TString(byte[] bytes, int offset, int length) {
|
public TString(byte[] bytes, int offset, int length) {
|
||||||
initWithBytes(bytes, offset, length, new UTF8Charset());
|
initWithBytes(bytes, offset, length, new TUTF8Charset());
|
||||||
}
|
}
|
||||||
|
|
||||||
public TString(byte[] bytes) {
|
public TString(byte[] bytes) {
|
||||||
|
@ -80,14 +83,18 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
this(bytes, 0, bytes.length, charsetName);
|
this(bytes, 0, bytes.length, charsetName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public TString(byte[] bytes, TCharset charset) {
|
||||||
|
this(bytes, 0, bytes.length, charset);
|
||||||
|
}
|
||||||
|
|
||||||
public TString(int[] codePoints, int offset, int count) {
|
public TString(int[] codePoints, int offset, int count) {
|
||||||
characters = new char[count * 2];
|
characters = new char[count * 2];
|
||||||
int charCount = 0;
|
int charCount = 0;
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
int codePoint = codePoints[offset++];
|
int codePoint = codePoints[offset++];
|
||||||
if (codePoint >= UTF16Helper.SUPPLEMENTARY_PLANE) {
|
if (codePoint >= TCharacter.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||||
characters[charCount++] = UTF16Helper.highSurrogate(codePoint);
|
characters[charCount++] = TCharacter.highSurrogate(codePoint);
|
||||||
characters[charCount++] = UTF16Helper.lowSurrogate(codePoint);
|
characters[charCount++] = TCharacter.lowSurrogate(codePoint);
|
||||||
} else {
|
} else {
|
||||||
characters[charCount++] = (char)codePoint;
|
characters[charCount++] = (char)codePoint;
|
||||||
}
|
}
|
||||||
|
@ -97,19 +104,14 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void initWithBytes(byte[] bytes, int offset, int length, Charset charset) {
|
private void initWithBytes(byte[] bytes, int offset, int length, TCharset charset) {
|
||||||
TStringBuilder sb = new TStringBuilder(bytes.length * 2);
|
TCharBuffer buffer = charset.decode(TByteBuffer.wrap(bytes, offset, length));
|
||||||
this.characters = new char[sb.length()];
|
if (buffer.hasArray() && buffer.position() == 0 && buffer.limit() == buffer.capacity()) {
|
||||||
ByteBuffer source = new ByteBuffer(bytes, offset, offset + length);
|
characters = buffer.array();
|
||||||
char[] destChars = new char[TMath.max(8, TMath.min(length * 2, 1024))];
|
} else {
|
||||||
CharBuffer dest = new CharBuffer(destChars, 0, destChars.length);
|
characters = new char[buffer.remaining()];
|
||||||
while (!source.end()) {
|
buffer.get(characters);
|
||||||
charset.decode(source, dest);
|
|
||||||
sb.append(destChars, 0, dest.position());
|
|
||||||
dest.rewind(0);
|
|
||||||
}
|
}
|
||||||
characters = new char[sb.length()];
|
|
||||||
sb.getChars(0, sb.length(), characters, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public TString(TStringBuilder sb) {
|
public TString(TStringBuilder sb) {
|
||||||
|
@ -283,7 +285,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
|
|
||||||
public int indexOf(int ch, int fromIndex) {
|
public int indexOf(int ch, int fromIndex) {
|
||||||
if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
|
if (ch < TCharacter.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||||
char bmpChar = (char)ch;
|
char bmpChar = (char)ch;
|
||||||
for (int i = fromIndex; i < characters.length; ++i) {
|
for (int i = fromIndex; i < characters.length; ++i) {
|
||||||
if (characters[i] == bmpChar) {
|
if (characters[i] == bmpChar) {
|
||||||
|
@ -292,8 +294,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
} else {
|
} else {
|
||||||
char hi = UTF16Helper.highSurrogate(ch);
|
char hi = TCharacter.highSurrogate(ch);
|
||||||
char lo = UTF16Helper.lowSurrogate(ch);
|
char lo = TCharacter.lowSurrogate(ch);
|
||||||
for (int i = fromIndex; i < characters.length - 1; ++i) {
|
for (int i = fromIndex; i < characters.length - 1; ++i) {
|
||||||
if (characters[i] == hi && characters[i + 1] == lo) {
|
if (characters[i] == hi && characters[i + 1] == lo) {
|
||||||
return i;
|
return i;
|
||||||
|
@ -308,7 +310,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
|
|
||||||
public int lastIndexOf(int ch, int fromIndex) {
|
public int lastIndexOf(int ch, int fromIndex) {
|
||||||
if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
|
if (ch < TCharacter.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||||
char bmpChar = (char)ch;
|
char bmpChar = (char)ch;
|
||||||
for (int i = fromIndex; i >= 0; --i) {
|
for (int i = fromIndex; i >= 0; --i) {
|
||||||
if (characters[i] == bmpChar) {
|
if (characters[i] == bmpChar) {
|
||||||
|
@ -317,8 +319,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
} else {
|
} else {
|
||||||
char hi = UTF16Helper.highSurrogate(ch);
|
char hi = TCharacter.highSurrogate(ch);
|
||||||
char lo = UTF16Helper.lowSurrogate(ch);
|
char lo = TCharacter.lowSurrogate(ch);
|
||||||
for (int i = fromIndex; i >= 1; --i) {
|
for (int i = fromIndex; i >= 1; --i) {
|
||||||
if (characters[i] == lo && characters[i - 1] == hi) {
|
if (characters[i] == lo && characters[i - 1] == hi) {
|
||||||
return i - 1;
|
return i - 1;
|
||||||
|
@ -550,34 +552,22 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getBytes(TString charsetName) throws TUnsupportedEncodingException {
|
public byte[] getBytes(TString charsetName) throws TUnsupportedEncodingException {
|
||||||
Charset charset = Charset.get(charsetName.toString());
|
return getBytes(TCharset.forName(charsetName.toString()));
|
||||||
if (charset == null) {
|
|
||||||
throw new TUnsupportedEncodingException(TString.wrap("Unsupported encoding: " + charsetName));
|
|
||||||
}
|
|
||||||
return getBytes(charset);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getBytes() {
|
public byte[] getBytes() {
|
||||||
return getBytes(new UTF8Charset());
|
return getBytes(new TUTF8Charset());
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] getBytes(Charset charset) {
|
public byte[] getBytes(TCharset charset) {
|
||||||
byte[] result = new byte[length() * 2];
|
TByteBuffer buffer = charset.encode(TCharBuffer.wrap(characters));
|
||||||
int resultLength = 0;
|
if (buffer.hasArray() && buffer.position() == 0 && buffer.limit() == buffer.capacity()) {
|
||||||
byte[] destArray = new byte[TMath.max(16, TMath.min(length() * 2, 4096))];
|
return buffer.array();
|
||||||
ByteBuffer dest = new ByteBuffer(destArray);
|
} else {
|
||||||
CharBuffer src = new CharBuffer(characters);
|
byte[] result = new byte[buffer.remaining()];
|
||||||
while (!src.end()) {
|
buffer.get(result);
|
||||||
charset.encode(src, dest);
|
return result;
|
||||||
if (resultLength + dest.position() > result.length) {
|
|
||||||
result = TArrays.copyOf(result, result.length * 2);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < dest.position(); ++i) {
|
|
||||||
result[resultLength++] = destArray[i];
|
|
||||||
}
|
|
||||||
dest.rewind(0);
|
|
||||||
}
|
}
|
||||||
return TArrays.copyOf(result, resultLength);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -601,11 +591,11 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
int[] codePoints = new int[characters.length];
|
int[] codePoints = new int[characters.length];
|
||||||
int codePointCount = 0;
|
int codePointCount = 0;
|
||||||
for (int i = 0; i < characters.length; ++i) {
|
for (int i = 0; i < characters.length; ++i) {
|
||||||
if (i == characters.length - 1 || !UTF16Helper.isHighSurrogate(characters[i]) ||
|
if (i == characters.length - 1 || !TCharacter.isHighSurrogate(characters[i]) ||
|
||||||
!UTF16Helper.isLowSurrogate(characters[i + 1])) {
|
!TCharacter.isLowSurrogate(characters[i + 1])) {
|
||||||
codePoints[codePointCount++] = TCharacter.toLowerCase(characters[i]);
|
codePoints[codePointCount++] = TCharacter.toLowerCase(characters[i]);
|
||||||
} else {
|
} else {
|
||||||
codePoints[codePointCount++] = TCharacter.toLowerCase(UTF16Helper.buildCodePoint(
|
codePoints[codePointCount++] = TCharacter.toLowerCase(TCharacter.toCodePoint(
|
||||||
characters[i], characters[i + 1]));
|
characters[i], characters[i + 1]));
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
|
@ -620,11 +610,11 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||||
int[] codePoints = new int[characters.length];
|
int[] codePoints = new int[characters.length];
|
||||||
int codePointCount = 0;
|
int codePointCount = 0;
|
||||||
for (int i = 0; i < characters.length; ++i) {
|
for (int i = 0; i < characters.length; ++i) {
|
||||||
if (i == characters.length - 1 || !UTF16Helper.isHighSurrogate(characters[i]) ||
|
if (i == characters.length - 1 || !TCharacter.isHighSurrogate(characters[i]) ||
|
||||||
!UTF16Helper.isLowSurrogate(characters[i + 1])) {
|
!TCharacter.isLowSurrogate(characters[i + 1])) {
|
||||||
codePoints[codePointCount++] = TCharacter.toUpperCase(characters[i]);
|
codePoints[codePointCount++] = TCharacter.toUpperCase(characters[i]);
|
||||||
} else {
|
} else {
|
||||||
codePoints[codePointCount++] = TCharacter.toUpperCase(UTF16Helper.buildCodePoint(
|
codePoints[codePointCount++] = TCharacter.toUpperCase(TCharacter.toCodePoint(
|
||||||
characters[i], characters[i + 1]));
|
characters[i], characters[i + 1]));
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@ public abstract class TCharsetDecoder {
|
||||||
private TCharset charset;
|
private TCharset charset;
|
||||||
private float averageCharsPerByte;
|
private float averageCharsPerByte;
|
||||||
private float maxCharsPerByte;
|
private float maxCharsPerByte;
|
||||||
private String replacement = "?";
|
private String replacement = "\uFFFD";
|
||||||
private TCodingErrorAction malformedAction = TCodingErrorAction.REPORT;
|
private TCodingErrorAction malformedAction = TCodingErrorAction.REPORT;
|
||||||
private TCodingErrorAction unmappableAction = TCodingErrorAction.REPORT;
|
private TCodingErrorAction unmappableAction = TCodingErrorAction.REPORT;
|
||||||
private int state;
|
private int state;
|
||||||
|
@ -126,8 +126,18 @@ public abstract class TCharsetDecoder {
|
||||||
return result;
|
return result;
|
||||||
} else if (result.isUnderflow()) {
|
} else if (result.isUnderflow()) {
|
||||||
if (endOfInput && in.hasRemaining()) {
|
if (endOfInput && in.hasRemaining()) {
|
||||||
state = END;
|
if (malformedAction == TCodingErrorAction.REPORT) {
|
||||||
return TCoderResult.malformedForLength(in.remaining());
|
return TCoderResult.malformedForLength(in.remaining());
|
||||||
|
} else {
|
||||||
|
if (out.remaining() > replacement.length()) {
|
||||||
|
in.position(in.position() + in.remaining());
|
||||||
|
if (malformedAction == TCodingErrorAction.REPLACE) {
|
||||||
|
out.put(replacement);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return TCoderResult.OVERFLOW;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
} else if (result.isMalformed()) {
|
} else if (result.isMalformed()) {
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
*/
|
*/
|
||||||
package org.teavm.classlib.java.nio.charset.impl;
|
package org.teavm.classlib.java.nio.charset.impl;
|
||||||
|
|
||||||
import org.teavm.classlib.impl.charset.UTF16Helper;
|
|
||||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||||
|
|
||||||
|
@ -44,7 +43,13 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
outArray[outPos++] = (char)(((b & 0x1F) << 6) | (inArray[inPos++] & 0x3F));
|
byte b2 = inArray[inPos++];
|
||||||
|
if (!checkMidByte(b2)) {
|
||||||
|
inPos -= 2;
|
||||||
|
result = TCoderResult.malformedForLength(1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
outArray[outPos++] = (char)(((b & 0x1F) << 6) | (b2 & 0x3F));
|
||||||
} else if ((b & 0xF0) == 0xE0) {
|
} else if ((b & 0xF0) == 0xE0) {
|
||||||
if (inPos + 2 > inSize) {
|
if (inPos + 2 > inSize) {
|
||||||
--inPos;
|
--inPos;
|
||||||
|
@ -55,6 +60,11 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||||
}
|
}
|
||||||
byte b2 = inArray[inPos++];
|
byte b2 = inArray[inPos++];
|
||||||
byte b3 = inArray[inPos++];
|
byte b3 = inArray[inPos++];
|
||||||
|
if (!checkMidByte(b2) || !checkMidByte(b3)) {
|
||||||
|
inPos -= 3;
|
||||||
|
result = TCoderResult.malformedForLength(1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
char c = (char)(((b & 0x0F) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3F));
|
char c = (char)(((b & 0x0F) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3F));
|
||||||
if (Character.isSurrogate(c)) {
|
if (Character.isSurrogate(c)) {
|
||||||
inPos -= 3;
|
inPos -= 3;
|
||||||
|
@ -72,7 +82,7 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||||
}
|
}
|
||||||
if (outPos + 2 > outSize) {
|
if (outPos + 2 > outSize) {
|
||||||
--inPos;
|
--inPos;
|
||||||
if (!controller.hasMoreOutput()) {
|
if (!controller.hasMoreOutput(2)) {
|
||||||
result = TCoderResult.OVERFLOW;
|
result = TCoderResult.OVERFLOW;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -80,9 +90,18 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||||
byte b2 = inArray[inPos++];
|
byte b2 = inArray[inPos++];
|
||||||
byte b3 = inArray[inPos++];
|
byte b3 = inArray[inPos++];
|
||||||
byte b4 = inArray[inPos++];
|
byte b4 = inArray[inPos++];
|
||||||
|
if (!checkMidByte(b2) || !checkMidByte(b3) || !checkMidByte(b4)) {
|
||||||
|
inPos -= 3;
|
||||||
|
result = TCoderResult.malformedForLength(1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
int code = ((b & 0x07) << 18) | ((b2 & 0x3f) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F);
|
int code = ((b & 0x07) << 18) | ((b2 & 0x3f) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F);
|
||||||
outArray[outPos++] = UTF16Helper.highSurrogate(code);
|
outArray[outPos++] = Character.highSurrogate(code);
|
||||||
outArray[outPos++] = UTF16Helper.lowSurrogate(code);
|
outArray[outPos++] = Character.lowSurrogate(code);
|
||||||
|
} else {
|
||||||
|
--inPos;
|
||||||
|
result = TCoderResult.malformedForLength(1);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,4 +109,8 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||||
controller.setOutPosition(outPos);
|
controller.setOutPosition(outPos);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean checkMidByte(byte b) {
|
||||||
|
return (b & 0xC0) == 0x80;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
*/
|
*/
|
||||||
package org.teavm.classlib.java.nio.charset.impl;
|
package org.teavm.classlib.java.nio.charset.impl;
|
||||||
|
|
||||||
import org.teavm.classlib.impl.charset.UTF16Helper;
|
|
||||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||||
|
|
||||||
|
@ -57,7 +56,7 @@ public class TUTF8Encoder extends TBufferedEncoder {
|
||||||
outArray[outPos++] = (byte)(0xE0 | (ch >> 12));
|
outArray[outPos++] = (byte)(0xE0 | (ch >> 12));
|
||||||
outArray[outPos++] = (byte)(0x80 | ((ch >> 6) & 0x3F));
|
outArray[outPos++] = (byte)(0x80 | ((ch >> 6) & 0x3F));
|
||||||
outArray[outPos++] = (byte)(0x80 | (ch & 0x3F));
|
outArray[outPos++] = (byte)(0x80 | (ch & 0x3F));
|
||||||
} else if (UTF16Helper.isHighSurrogate(ch)) {
|
} else if (Character.isHighSurrogate(ch)) {
|
||||||
if (inPos >= inSize) {
|
if (inPos >= inSize) {
|
||||||
if (!controller.hasMoreInput()) {
|
if (!controller.hasMoreInput()) {
|
||||||
result = TCoderResult.UNDERFLOW;
|
result = TCoderResult.UNDERFLOW;
|
||||||
|
@ -65,9 +64,9 @@ public class TUTF8Encoder extends TBufferedEncoder {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
char low = inArray[inPos++];
|
char low = inArray[inPos++];
|
||||||
if (!UTF16Helper.isLowSurrogate(low)) {
|
if (!Character.isLowSurrogate(low)) {
|
||||||
inPos -= 2;
|
inPos -= 2;
|
||||||
result = TCoderResult.malformedForLength(2);
|
result = TCoderResult.malformedForLength(1);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (outPos + 4 > outSize) {
|
if (outPos + 4 > outSize) {
|
||||||
|
@ -77,7 +76,7 @@ public class TUTF8Encoder extends TBufferedEncoder {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
int codePoint = UTF16Helper.buildCodePoint(ch, low);
|
int codePoint = Character.toCodePoint(ch, low);
|
||||||
outArray[outPos++] = (byte)(0xF0 | (codePoint >> 18));
|
outArray[outPos++] = (byte)(0xF0 | (codePoint >> 18));
|
||||||
outArray[outPos++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
|
outArray[outPos++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
|
||||||
outArray[outPos++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
|
outArray[outPos++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package org.teavm.classlib.java.nio.charset;
|
package org.teavm.classlib.java.nio.charset;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.*;
|
||||||
import static org.junit.Assert.assertTrue;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.CharBuffer;
|
import java.nio.CharBuffer;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
@ -46,6 +46,76 @@ public class UTF8Test {
|
||||||
runDecode(100, 600);
|
runDecode(100, 600);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void replaceMalformedSurrogatePair() {
|
||||||
|
Charset charset = Charset.forName("UTF-8");
|
||||||
|
ByteBuffer buffer = charset.encode("a\uD800\uD800b");
|
||||||
|
byte[] result = new byte[buffer.remaining()];
|
||||||
|
buffer.get(result);
|
||||||
|
assertArrayEquals(new byte[] { 97, 63, 63, 98 }, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void encodeSurrogate() {
|
||||||
|
Charset charset = Charset.forName("UTF-8");
|
||||||
|
ByteBuffer buffer = charset.encode("a\uD800\uDC00b");
|
||||||
|
byte[] result = new byte[buffer.remaining()];
|
||||||
|
buffer.get(result);
|
||||||
|
assertArrayEquals(new byte[] { 97, -16, -112, -128, -128, 98 }, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void replaceMalformedFirstByte() {
|
||||||
|
Charset charset = Charset.forName("UTF-8");
|
||||||
|
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte)0xFF, 98 }));
|
||||||
|
char[] result = new char[buffer.remaining()];
|
||||||
|
buffer.get(result);
|
||||||
|
assertEquals("a\uFFFDb", new String(result));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void replaceMalformedMidByte() {
|
||||||
|
Charset charset = Charset.forName("UTF-8");
|
||||||
|
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte)0xC0, 98, 98 }));
|
||||||
|
char[] result = new char[buffer.remaining()];
|
||||||
|
buffer.get(result);
|
||||||
|
assertEquals("a\uFFFDbb", new String(result));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void replaceDecodedSurrogate() {
|
||||||
|
Charset charset = Charset.forName("UTF-8");
|
||||||
|
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte)0xED, (byte)0xA0, (byte)0x80, 98 }));
|
||||||
|
char[] result = new char[buffer.remaining()];
|
||||||
|
buffer.get(result);
|
||||||
|
assertEquals("a\uFFFDb", new String(result));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void replaceDecodedSurrogatePair() {
|
||||||
|
Charset charset = Charset.forName("UTF-8");
|
||||||
|
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte)0xED, (byte)0xA0, (byte)0x80,
|
||||||
|
(byte)0xED, (byte)0xBF, (byte)0xBF, 98 }));
|
||||||
|
char[] result = new char[buffer.remaining()];
|
||||||
|
buffer.get(result);
|
||||||
|
assertEquals("a\uFFFD\uFFFDb", new String(result));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void decodeLongUTF8ByteArray() throws UnsupportedEncodingException {
|
||||||
|
byte[] bytes = new byte[16384];
|
||||||
|
for (int i = 0; i < bytes.length;) {
|
||||||
|
bytes[i++] = -16;
|
||||||
|
bytes[i++] = -66;
|
||||||
|
bytes[i++] = -78;
|
||||||
|
bytes[i++] = -69;
|
||||||
|
}
|
||||||
|
Charset charset = Charset.forName("UTF-8");
|
||||||
|
CharBuffer buffer = charset.decode(ByteBuffer.wrap(bytes));
|
||||||
|
assertEquals('\uD8BB', buffer.get(8190));
|
||||||
|
assertEquals('\uDCBB', buffer.get(8191));
|
||||||
|
}
|
||||||
|
|
||||||
private void runEncode(int inSize, int outSize) {
|
private void runEncode(int inSize, int outSize) {
|
||||||
char[] input = text.toCharArray();
|
char[] input = text.toCharArray();
|
||||||
byte[] output = new byte[16384];
|
byte[] output = new byte[16384];
|
||||||
|
|
Loading…
Reference in New Issue
Block a user