classlib: implement missing charsets

This commit is contained in:
Alexey Andreev 2021-03-30 19:45:55 +03:00
parent 9972fe0c29
commit aa8a91d030
25 changed files with 1286 additions and 181 deletions

View File

@ -45,7 +45,7 @@ public class TInputStreamReader extends TReader {
}
public TInputStreamReader(TInputStream in) {
this(in, new TUTF8Charset());
this(in, TUTF8Charset.INSTANCE);
}
public TInputStreamReader(TInputStream in, TCharsetDecoder decoder) {

View File

@ -33,7 +33,7 @@ public class TOutputStreamWriter extends TWriter {
private boolean closed;
public TOutputStreamWriter(TOutputStream out) {
this(nullCheck(out), new TUTF8Charset());
this(nullCheck(out), TUTF8Charset.INSTANCE);
}
public TOutputStreamWriter(TOutputStream out, final String enc) throws TUnsupportedEncodingException {

View File

@ -48,7 +48,7 @@ public class TPrintStream extends TFilterOutputStream {
public TPrintStream(TOutputStream out, boolean autoFlush) {
super(out);
this.autoFlush = autoFlush;
this.charset = new TUTF8Charset();
this.charset = TUTF8Charset.INSTANCE;
}
public TPrintStream(TOutputStream out) {

View File

@ -68,7 +68,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
}
public TString(byte[] bytes, int offset, int length) {
initWithBytes(bytes, offset, length, new TUTF8Charset());
initWithBytes(bytes, offset, length, TUTF8Charset.INSTANCE);
}
public TString(byte[] bytes) {
@ -574,7 +574,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
}
public byte[] getBytes() {
return getBytes(new TUTF8Charset());
return getBytes(TUTF8Charset.INSTANCE);
}
public byte[] getBytes(TCharset charset) {

View File

@ -18,7 +18,6 @@ package org.teavm.classlib.java.nio.charset;
import java.util.*;
import org.teavm.classlib.java.nio.TByteBuffer;
import org.teavm.classlib.java.nio.TCharBuffer;
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
public abstract class TCharset implements Comparable<TCharset> {
private String canonicalName;
@ -143,7 +142,12 @@ public abstract class TCharset implements Comparable<TCharset> {
private static final Map<String, TCharset> value = new HashMap<>();
static {
value.put("UTF-8", new TUTF8Charset());
TCharset[] charsets = { TStandardCharsets.UTF_8, TStandardCharsets.US_ASCII,
TStandardCharsets.ISO_8859_1, TStandardCharsets.UTF_16, TStandardCharsets.UTF_16BE,
TStandardCharsets.UTF_16LE };
for (TCharset charset : charsets) {
value.put(charset.name(), charset);
}
}
}
}

View File

@ -15,6 +15,8 @@
*/
package org.teavm.classlib.java.nio.charset;
import java.util.Objects;
public class TCoderResult {
public static final TCoderResult UNDERFLOW = new TCoderResult((byte) 0, 0);
public static final TCoderResult OVERFLOW = new TCoderResult((byte) 1, 0);
@ -90,4 +92,21 @@ public class TCoderResult {
throw new AssertionError();
}
}
/**
 * Compares this result with another object.
 *
 * <p>Two results are equal only when they are instances of exactly the same class and
 * carry the same {@code kind} and {@code length} values.</p>
 *
 * @param o the object to compare with, may be {@code null}
 * @return {@code true} if {@code o} is an equivalent coder result
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o == null) {
        return false;
    }
    if (o.getClass() != getClass()) {
        return false;
    }
    TCoderResult other = (TCoderResult) o;
    if (other.kind != kind) {
        return false;
    }
    return other.length == length;
}
/**
 * Computes a hash code from {@code kind} and {@code length}, the same two fields that
 * {@link #equals(Object)} compares, so that equal results produce equal hash codes.
 *
 * @return the combined hash of {@code kind} and {@code length}
 */
@Override
public int hashCode() {
return Objects.hash(kind, length);
}
}

View File

@ -15,11 +15,19 @@
*/
package org.teavm.classlib.java.nio.charset;
import org.teavm.classlib.java.nio.charset.impl.TAsciiCharset;
import org.teavm.classlib.java.nio.charset.impl.TIso8859Charset;
import org.teavm.classlib.java.nio.charset.impl.TUTF16Charset;
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
public final class TStandardCharsets {
private TStandardCharsets() {
}
public static final TCharset UTF_8 = new TUTF8Charset();
public static final TCharset UTF_8 = TUTF8Charset.INSTANCE;
public static final TCharset US_ASCII = new TAsciiCharset();
public static final TCharset ISO_8859_1 = new TIso8859Charset();
public static final TCharset UTF_16 = new TUTF16Charset("UTF-16", true, false);
public static final TCharset UTF_16BE = new TUTF16Charset("UTF-16BE", false, false);
public static final TCharset UTF_16LE = new TUTF16Charset("UTF-16LE", false, true);
}

View File

@ -0,0 +1,41 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
/**
 * Charset registered under the canonical name {@code US-ASCII}.
 *
 * <p>Acts as a factory for {@link TAsciiDecoder} and {@link TAsciiEncoder} instances.</p>
 */
public class TAsciiCharset extends TCharset {
    /**
     * Creates the charset with the name {@code US-ASCII} and an empty string array as the
     * second super-constructor argument (presumably the alias list).
     */
    public TAsciiCharset() {
        super("US-ASCII", new String[0]);
    }

    /**
     * Containment is decided by identity: only this very instance is reported as contained.
     *
     * @param cs the charset to test
     * @return {@code true} only if {@code cs} is this object
     */
    @Override
    public boolean contains(TCharset cs) {
        return this == cs;
    }

    /**
     * @return a fresh {@link TAsciiDecoder} bound to this charset
     */
    @Override
    public TCharsetDecoder newDecoder() {
        TCharsetDecoder decoder = new TAsciiDecoder(this);
        return decoder;
    }

    /**
     * @return a fresh {@link TAsciiEncoder} bound to this charset
     */
    @Override
    public TCharsetEncoder newEncoder() {
        TCharsetEncoder encoder = new TAsciiEncoder(this);
        return encoder;
    }
}

View File

@ -0,0 +1,46 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCoderResult;
/**
 * Decoder that turns seven-bit bytes into chars of the same numeric value.
 *
 * <p>Any byte with the high bit set is reported as malformed input of length one.</p>
 */
public class TAsciiDecoder extends TBufferedDecoder {
    /**
     * @param cs the charset this decoder belongs to
     */
    public TAsciiDecoder(TCharset cs) {
        super(cs, 1, 1);
    }

    /**
     * Decodes bytes from {@code inArray} into {@code outArray} until either array window is
     * exhausted or a byte with the high bit set is met.
     *
     * @param inArray    source bytes
     * @param inPos      index of the first byte to decode
     * @param inSize     end (exclusive) of the readable part of {@code inArray}
     * @param outArray   destination chars
     * @param outPos     index of the first char to write
     * @param outSize    end (exclusive) of the writable part of {@code outArray}
     * @param controller receives the final input and output positions
     * @return a malformed-input result of length 1 when a byte above 0x7F is found (the input
     *         position is left on that byte), otherwise {@code null}
     */
    @Override
    protected TCoderResult arrayDecode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize,
            Controller controller) {
        TCoderResult outcome = null;
        int src = inPos;
        int dst = outPos;
        while (src < inSize && dst < outSize) {
            byte raw = inArray[src];
            if (raw < 0) {
                // A negative byte has its high bit set, i.e. lies outside the 0x00..0x7F range.
                outcome = TCoderResult.malformedForLength(1);
                break;
            }
            outArray[dst] = (char) raw;
            dst++;
            src++;
        }
        controller.setInPosition(src);
        controller.setOutPosition(dst);
        return outcome;
    }
}

View File

@ -0,0 +1,65 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCoderResult;
/**
 * Encoder that writes every char below 128 as a single byte of the same value.
 *
 * <p>Chars that cannot be represented are reported through {@link TCoderResult}; in every
 * such case the input position handed to the controller points at the first char that was
 * <em>not</em> encoded, so the caller can inspect, skip or replace it.</p>
 */
public class TAsciiEncoder extends TBufferedEncoder {
    /** First char value that no longer fits into US-ASCII. */
    private static final int LIMIT = 128;

    /**
     * @param cs the charset this encoder belongs to
     */
    public TAsciiEncoder(TCharset cs) {
        super(cs, 1, 1);
    }

    /**
     * Encodes chars from {@code inArray} into {@code outArray} until either array window is
     * exhausted or a char that cannot be encoded is met.
     *
     * @param inArray    source chars
     * @param inPos      index of the first char to encode
     * @param inSize     end (exclusive) of the readable part of {@code inArray}
     * @param outArray   destination bytes
     * @param outPos     index of the first byte to write
     * @param outSize    end (exclusive) of the writable part of {@code outArray}
     * @param controller queried for further input and told the final positions
     * @return {@code null} when encoding may simply continue; {@link TCoderResult#UNDERFLOW}
     *         when a high surrogate ends the input and the controller reports no further
     *         input; a malformed result of length 1 for an unpaired surrogate; an unmappable
     *         result of length 2 for a valid surrogate pair and of length 1 for any other
     *         char of 128 or above
     */
    @Override
    protected TCoderResult arrayEncode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize,
            Controller controller) {
        TCoderResult result = null;
        while (inPos < inSize && outPos < outSize) {
            // Peek first and advance only after the char was really written, so that every
            // early exit leaves inPos on the first unprocessed char.
            char c = inArray[inPos];
            if (Character.isHighSurrogate(c)) {
                if (inPos + 1 >= inSize) {
                    // The second half of the pair is not in the array. Leave the high
                    // surrogate unconsumed in both cases: either the caller refills the
                    // array (null), or it learns that input ended in mid-pair (UNDERFLOW).
                    if (!controller.hasMoreInput(2)) {
                        result = TCoderResult.UNDERFLOW;
                    }
                } else if (Character.isLowSurrogate(inArray[inPos + 1])) {
                    // Well-formed supplementary code point, but outside of US-ASCII.
                    result = TCoderResult.unmappableForLength(2);
                } else {
                    result = TCoderResult.malformedForLength(1);
                }
                break;
            }
            if (Character.isLowSurrogate(c)) {
                // A low surrogate without a preceding high surrogate is malformed input,
                // not merely unmappable, so stop here instead of falling through.
                result = TCoderResult.malformedForLength(1);
                break;
            }
            if (c >= LIMIT) {
                result = TCoderResult.unmappableForLength(1);
                break;
            }
            outArray[outPos++] = (byte) c;
            inPos++;
        }
        controller.setInPosition(inPos);
        controller.setOutPosition(outPos);
        return result;
    }
}

View File

@ -22,6 +22,9 @@ import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
import org.teavm.classlib.java.nio.charset.TCoderResult;
public abstract class TBufferedDecoder extends TCharsetDecoder {
private byte[] inArray = new byte[512];
private char[] outArray = new char[512];
public TBufferedDecoder(TCharset cs, float averageCharsPerByte, float maxCharsPerByte) {
super(cs, averageCharsPerByte, maxCharsPerByte);
}
@ -29,12 +32,11 @@ public abstract class TBufferedDecoder extends TCharsetDecoder {
@Override
protected TCoderResult decodeLoop(TByteBuffer in, TCharBuffer out) {
// Use intermediate array to batch buffer operations
int outPos = 0;
byte[] inArray = new byte[Math.min(in.remaining(), 512)];
byte[] inArray = this.inArray;
int inPos = 0;
int inSize = 0;
char[] outArray = new char[Math.min(out.remaining(), 512)];
TCoderResult result = null;
char[] outArray = this.outArray;
TCoderResult result;
while (true) {
// If there were remaining bytes in input buffer, copy them to the beginning of input array
@ -55,7 +57,7 @@ public abstract class TBufferedDecoder extends TCharsetDecoder {
}
// Perform iteration
outPos = 0;
int outPos = 0;
int outSize = Math.min(out.remaining(), outArray.length);
Controller controller = new Controller(in, out);
result = arrayDecode(inArray, inPos, inSize, outArray, outPos, outSize, controller);

View File

@ -22,6 +22,9 @@ import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
import org.teavm.classlib.java.nio.charset.TCoderResult;
public abstract class TBufferedEncoder extends TCharsetEncoder {
private char[] inArray = new char[512];
private byte[] outArray = new byte[512];
public TBufferedEncoder(TCharset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement) {
super(cs, averageBytesPerChar, maxBytesPerChar, replacement);
}
@ -33,12 +36,11 @@ public abstract class TBufferedEncoder extends TCharsetEncoder {
@Override
protected TCoderResult encodeLoop(TCharBuffer in, TByteBuffer out) {
// Use intermediate array to batch buffer operations
int outPos = 0;
char[] inArray = new char[Math.min(in.remaining(), 512)];
char[] inArray = this.inArray;
int inPos = 0;
int inSize = 0;
byte[] outArray = new byte[Math.min(out.remaining(), 512)];
TCoderResult result = null;
byte[] outArray = this.outArray;
TCoderResult result;
while (true) {
// If there were remaining bytes in input buffer, copy them to the beginning of input array
@ -59,15 +61,19 @@ public abstract class TBufferedEncoder extends TCharsetEncoder {
}
// Perform iteration
outPos = 0;
int outPos = 0;
int outSize = Math.min(out.remaining(), outArray.length);
Controller controller = new Controller(in, out);
result = arrayEncode(inArray, inPos, inSize, outArray, outPos, outSize, controller);
inPos = controller.inPosition;
if (result == null && outPos == controller.outPosition) {
result = TCoderResult.UNDERFLOW;
}
outPos = controller.outPosition;
if (result == null) {
if (!in.hasRemaining() && inPos >= inSize) {
result = TCoderResult.UNDERFLOW;
} else if (!out.hasRemaining() && inPos >= inSize) {
result = TCoderResult.OVERFLOW;
}
}
// Write any output characters to out buffer
out.put(outArray, 0, outPos);

View File

@ -0,0 +1,41 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
/**
 * Charset registered under the canonical name {@code ISO-8859-1}.
 *
 * <p>Acts as a factory for {@link TIso8859Decoder} and {@link TIso8859Encoder} instances.</p>
 */
public class TIso8859Charset extends TCharset {
    /**
     * Creates the charset with the name {@code ISO-8859-1} and an empty string array as the
     * second super-constructor argument (presumably the alias list).
     */
    public TIso8859Charset() {
        super("ISO-8859-1", new String[0]);
    }

    /**
     * Containment is decided by identity: only this very instance is reported as contained.
     *
     * @param cs the charset to test
     * @return {@code true} only if {@code cs} is this object
     */
    @Override
    public boolean contains(TCharset cs) {
        return this == cs;
    }

    /**
     * @return a fresh {@link TIso8859Decoder} bound to this charset
     */
    @Override
    public TCharsetDecoder newDecoder() {
        TCharsetDecoder decoder = new TIso8859Decoder(this);
        return decoder;
    }

    /**
     * @return a fresh {@link TIso8859Encoder} bound to this charset
     */
    @Override
    public TCharsetEncoder newEncoder() {
        TCharsetEncoder encoder = new TIso8859Encoder(this);
        return encoder;
    }
}

View File

@ -0,0 +1,40 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCoderResult;
/**
 * Decoder that maps every byte to the char with the same unsigned value (0..255).
 *
 * <p>No byte value is rejected, so this decoder never reports an error on its own.</p>
 */
public class TIso8859Decoder extends TBufferedDecoder {
    /**
     * @param cs the charset this decoder belongs to
     */
    public TIso8859Decoder(TCharset cs) {
        super(cs, 1, 1);
    }

    /**
     * Copies as many bytes as both array windows allow, widening each one to a char.
     *
     * @param inArray    source bytes
     * @param inPos      index of the first byte to decode
     * @param inSize     end (exclusive) of the readable part of {@code inArray}
     * @param outArray   destination chars
     * @param outPos     index of the first char to write
     * @param outSize    end (exclusive) of the writable part of {@code outArray}
     * @param controller receives the final input and output positions
     * @return always {@code null}
     */
    @Override
    protected TCoderResult arrayDecode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize,
            Controller controller) {
        // Number of units that fit both windows; clamped so empty windows copy nothing.
        int count = Math.max(0, Math.min(inSize - inPos, outSize - outPos));
        for (int i = 0; i < count; i++) {
            // Mask to treat the byte as unsigned before widening.
            outArray[outPos + i] = (char) (inArray[inPos + i] & 0xFF);
        }
        controller.setInPosition(inPos + count);
        controller.setOutPosition(outPos + count);
        return null;
    }
}

View File

@ -0,0 +1,65 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCoderResult;
/**
 * Encoder that writes every char below 256 as a single byte of the same value.
 *
 * <p>Chars that cannot be represented are reported through {@link TCoderResult}; in every
 * such case the input position handed to the controller points at the first char that was
 * <em>not</em> encoded, so the caller can inspect, skip or replace it.</p>
 */
public class TIso8859Encoder extends TBufferedEncoder {
    /** First char value that no longer fits into ISO-8859-1. */
    private static final int LIMIT = 256;

    /**
     * @param cs the charset this encoder belongs to
     */
    public TIso8859Encoder(TCharset cs) {
        super(cs, 1, 1);
    }

    /**
     * Encodes chars from {@code inArray} into {@code outArray} until either array window is
     * exhausted or a char that cannot be encoded is met.
     *
     * @param inArray    source chars
     * @param inPos      index of the first char to encode
     * @param inSize     end (exclusive) of the readable part of {@code inArray}
     * @param outArray   destination bytes
     * @param outPos     index of the first byte to write
     * @param outSize    end (exclusive) of the writable part of {@code outArray}
     * @param controller queried for further input and told the final positions
     * @return {@code null} when encoding may simply continue; {@link TCoderResult#UNDERFLOW}
     *         when a high surrogate ends the input and the controller reports no further
     *         input; a malformed result of length 1 for an unpaired surrogate; an unmappable
     *         result of length 2 for a valid surrogate pair and of length 1 for any other
     *         char of 256 or above
     */
    @Override
    protected TCoderResult arrayEncode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize,
            Controller controller) {
        TCoderResult result = null;
        while (inPos < inSize && outPos < outSize) {
            // Peek first and advance only after the char was really written, so that every
            // early exit leaves inPos on the first unprocessed char.
            char c = inArray[inPos];
            if (Character.isHighSurrogate(c)) {
                if (inPos + 1 >= inSize) {
                    // The second half of the pair is not in the array. Leave the high
                    // surrogate unconsumed in both cases: either the caller refills the
                    // array (null), or it learns that input ended in mid-pair (UNDERFLOW).
                    if (!controller.hasMoreInput(2)) {
                        result = TCoderResult.UNDERFLOW;
                    }
                } else if (Character.isLowSurrogate(inArray[inPos + 1])) {
                    // Well-formed supplementary code point, but outside of ISO-8859-1.
                    result = TCoderResult.unmappableForLength(2);
                } else {
                    result = TCoderResult.malformedForLength(1);
                }
                break;
            }
            if (Character.isLowSurrogate(c)) {
                // A low surrogate without a preceding high surrogate is malformed input,
                // not merely unmappable, so stop here instead of falling through.
                result = TCoderResult.malformedForLength(1);
                break;
            }
            if (c >= LIMIT) {
                result = TCoderResult.unmappableForLength(1);
                break;
            }
            outArray[outPos++] = (byte) c;
            inPos++;
        }
        controller.setInPosition(inPos);
        controller.setOutPosition(outPos);
        return result;
    }
}

View File

@ -0,0 +1,46 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
/**
 * A UTF-16 charset flavour, described by a canonical name and two flags that are handed
 * unchanged to every decoder and encoder it creates.
 */
public class TUTF16Charset extends TCharset {
    /** Passed as the {@code bom} argument to created decoders and encoders. */
    private final boolean bom;
    /** Passed as the {@code littleEndian} argument to created decoders and encoders. */
    private final boolean littleEndian;

    /**
     * @param canonicalName name to register the charset under
     * @param bom           forwarded to {@link TUTF16Decoder} and {@link TUTF16Encoder}
     * @param littleEndian  forwarded to {@link TUTF16Decoder} and {@link TUTF16Encoder}
     */
    public TUTF16Charset(String canonicalName, boolean bom, boolean littleEndian) {
        super(canonicalName, new String[0]);
        this.littleEndian = littleEndian;
        this.bom = bom;
    }

    /**
     * Containment is decided by identity: only this very instance is reported as contained.
     *
     * @param cs the charset to test
     * @return {@code true} only if {@code cs} is this object
     */
    @Override
    public boolean contains(TCharset cs) {
        return this == cs;
    }

    /**
     * @return a fresh {@link TUTF16Decoder} configured with this charset's flags
     */
    @Override
    public TCharsetDecoder newDecoder() {
        TCharsetDecoder decoder = new TUTF16Decoder(this, bom, littleEndian);
        return decoder;
    }

    /**
     * @return a fresh {@link TUTF16Encoder} configured with this charset's flags
     */
    @Override
    public TCharsetEncoder newEncoder() {
        TCharsetEncoder encoder = new TUTF16Encoder(this, bom, littleEndian);
        return encoder;
    }
}

View File

@ -0,0 +1,167 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCoderResult;
/**
 * Decoder for UTF-16 byte sequences in either byte order.
 *
 * <p>When constructed with {@code bom == true}, the first two bytes seen are inspected once:
 * {@code FF FE} switches to little-endian, {@code FE FF} to big-endian, and in both cases the
 * mark is skipped. Any other leading bytes are decoded as ordinary data using the byte order
 * given to the constructor.</p>
 *
 * <p>Every exit path reports to the controller an input position that points at the first
 * byte that was <em>not</em> turned into output, so no input is lost when decoding is
 * resumed.</p>
 */
public class TUTF16Decoder extends TBufferedDecoder {
    /** {@code true} while a byte order mark still has to be looked for. */
    private boolean bom;
    /** Current byte order; may be changed once by a byte order mark. */
    private boolean littleEndian;

    /**
     * @param cs           the charset this decoder belongs to
     * @param bom          whether to look for a byte order mark at the start of input
     * @param littleEndian byte order to use unless a byte order mark says otherwise
     */
    public TUTF16Decoder(TCharset cs, boolean bom, boolean littleEndian) {
        super(cs, 0.5f, 0.5f);
        this.bom = bom;
        this.littleEndian = littleEndian;
    }

    /**
     * Handles the optional byte order mark and then decodes 16-bit code units.
     *
     * @param inArray    source bytes
     * @param inPos      index of the first byte to decode
     * @param inSize     end (exclusive) of the readable part of {@code inArray}
     * @param outArray   destination chars
     * @param outPos     index of the first char to write
     * @param outSize    end (exclusive) of the writable part of {@code outArray}
     * @param controller queried for further input/output and told the final positions
     * @return {@code null} when decoding may simply continue,
     *         {@link TCoderResult#UNDERFLOW} or {@link TCoderResult#OVERFLOW} when the
     *         controller reports that no more input or output is available, or a malformed
     *         result (length 4 for a high surrogate not followed by a low one, length 2 for
     *         a stray low surrogate)
     */
    @Override
    protected TCoderResult arrayDecode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize,
            Controller controller) {
        if (bom) {
            if (inPos + 2 > inSize) {
                // Not enough bytes to decide about the mark yet. Report the unchanged
                // positions explicitly, like every other exit path does.
                controller.setInPosition(inPos);
                controller.setOutPosition(outPos);
                return controller.hasMoreInput() ? null : TCoderResult.UNDERFLOW;
            }
            bom = false;
            int first = inArray[inPos] & 0xFF;
            int second = inArray[inPos + 1] & 0xFF;
            if (first == 0xFF && second == 0xFE) {
                littleEndian = true;
                inPos += 2;
            } else if (first == 0xFE && second == 0xFF) {
                littleEndian = false;
                inPos += 2;
            }
            // Anything else is not a mark: leave the bytes in place to be decoded as data.
        }
        return decode(inArray, inPos, inSize, outArray, outPos, outSize, controller, littleEndian);
    }

    /** Combines the two bytes at {@code pos} and {@code pos + 1} into one code unit. */
    private static char readUnit(byte[] array, int pos, boolean le) {
        int b1 = array[pos] & 0xFF;
        int b2 = array[pos + 1] & 0xFF;
        return (char) (le ? b1 | (b2 << 8) : b2 | (b1 << 8));
    }

    /** Decodes code units in the given byte order; shared by both endiannesses. */
    private TCoderResult decode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize,
            Controller controller, boolean le) {
        TCoderResult result = null;
        while (inPos < inSize && outPos < outSize) {
            if (inPos + 2 > inSize) {
                // Half a code unit left in the array.
                if (!controller.hasMoreInput(2)) {
                    result = TCoderResult.UNDERFLOW;
                }
                break;
            }
            // Bytes are only peeked; inPos advances after the output was really written.
            char c = readUnit(inArray, inPos, le);
            if (Character.isHighSurrogate(c)) {
                // A pair needs 4 bytes in total. Exactly 4 remaining bytes are enough,
                // hence a strict comparison.
                if (inPos + 4 > inSize) {
                    if (!controller.hasMoreInput(4)) {
                        result = TCoderResult.UNDERFLOW;
                    }
                    break;
                }
                char next = readUnit(inArray, inPos + 2, le);
                if (!Character.isLowSurrogate(next)) {
                    result = TCoderResult.malformedForLength(4);
                    break;
                }
                if (outPos + 2 > outSize) {
                    // No room for both halves: stop with the pair still unconsumed so it
                    // is decoded again once output space is available.
                    if (!controller.hasMoreOutput(2)) {
                        result = TCoderResult.OVERFLOW;
                    }
                    break;
                }
                outArray[outPos++] = c;
                outArray[outPos++] = next;
                inPos += 4;
            } else if (Character.isLowSurrogate(c)) {
                result = TCoderResult.malformedForLength(2);
                break;
            } else {
                outArray[outPos++] = c;
                inPos += 2;
            }
        }
        controller.setInPosition(inPos);
        controller.setOutPosition(outPos);
        return result;
    }
}

View File

@ -0,0 +1,162 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCoderResult;
/**
 * Encoder producing UTF-16 bytes in either byte order.
 *
 * <p>When constructed with {@code bom == true}, a byte order mark matching the configured
 * byte order ({@code FF FE} for little-endian, {@code FE FF} for big-endian) is written once,
 * before the first encoded char.</p>
 *
 * <p>Every exit path reports to the controller an input position that points at the first
 * char that was <em>not</em> written to the output.</p>
 */
public class TUTF16Encoder extends TBufferedEncoder {
    /** {@code true} while the byte order mark still has to be written. */
    private boolean bom;
    /** Byte order of the produced output. */
    private boolean littleEndian;

    /**
     * @param cs           the charset this encoder belongs to
     * @param bom          whether to emit a byte order mark before the first char
     * @param littleEndian whether the low byte of each code unit is written first
     */
    public TUTF16Encoder(TCharset cs, boolean bom, boolean littleEndian) {
        super(cs, 2, 4);
        this.bom = bom;
        this.littleEndian = littleEndian;
    }

    /**
     * Writes the pending byte order mark, if any, and then encodes chars as 16-bit units.
     *
     * @param inArray    source chars
     * @param inPos      index of the first char to encode
     * @param inSize     end (exclusive) of the readable part of {@code inArray}
     * @param outArray   destination bytes
     * @param outPos     index of the first byte to write
     * @param outSize    end (exclusive) of the writable part of {@code outArray}
     * @param controller queried for further input/output and told the final positions
     * @return {@code null} when encoding may simply continue,
     *         {@link TCoderResult#UNDERFLOW} or {@link TCoderResult#OVERFLOW} when the
     *         controller reports that no more input or output is available, or a malformed
     *         result of length 1 for an unpaired surrogate
     */
    @Override
    protected TCoderResult arrayEncode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize,
            Controller controller) {
        if (bom) {
            if (outPos + 2 > outSize) {
                // The mark does not fit. Report the unchanged positions explicitly and ask
                // for the two bytes actually needed, as the other output checks here do;
                // a single free byte must not count as "more output".
                controller.setInPosition(inPos);
                controller.setOutPosition(outPos);
                return controller.hasMoreOutput(2) ? null : TCoderResult.OVERFLOW;
            }
            bom = false;
            // U+FEFF written in the configured byte order.
            outPos = putUnit(outArray, outPos, '\uFEFF', littleEndian);
        }
        return encode(inArray, inPos, inSize, outArray, outPos, outSize, controller, littleEndian);
    }

    /** Writes one code unit as two bytes at {@code pos} and returns the position after them. */
    private static int putUnit(byte[] array, int pos, char unit, boolean le) {
        byte low = (byte) (unit & 0xFF);
        byte high = (byte) (unit >> 8);
        array[pos] = le ? low : high;
        array[pos + 1] = le ? high : low;
        return pos + 2;
    }

    /** Encodes chars in the given byte order; shared by both endiannesses. */
    private TCoderResult encode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize,
            Controller controller, boolean le) {
        TCoderResult result = null;
        while (inPos < inSize && outPos < outSize) {
            // Chars are only peeked; inPos advances after the output was really written.
            char c = inArray[inPos];
            if (Character.isHighSurrogate(c)) {
                if (inPos + 1 == inSize) {
                    // Second half of the pair is not in the array.
                    if (!controller.hasMoreInput(2)) {
                        result = TCoderResult.UNDERFLOW;
                    }
                    break;
                }
                char next = inArray[inPos + 1];
                if (!Character.isLowSurrogate(next)) {
                    result = TCoderResult.malformedForLength(1);
                    break;
                }
                if (outPos + 4 > outSize) {
                    // A pair is written as a whole or not at all.
                    if (!controller.hasMoreOutput(4)) {
                        result = TCoderResult.OVERFLOW;
                    }
                    break;
                }
                outPos = putUnit(outArray, outPos, c, le);
                outPos = putUnit(outArray, outPos, next, le);
                inPos += 2;
            } else if (Character.isLowSurrogate(c)) {
                result = TCoderResult.malformedForLength(1);
                break;
            } else {
                if (outPos + 2 > outSize) {
                    if (!controller.hasMoreOutput(2)) {
                        result = TCoderResult.OVERFLOW;
                    }
                    break;
                }
                outPos = putUnit(outArray, outPos, c, le);
                inPos++;
            }
        }
        controller.setInPosition(inPos);
        controller.setOutPosition(outPos);
        return result;
    }
}

View File

@ -20,7 +20,9 @@ import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
public class TUTF8Charset extends TCharset {
public TUTF8Charset() {
public static final TUTF8Charset INSTANCE = new TUTF8Charset();
private TUTF8Charset() {
super("UTF-8", new String[0]);
}

View File

@ -34,7 +34,7 @@ public class TUTF8Decoder extends TBufferedDecoder {
} else if ((b & 0xE0) == 0xC0) {
if (inPos >= inSize) {
--inPos;
if (!controller.hasMoreInput()) {
if (!controller.hasMoreInput(2)) {
result = TCoderResult.UNDERFLOW;
}
break;
@ -49,7 +49,7 @@ public class TUTF8Decoder extends TBufferedDecoder {
} else if ((b & 0xF0) == 0xE0) {
if (inPos + 2 > inSize) {
--inPos;
if (!controller.hasMoreInput()) {
if (!controller.hasMoreInput(3)) {
result = TCoderResult.UNDERFLOW;
}
break;
@ -71,7 +71,7 @@ public class TUTF8Decoder extends TBufferedDecoder {
} else if ((b & 0xF8) == 0xF0) {
if (inPos + 3 > inSize) {
--inPos;
if (!controller.hasMoreInput()) {
if (!controller.hasMoreInput(4)) {
result = TCoderResult.UNDERFLOW;
}
break;

View File

@ -0,0 +1,89 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.teavm.junit.TeaVMTestRunner;
import org.teavm.junit.WholeClassCompilation;
/**
 * Tests for the US-ASCII charset: encoding and decoding of a sample text with several
 * combinations of the two size arguments, plus the error results for input that US-ASCII
 * cannot represent.
 */
@RunWith(TeaVMTestRunner.class)
@WholeClassCompilation
public class AsciiTest {
    /** Hex dump of the expected bytes for {@link CharsetTestCommon#asciiText}. */
    private static String hex = CharsetTestCommon.bytesToHex(convert(CharsetTestCommon.asciiText));

    /**
     * Builds the reference encoding without using any charset: every char is simply
     * narrowed to a byte.
     */
    private static byte[] convert(String value) {
        int length = value.length();
        byte[] bytes = new byte[length];
        int index = 0;
        while (index < length) {
            bytes[index] = (byte) value.charAt(index);
            index++;
        }
        return bytes;
    }

    /** Shared driver for the encode tests; both sizes are passed through unchanged. */
    private void runEncode(int inSize, int outSize) {
        Charset charset = StandardCharsets.US_ASCII;
        CharsetTestCommon.runEncode(hex, CharsetTestCommon.asciiText, charset, inSize, outSize);
    }

    /** Shared driver for the decode tests; both sizes are passed through unchanged. */
    private void runDecode(int inSize, int outSize) {
        Charset charset = StandardCharsets.US_ASCII;
        CharsetTestCommon.runDecode(hex, CharsetTestCommon.asciiText, charset, inSize, outSize);
    }

    @Test
    public void encode1() {
        runEncode(600, 600);
    }

    @Test
    public void encode2() {
        runEncode(600, 100);
    }

    @Test
    public void encode3() {
        runEncode(100, 600);
    }

    @Test
    public void decode1() {
        runDecode(600, 600);
    }

    @Test
    public void decode2() {
        runDecode(600, 100);
    }

    @Test
    public void decode3() {
        runDecode(100, 600);
    }

    /** Non-ASCII chars and a surrogate pair are checked with expected lengths 1, 1 and 2. */
    @Test
    public void encodeUnmappable() {
        Charset ascii = StandardCharsets.US_ASCII;
        CharsetTestCommon.checkUnmappable(ascii, "ц", 1);
        CharsetTestCommon.checkUnmappable(ascii, "£", 1);
        CharsetTestCommon.checkUnmappable(ascii, "\uD800\uDC00b", 2);
    }

    /** A single byte with the high bit set is checked with expected length 1. */
    @Test
    public void decodeMalformed() {
        Charset ascii = StandardCharsets.US_ASCII;
        byte[] highBitByte = { (byte) 0xA3 };
        CharsetTestCommon.checkMalformed(ascii, highBitByte, 1);
    }
}

View File

@ -0,0 +1,207 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset;
import static org.junit.Assert.assertEquals;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;
final class CharsetTestCommon {
private static char[] hexDigits = "0123456789ABCDEF".toCharArray();
static final String text = ""
+ "Здесь в моем объяснении я отмечаю все эти цифры и числа. Мне, конечно, всё равно будет, но теперь "
+ "(и, может быть, только в эту минуту) я желаю, чтобы те, которые будут судить мой поступок, могли ясно "
+ "видеть, из какой логической цепи выводов вышло мое „последнее убеждение“. Я написал сейчас выше, что "
+ "окончательная решимость, которой недоставало мне для исполнения моего „последнего убеждения“, произошла "
+ "во мне, кажется, вовсе не из логического вывода, а от какого-то странного толчка, от одного странного "
+ "обстоятельства, может быть вовсе не связанного ничем с ходом дела. "
+ "Дней десять назад зашел ко мне Рогожин, "
+ "по одному своему делу, о котором здесь лишнее распространяться. Я никогда не видал Рогожина прежде, "
+ "но слышал о нем очень многое. Я дал ему все нужные справки, и он скоро ушел, а так как он и приходил "
+ "только за справками, то тем бы дело между нами и кончилось. Но он слишком заинтересовал меня, "
+ "и весь этот день я был под влиянием странных мыслей, так что решился пойти к нему на другой день сам, "
+ "отдать визит. Рогожин был мне очевидно не рад и даже „деликатно“ намекнул, что нам нечего продолжать "
+ "знакомство; но все-таки я провел очень любопытный час, как, вероятно, и он. "
+ "Между нами был такой контраст, "
+ "который не мог не сказаться нам обоим, особенно мне: я был человек, уже сосчитавший дни свои, а он - "
+ "живущий самою полною, непосредственною жизнью, настоящею минутой, без всякой заботы о „последних“ "
+ "выводах, цифрах или о чем бы то ни было, не касающемся того, на чем... на чем... ну хоть на чем он "
+ "помешан; пусть простит мне это выражение господин Рогожин, пожалуй хоть как плохому литератору, не "
+ "умевшему выразить свою мысль. Несмотря на всю его нелюбезность, мне показалось, что он человек с умом и "
+ "может многое понимать, хотя его мало что интересует из постороннего. Я не намекал ему о моем „последнем "
+ "убеждении“, но мне почему-то показалось, что он, слушая меня, угадал его. "
+ "Он промолчал, он ужасно молчалив. "
+ "Я намекнул ему, уходя, что, несмотря на всю между нами разницу и на все противоположности, - "
+ "les extrémités se touchent 1 (я растолковал ему это по-русски), так что, может быть, он и сам вовсе не "
+ "так далек от моего „последнего убеждения“, как кажется. На это он ответил мне очень угрюмою и кислою "
+ "гримасой, встал, сам сыскал мне мою фуражку, сделав вид, будто бы я сам ухожу, и просто-запросто вывел "
+ "меня из своего мрачного дома под видом того, что провожает меня из учтивости. Дом его поразил меня; "
+ "похож на кладбище, а ему, кажется, нравится, что, впрочем, понятно: такая полная, "
+ "непосредственная жизнь, которою он живет, слишком полна сама по себе, чтобы нуждаться в обстановке.";
// Sample English text for the charset tests; every string literal below is written with plain ASCII
// characters only. (The passage reads like an excerpt from Dickens's "Hard Times" - source not
// stated in this file.)
static String asciiText = ""
+ "Meanwhile, the various members of Sleary`s company gradually gathered together from the upper "
+ "regions, where they were quartered, and, from standing about, talking in low voices to one another "
+ "and to Mr. Childers, gradually insinuated themselves and him into the room. There were two or three "
+ "handsome young women among them, with their two or three husbands, and their two or three mothers, "
+ "and their eight or nine little children, who did the fairy business when required. "
+ "The father of one of the families was in the habit of balancing the father of another of the families "
+ "on the top of a great pole; the father of a third family often made a pyramid of both those fathers, "
+ "with Master Kidderminster for the apex, and himself for the base; all the fathers could dance upon "
+ "rolling casks, stand upon bottles, catch knives and balls, twirl hand-basins, ride upon anything, "
+ "jump over everything, and stick at nothing. All the mothers could (and did) dance, upon the slack "
+ "wire and the tight-rope, and perform rapid acts on bare-backed steeds; none of them were at all "
+ "particular in respect of showing their legs; and one of them, alone in a Greek chariot, drove six "
+ "in hand into every town they came to. They all assumed to be mighty rakish and knowing, they were "
+ "not very tidy in their private dresses, they were not at all orderly in their domestic arrangements, "
+ "and the combined literature of the whole company would have produced but a poor letter on any subject. "
+ "Yet there was a remarkable gentleness and childishness about these people, a special inaptitude "
+ "for any kind of sharp practice, and an untiring readiness to help and pity one another, deserving "
+ "often of as much respect, and always of as much generous construction, as the every-day virtues of "
+ "any class of people in the world.";
// Private no-op constructor: the class cannot be instantiated from outside; the members visible
// here are all static.
private CharsetTestCommon() {
}
/**
 * Encodes {@code text} with a fresh encoder of {@code charset}, feeding it in slices, and asserts
 * that the produced bytes, rendered as hex, equal {@code hex}.
 *
 * @param hex     expected encoded bytes as a hex string (compared with {@code bytesToHex} output)
 * @param text    text to encode
 * @param charset charset whose encoder is exercised
 * @param inSize  maximum number of chars offered to the encoder per call
 * @param outSize maximum number of output bytes offered to the encoder per call
 */
static void runEncode(String hex, String text, Charset charset, int inSize, int outSize) {
    char[] source = text.toCharArray();
    byte[] target = new byte[16384];
    int consumed = 0;
    int produced = 0;
    CharsetEncoder encoder = charset.newEncoder();
    CoderResult status;

    // Encoding phase: keep offering windows of the source/target arrays until everything has been
    // consumed or the encoder reports an error.
    while (true) {
        int chunk = Math.min(inSize, source.length - consumed);
        CharBuffer in = CharBuffer.wrap(source, consumed, chunk);
        int room = Math.min(outSize, target.length - produced);
        ByteBuffer out = ByteBuffer.wrap(target, produced, room);
        // endOfInput is true only when this window reaches the end of the source.
        boolean lastChunk = consumed + chunk >= source.length;
        status = encoder.encode(in, out, lastChunk);
        consumed = in.position();
        produced = out.position();
        if (status.isError() || consumed >= source.length) {
            break;
        }
    }
    assertEquals("Should be UNDERFLOW after encoding", CoderResult.UNDERFLOW, status);

    // Flush phase: repeat until the encoder reports underflow.
    while (true) {
        int room = Math.min(outSize, target.length - produced);
        ByteBuffer out = ByteBuffer.wrap(target, produced, room);
        status = encoder.flush(out);
        produced = out.position();
        if (status.isUnderflow()) {
            break;
        }
    }
    assertEquals("Should be UNDERFLOW after flushing", CoderResult.UNDERFLOW, status);

    byte[] encoded = Arrays.copyOf(target, produced);
    assertEquals(hex, bytesToHex(encoded));
}
/**
 * Decodes the bytes described by {@code hex} with a fresh decoder of {@code charset}, feeding it
 * in slices, and asserts that the decoded characters equal {@code text}.
 *
 * @param hex     input bytes as a hex string (parsed with {@code hexToBytes})
 * @param text    expected decoded text
 * @param charset charset whose decoder is exercised
 * @param inSize  maximum number of bytes offered to the decoder per call
 * @param outSize maximum number of output chars offered to the decoder per call
 */
static void runDecode(String hex, String text, Charset charset, int inSize, int outSize) {
    byte[] input = hexToBytes(hex);
    char[] output = new char[16384];
    int inPos = 0;
    int outPos = 0;
    CharsetDecoder decoder = charset.newDecoder();
    CoderResult result;
    // Decoding phase: offer windows of the input/output arrays until all input is consumed or the
    // decoder reports an error.
    do {
        int inLen = Math.min(inSize, input.length - inPos);
        ByteBuffer in = ByteBuffer.wrap(input, inPos, inLen);
        int outLen = Math.min(outSize, output.length - outPos);
        CharBuffer out = CharBuffer.wrap(output, outPos, outLen);
        // endOfInput is true only when this window reaches the end of the input.
        result = decoder.decode(in, out, inPos + inLen >= input.length);
        inPos = in.position();
        outPos = out.position();
    } while (!result.isError() && inPos < input.length);
    // The message names the decoding phase so a failure here is not mistaken for an encoder failure.
    assertEquals("Should be UNDERFLOW after decoding", CoderResult.UNDERFLOW, result);
    // Flush phase: repeat until the decoder reports underflow.
    do {
        int outLen = Math.min(outSize, output.length - outPos);
        CharBuffer out = CharBuffer.wrap(output, outPos, outLen);
        result = decoder.flush(out);
        outPos = out.position();
    } while (!result.isUnderflow());
    assertEquals("Should be UNDERFLOW after flushing", CoderResult.UNDERFLOW, result);
    output = Arrays.copyOf(output, outPos);
    assertEquals(text, new String(output));
}
/**
 * Renders each byte as two characters looked up in {@code hexDigits}: high nibble first, then low
 * nibble.
 *
 * @param bytes bytes to render
 * @return a string of {@code bytes.length * 2} characters
 */
static String bytesToHex(byte[] bytes) {
    StringBuilder hex = new StringBuilder(bytes.length * 2);
    for (byte value : bytes) {
        // Mask to 0..255 so negative bytes index the digit table correctly.
        int unsigned = value & 0xFF;
        hex.append(hexDigits[unsigned >> 4]);
        hex.append(hexDigits[unsigned & 0xF]);
    }
    return hex.toString();
}
/**
 * Parses a string of hexadecimal digit pairs into the bytes they denote.
 *
 * @param hex an even-length string of hex digits; upper- and lower-case letters are accepted
 * @return one byte per pair of digits, in order
 * @throws IllegalArgumentException if the length is odd or a character is not a hex digit
 */
static byte[] hexToBytes(String hex) {
    char[] chars = hex.toCharArray();
    if (chars.length % 2 != 0) {
        // An unpaired trailing digit would otherwise surface as an opaque index error.
        throw new IllegalArgumentException("Hex string must have an even length, got " + chars.length);
    }
    byte[] result = new byte[chars.length / 2];
    int j = 0;
    for (int i = 0; i < chars.length; i += 2) {
        int hi = digit(chars[i]);
        int lo = digit(chars[i + 1]);
        result[j++] = (byte) ((hi << 4) | lo);
    }
    return result;
}

/**
 * Returns the numeric value (0..15) of a single hexadecimal digit.
 *
 * @throws IllegalArgumentException if {@code c} is not one of 0-9, A-F or a-f
 */
private static int digit(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    // Fail loudly instead of silently producing a garbage nibble for a typo in test data.
    throw new IllegalArgumentException("Not a hexadecimal digit: '" + c + "'");
}
/**
 * Asserts that encoding {@code text} with a fresh encoder of {@code charset} immediately yields an
 * unmappable-character result of the given length, with neither buffer's position advanced.
 *
 * @param charset charset whose encoder is exercised
 * @param text    input expected to be rejected at its first character
 * @param length  expected length reported by the unmappable result
 */
static void checkUnmappable(Charset charset, String text, int length) {
    CharBuffer chars = CharBuffer.wrap(text);
    ByteBuffer bytes = ByteBuffer.wrap(new byte[100]);
    CharsetEncoder encoder = charset.newEncoder();
    CoderResult actual = encoder.encode(chars, bytes, true);
    CoderResult expected = CoderResult.unmappableForLength(length);
    assertEquals(expected, actual);
    // Nothing may have been consumed or produced.
    assertEquals(0, chars.position());
    assertEquals(0, bytes.position());
}
/**
 * Asserts that encoding {@code text} with a fresh encoder of {@code charset} immediately yields a
 * malformed-input result of the given length, with neither buffer's position advanced.
 *
 * @param charset charset whose encoder is exercised
 * @param text    input expected to be rejected at its first character
 * @param length  expected length reported by the malformed result
 */
static void checkMalformed(Charset charset, String text, int length) {
    CharBuffer chars = CharBuffer.wrap(text);
    ByteBuffer bytes = ByteBuffer.wrap(new byte[100]);
    CharsetEncoder encoder = charset.newEncoder();
    CoderResult actual = encoder.encode(chars, bytes, true);
    CoderResult expected = CoderResult.malformedForLength(length);
    assertEquals(expected, actual);
    // Nothing may have been consumed or produced.
    assertEquals(0, chars.position());
    assertEquals(0, bytes.position());
}
/**
 * Asserts that decoding {@code data} with a fresh decoder of {@code charset} immediately yields a
 * malformed-input result of the given length, with neither buffer's position advanced.
 *
 * @param charset charset whose decoder is exercised
 * @param data    bytes expected to be rejected from the first byte
 * @param length  expected length reported by the malformed result
 */
static void checkMalformed(Charset charset, byte[] data, int length) {
    ByteBuffer bytes = ByteBuffer.wrap(data);
    char[] sink = new char[100];
    CharBuffer chars = CharBuffer.wrap(sink);
    CharsetDecoder decoder = charset.newDecoder();
    CoderResult actual = decoder.decode(bytes, chars, true);
    CoderResult expected = CoderResult.malformedForLength(length);
    assertEquals(expected, actual);
    // Nothing may have been consumed or produced.
    assertEquals(0, bytes.position());
    assertEquals(0, chars.position());
}
}

View File

@ -0,0 +1,94 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.teavm.junit.TeaVMTestRunner;
import org.teavm.junit.WholeClassCompilation;
@RunWith(TeaVMTestRunner.class)
@WholeClassCompilation
public class Iso8859Test {
    // Expected ISO-8859-1 bytes of the shared ASCII sample, as hex. Computed by hand (one byte per
    // char) rather than through the charset under test.
    private static String hex = CharsetTestCommon.bytesToHex(convert(CharsetTestCommon.asciiText));

    /** Narrows every char of {@code value} to a single byte by a plain cast. */
    private static byte[] convert(String value) {
        char[] chars = value.toCharArray();
        byte[] narrowed = new byte[chars.length];
        int index = 0;
        for (char c : chars) {
            narrowed[index++] = (byte) c;
        }
        return narrowed;
    }

    // Encoding with various input/output slice sizes.

    @Test
    public void encode1() {
        runEncode(600, 600);
    }

    @Test
    public void encode2() {
        runEncode(600, 100);
    }

    @Test
    public void encode3() {
        runEncode(100, 600);
    }

    // Decoding with various input/output slice sizes.

    @Test
    public void decode1() {
        runDecode(600, 600);
    }

    @Test
    public void decode2() {
        runDecode(600, 100);
    }

    @Test
    public void decode3() {
        runDecode(100, 600);
    }

    @Test
    public void encodeUnmappable() {
        // A Cyrillic letter, then a surrogate pair followed by an ASCII letter.
        CharsetTestCommon.checkUnmappable(StandardCharsets.ISO_8859_1, "ц", 1);
        CharsetTestCommon.checkUnmappable(StandardCharsets.ISO_8859_1, "\uD800\uDC00b", 2);
    }

    @Test
    public void decodeNonAscii() {
        byte[] encoded = { (byte) 0xA3 };
        String decoded = new String(encoded, StandardCharsets.ISO_8859_1);
        assertEquals("£", decoded);
    }

    @Test
    public void encodeNonAscii() {
        byte[] expected = { (byte) 0xA3 };
        byte[] actual = "£".getBytes(StandardCharsets.ISO_8859_1);
        assertArrayEquals(expected, actual);
    }

    /** Runs the shared encode check for ISO-8859-1 with the given slice sizes. */
    private void runEncode(int inSize, int outSize) {
        Charset latin1 = StandardCharsets.ISO_8859_1;
        CharsetTestCommon.runEncode(hex, CharsetTestCommon.asciiText, latin1, inSize, outSize);
    }

    /** Runs the shared decode check for ISO-8859-1 with the given slice sizes. */
    private void runDecode(int inSize, int outSize) {
        Charset latin1 = StandardCharsets.ISO_8859_1;
        CharsetTestCommon.runDecode(hex, CharsetTestCommon.asciiText, latin1, inSize, outSize);
    }
}

View File

@ -0,0 +1,134 @@
/*
* Copyright 2021 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset;
import static org.junit.Assert.assertEquals;
import java.nio.charset.StandardCharsets;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.teavm.junit.TeaVMTestRunner;
import org.teavm.junit.WholeClassCompilation;
@RunWith(TeaVMTestRunner.class)
@WholeClassCompilation
public class UTF16Test {
    // Expected encodings of the shared sample text, computed by hand (two bytes per char) rather
    // than through the charsets under test.
    private static String hexLE = CharsetTestCommon.bytesToHex(convertLE(CharsetTestCommon.text));
    private static String hexBE = CharsetTestCommon.bytesToHex(convertBE(CharsetTestCommon.text));
    // Same data prefixed with the byte-order mark U+FEFF in the respective byte order.
    private static String hexLEBom = "FFFE" + hexLE;
    private static String hexBEBom = "FEFF" + hexBE;
    // True when the UTF-16 encoder emits a little-endian BOM (first byte 0xFF).
    // The literal must be cast to byte: a byte is promoted to a signed int for the comparison, so
    // comparing it with plain 0xFF (255) could never be true.
    private static boolean littleEndian = "0".getBytes(StandardCharsets.UTF_16)[0] == (byte) 0xFF;

    /** Converts each char of {@code text} to two bytes, low byte first. */
    private static byte[] convertLE(String text) {
        byte[] data = new byte[text.length() * 2];
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            data[i * 2] = (byte) (c & 0xFF);
            data[i * 2 + 1] = (byte) ((c >> 8) & 0xFF);
        }
        return data;
    }

    /** Converts each char of {@code text} to two bytes, high byte first. */
    private static byte[] convertBE(String text) {
        byte[] data = new byte[text.length() * 2];
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            data[i * 2] = (byte) ((c >> 8) & 0xFF);
            data[i * 2 + 1] = (byte) (c & 0xFF);
        }
        return data;
    }

    @Test
    public void encode1() {
        runEncode(600, 600);
    }

    @Test
    public void encode2() {
        runEncode(600, 100);
    }

    @Test
    public void encode3() {
        runEncode(100, 600);
    }

    @Test
    public void encode4() {
        // Odd output slice size: a two-byte unit cannot always fit at the end of a slice.
        runEncode(600, 99);
    }

    @Test
    public void decode1() {
        runDecode(600, 600);
    }

    @Test
    public void decode2() {
        runDecode(600, 100);
    }

    @Test
    public void decode3() {
        runDecode(100, 600);
    }

    @Test
    public void decode4() {
        // Odd input slice size: slices can end in the middle of a two-byte unit.
        runDecode(99, 600);
    }

    @Test
    public void encodeMalformedSurrogate() {
        CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16LE, "\uD800\uD800", 1);
        CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16LE, "\uD800a", 1);
        CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16LE, "\uDC00\uD800", 1);
    }

    @Test
    public void encodeSurrogate() {
        String hex = CharsetTestCommon.bytesToHex("\uD800\uDC00".getBytes(StandardCharsets.UTF_16BE));
        assertEquals("D800DC00", hex);
    }

    @Test
    public void decodeSurrogate() {
        // Exercise the decoder: turn the big-endian bytes of a surrogate pair back into a string.
        byte[] encoded = CharsetTestCommon.hexToBytes("D800DC00");
        String decoded = new String(encoded, StandardCharsets.UTF_16BE);
        assertEquals("\uD800\uDC00", decoded);
    }

    @Test
    public void decodeMalformedSurrogate() {
        CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16BE, CharsetTestCommon.hexToBytes("D800D800"), 4);
        CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16BE, CharsetTestCommon.hexToBytes("D8000041"), 4);
        CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16BE, CharsetTestCommon.hexToBytes("DC00D800"), 2);
    }

    /** Runs the shared encode check for UTF-16LE, UTF-16BE and BOM-prefixed UTF-16. */
    private void runEncode(int inSize, int outSize) {
        CharsetTestCommon.runEncode(hexLE, CharsetTestCommon.text, StandardCharsets.UTF_16LE, inSize, outSize);
        CharsetTestCommon.runEncode(hexBE, CharsetTestCommon.text, StandardCharsets.UTF_16BE, inSize, outSize);
        // The expected BOM variant follows whatever byte order the UTF-16 encoder was observed to use.
        CharsetTestCommon.runEncode(littleEndian ? hexLEBom : hexBEBom, CharsetTestCommon.text,
                StandardCharsets.UTF_16, inSize, outSize);
    }

    /** Runs the shared decode check for UTF-16LE, UTF-16BE and UTF-16 with either BOM or none. */
    private void runDecode(int inSize, int outSize) {
        CharsetTestCommon.runDecode(hexLE, CharsetTestCommon.text, StandardCharsets.UTF_16LE, inSize, outSize);
        CharsetTestCommon.runDecode(hexBE, CharsetTestCommon.text, StandardCharsets.UTF_16BE, inSize, outSize);
        CharsetTestCommon.runDecode(hexBEBom, CharsetTestCommon.text, StandardCharsets.UTF_16, inSize, outSize);
        CharsetTestCommon.runDecode(hexLEBom, CharsetTestCommon.text, StandardCharsets.UTF_16, inSize, outSize);
        // No BOM: the input here is the big-endian byte sequence.
        CharsetTestCommon.runDecode(hexBE, CharsetTestCommon.text, StandardCharsets.UTF_16, inSize, outSize);
    }
}

View File

@ -17,13 +17,11 @@ package org.teavm.classlib.java.nio.charset;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -33,54 +31,8 @@ import org.teavm.junit.WholeClassCompilation;
@RunWith(TeaVMTestRunner.class)
@WholeClassCompilation
public class UTF8Test {
private static char[] hexDigits = "0123456789ABCDEF".toCharArray();
// Fragment from "The Idiot" by F. Dostoevsky
private String text =
"" + "Здесь в моем объяснении я отмечаю все эти цифры и числа. Мне, конечно, всё равно будет, но теперь "
+ "(и, может быть, только в эту минуту) я желаю, чтобы те, которые будут судить мой поступок, "
+ "могли ясно "
+ "видеть, из какой логической цепи выводов вышло мое „последнее убеждение“. Я написал сейчас "
+ "выше, что "
+ "окончательная решимость, которой недоставало мне для исполнения моего „последнего убеждения“, "
+ "произошла "
+ "во мне, кажется, вовсе не из логического вывода, а от какого-то странного толчка, от одного "
+ "странного " + "обстоятельства, может быть вовсе не связанного ничем с ходом дела. "
+ "Дней десять назад зашел ко мне Рогожин, "
+ "по одному своему делу, о котором здесь лишнее распространяться. Я никогда не видал Рогожина "
+ "прежде, "
+ "но слышал о нем очень многое. Я дал ему все нужные справки, и он скоро ушел, а так как он и "
+ "приходил "
+ "только за справками, то тем бы дело между нами и кончилось. Но он слишком заинтересовал меня, "
+ "и весь этот день я был под влиянием странных мыслей, так что решился пойти к нему на другой "
+ "день сам, "
+ "отдать визит. Рогожин был мне очевидно не рад и даже „деликатно“ намекнул, что нам нечего "
+ "продолжать " + "знакомство; но все-таки я провел очень любопытный час, как, вероятно, и он. "
+ "Между нами был такой контраст, "
+ "который не мог не сказаться нам обоим, особенно мне: я был человек, уже сосчитавший дни свои, "
+ "а он - "
+ "живущий самою полною, непосредственною жизнью, настоящею минутой, без всякой заботы о "
+ "„последних“ "
+ "выводах, цифрах или о чем бы то ни было, не касающемся того, на чем... на чем... ну хоть на "
+ "чем он " + "помешан; пусть простит мне это выражение господин Рогожин, пожалуй хоть как плохому "
+ "литератору, не "
+ "умевшему выразить свою мысль. Несмотря на всю его нелюбезность, мне показалось, что он человек"
+ " с умом и "
+ "может многое понимать, хотя его мало что интересует из постороннего. Я не намекал ему о моем "
+ "„последнем " + "убеждении“, но мне почему-то показалось, что он, слушая меня, угадал его. "
+ "Он промолчал, он ужасно молчалив. "
+ "Я намекнул ему, уходя, что, несмотря на всю между нами разницу и на все противоположности, - "
+ "les extrémités se touchent 1 (я растолковал ему это по-русски), так что, может быть, он и сам "
+ "вовсе не "
+ "так далек от моего „последнего убеждения“, как кажется. На это он ответил мне очень угрюмою и "
+ "кислою " + "гримасой, встал, сам сыскал мне мою фуражку, сделав вид, будто бы я сам ухожу, и "
+ "просто-запросто вывел "
+ "меня из своего мрачного дома под видом того, что провожает меня из учтивости. Дом его поразил "
+ "меня; " + "похож на кладбище, а ему, кажется, нравится, что, впрочем, понятно: такая полная, "
+ "непосредственная "
+ "жизнь, которою он живет, слишком полна сама по себе, чтобы нуждаться в обстановке.";
private String hex = ""
+
"D097D0B4D0B5D181D18C20D0B220D0BCD0BED0B5D0BC20D0BED0B1D18AD18FD181D0BDD0B5D0BDD0B8D0B820D18F20D0BED"
+ "D097D0B4D0B5D181D18C20D0B220D0BCD0BED0B5D0BC20D0BED0B1D18AD18FD181D0BDD0B5D0BDD0B8D0B820D18F20D0BED"
+ "182D0BCD0B5D187D0B0D18E20D0B2D181D0B520D18DD182D0B820D186D0B8D184D180D18B20D0B820D187D0B8D181D0BBD"
+ "0B02E20D09CD0BDD0B52C20D0BAD0BED0BDD0B5D187D0BDD0BE2C20D0B2D181D19120D180D0B0D0B2D0BDD0BE20D0B1D18"
+ "3D0B4D0B5D1822C20D0BDD0BE20D182D0B5D0BFD0B5D180D18C2028D0B82C20D0BCD0BED0B6D0B5D18220D0B1D18BD182D"
@ -210,16 +162,29 @@ public class UTF8Test {
@Test
public void replaceMalformedSurrogatePair() {
Charset charset = Charset.forName("UTF-8");
Charset charset = StandardCharsets.UTF_8;
ByteBuffer buffer = charset.encode("a\uD800\uD800b");
byte[] result = new byte[buffer.remaining()];
buffer.get(result);
assertArrayEquals(new byte[] { 97, 63, 63, 98 }, result);
}
// Encoding "a", two high surrogates, "b" directly through an encoder must stop after the "a" with a
// malformed-input result of length 1.
@Test
public void malformedSurrogatePair() {
    byte[] storage = new byte[100];
    CharBuffer input = CharBuffer.wrap("a\uD800\uD800b");
    ByteBuffer output = ByteBuffer.wrap(storage);
    CoderResult outcome = StandardCharsets.UTF_8.newEncoder().encode(input, output, true);
    assertEquals(CoderResult.malformedForLength(1), outcome);
    // Only the leading 'a' (97) was written and consumed.
    byte[] written = Arrays.copyOf(storage, output.position());
    assertArrayEquals(new byte[] { 97 }, written);
    assertEquals(1, input.position());
}
@Test
public void encodeSurrogate() {
Charset charset = Charset.forName("UTF-8");
Charset charset = StandardCharsets.UTF_8;
ByteBuffer buffer = charset.encode("a\uD800\uDC00b");
byte[] result = new byte[buffer.remaining()];
buffer.get(result);
@ -228,7 +193,7 @@ public class UTF8Test {
@Test
public void encodeSupplementary() {
Charset charset = Charset.forName("UTF-8");
Charset charset = StandardCharsets.UTF_8;
StringBuilder sb = new StringBuilder();
sb.appendCodePoint(0xfedcb);
ByteBuffer buffer = charset.encode(sb.toString());
@ -239,7 +204,7 @@ public class UTF8Test {
@Test
public void replaceMalformedFirstByte() {
Charset charset = Charset.forName("UTF-8");
Charset charset = StandardCharsets.UTF_8;
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte) 0xFF, 98 }));
char[] result = new char[buffer.remaining()];
buffer.get(result);
@ -248,7 +213,7 @@ public class UTF8Test {
@Test
public void replaceMalformedMidByte() {
Charset charset = Charset.forName("UTF-8");
Charset charset = StandardCharsets.UTF_8;
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte) 0xC0, 98, 98 }));
char[] result = new char[buffer.remaining()];
buffer.get(result);
@ -265,115 +230,17 @@ public class UTF8Test {
bytes[i++] = -78;
bytes[i++] = -69;
}
Charset charset = Charset.forName("UTF-8");
Charset charset = StandardCharsets.UTF_8;
CharBuffer buffer = charset.decode(ByteBuffer.wrap(bytes));
assertEquals('\uD8BB', buffer.get(8190));
assertEquals('\uDCBB', buffer.get(8191));
}
private void runEncode(int inSize, int outSize) {
char[] input = text.toCharArray();
byte[] output = new byte[16384];
int inPos = 0;
int outPos = 0;
CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder();
CoderResult result = CoderResult.UNDERFLOW;
while (true) {
int inLen = Math.min(inSize, input.length - inPos);
CharBuffer in = CharBuffer.wrap(input, inPos, inLen);
int outLen = Math.min(outSize, output.length - outPos);
ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen);
result = encoder.encode(in, out, inPos + inLen >= input.length);
inPos = in.position();
outPos = out.position();
if (result.isError() || inPos >= input.length) {
break;
}
}
assertTrue("Should be UNDERFLOW after encoding", result.isUnderflow());
while (true) {
int outLen = Math.min(outSize, output.length - outPos);
ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen);
result = encoder.flush(out);
outPos = out.position();
if (result.isUnderflow()) {
break;
}
}
assertTrue("Should be UNDERFLOW after flushing", result.isUnderflow());
output = Arrays.copyOf(output, outPos);
assertEquals(hex, bytesToHex(output));
CharsetTestCommon.runEncode(hex, CharsetTestCommon.text, StandardCharsets.UTF_8, inSize, outSize);
}
private void runDecode(int inSize, int outSize) {
byte[] input = hexToBytes(hex);
char[] output = new char[16384];
int inPos = 0;
int outPos = 0;
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
CoderResult result = CoderResult.UNDERFLOW;
while (true) {
int inLen = Math.min(inSize, input.length - inPos);
ByteBuffer in = ByteBuffer.wrap(input, inPos, inLen);
int outLen = Math.min(outSize, output.length - outPos);
CharBuffer out = CharBuffer.wrap(output, outPos, outLen);
result = decoder.decode(in, out, inPos + inLen >= input.length);
inPos = in.position();
outPos = out.position();
if (result.isError() || inPos >= input.length) {
break;
}
}
assertTrue("Should be UNDERFLOW after encoding", result.isUnderflow());
while (true) {
int outLen = Math.min(outSize, output.length - outPos);
CharBuffer out = CharBuffer.wrap(output, outPos, outLen);
result = decoder.flush(out);
outPos = out.position();
if (result.isUnderflow()) {
break;
}
}
assertTrue("Should be UNDERFLOW after flushing", result.isUnderflow());
output = Arrays.copyOf(output, outPos);
assertEquals(text, new String(output));
}
private String bytesToHex(byte[] bytes) {
char[] result = new char[bytes.length * 2];
int j = 0;
for (int i = 0; i < bytes.length; ++i) {
int b = bytes[i] & 0xFF;
result[j++] = hexDigits[b >> 4];
result[j++] = hexDigits[b & 0xF];
}
return new String(result);
}
private byte[] hexToBytes(String hex) {
char[] chars = hex.toCharArray();
byte[] result = new byte[chars.length / 2];
int j = 0;
for (int i = 0; i < chars.length; i += 2) {
char hi = chars[i];
char lo = chars[i + 1];
result[j++] = (byte) ((digit(hi) << 4) | digit(lo));
}
return result;
}
private static int digit(char c) {
if (c >= '0' && c <= '9') {
return c - '0';
}
return c - 'A' + 10;
CharsetTestCommon.runDecode(hex, CharsetTestCommon.text, StandardCharsets.UTF_8, inSize, outSize);
}
}