From aa8a91d030ea13709b518ab368bc5954ed9466ea Mon Sep 17 00:00:00 2001 From: Alexey Andreev Date: Tue, 30 Mar 2021 19:45:55 +0300 Subject: [PATCH] classlib: implement missing charsets --- .../classlib/java/io/TInputStreamReader.java | 2 +- .../classlib/java/io/TOutputStreamWriter.java | 2 +- .../teavm/classlib/java/io/TPrintStream.java | 2 +- .../org/teavm/classlib/java/lang/TString.java | 4 +- .../classlib/java/nio/charset/TCharset.java | 8 +- .../java/nio/charset/TCoderResult.java | 19 ++ .../java/nio/charset/TStandardCharsets.java | 10 +- .../java/nio/charset/impl/TAsciiCharset.java | 41 ++++ .../java/nio/charset/impl/TAsciiDecoder.java | 46 ++++ .../java/nio/charset/impl/TAsciiEncoder.java | 65 ++++++ .../nio/charset/impl/TBufferedDecoder.java | 12 +- .../nio/charset/impl/TBufferedEncoder.java | 22 +- .../nio/charset/impl/TIso8859Charset.java | 41 ++++ .../nio/charset/impl/TIso8859Decoder.java | 40 ++++ .../nio/charset/impl/TIso8859Encoder.java | 65 ++++++ .../java/nio/charset/impl/TUTF16Charset.java | 46 ++++ .../java/nio/charset/impl/TUTF16Decoder.java | 167 ++++++++++++++ .../java/nio/charset/impl/TUTF16Encoder.java | 162 ++++++++++++++ .../java/nio/charset/impl/TUTF8Charset.java | 4 +- .../java/nio/charset/impl/TUTF8Decoder.java | 6 +- .../classlib/java/nio/charset/AsciiTest.java | 89 ++++++++ .../java/nio/charset/CharsetTestCommon.java | 207 ++++++++++++++++++ .../java/nio/charset/Iso8859Test.java | 94 ++++++++ .../classlib/java/nio/charset/UTF16Test.java | 134 ++++++++++++ .../classlib/java/nio/charset/UTF8Test.java | 179 ++------------- 25 files changed, 1286 insertions(+), 181 deletions(-) create mode 100644 classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiCharset.java create mode 100644 classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiDecoder.java create mode 100644 classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiEncoder.java create mode 100644 classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Charset.java create mode 100644 classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Decoder.java create mode 100644 classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Encoder.java create mode 100644 classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Charset.java create mode 100644 classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Decoder.java create mode 100644 classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Encoder.java create mode 100644 tests/src/test/java/org/teavm/classlib/java/nio/charset/AsciiTest.java create mode 100644 tests/src/test/java/org/teavm/classlib/java/nio/charset/CharsetTestCommon.java create mode 100644 tests/src/test/java/org/teavm/classlib/java/nio/charset/Iso8859Test.java create mode 100644 tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF16Test.java diff --git a/classlib/src/main/java/org/teavm/classlib/java/io/TInputStreamReader.java b/classlib/src/main/java/org/teavm/classlib/java/io/TInputStreamReader.java index 894731623..73f6d527a 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/io/TInputStreamReader.java +++ b/classlib/src/main/java/org/teavm/classlib/java/io/TInputStreamReader.java @@ -45,7 +45,7 @@ public class TInputStreamReader extends TReader { } public TInputStreamReader(TInputStream in) { - this(in, new TUTF8Charset()); + this(in, TUTF8Charset.INSTANCE); } public TInputStreamReader(TInputStream in, TCharsetDecoder decoder) { diff --git a/classlib/src/main/java/org/teavm/classlib/java/io/TOutputStreamWriter.java b/classlib/src/main/java/org/teavm/classlib/java/io/TOutputStreamWriter.java index c68c84f4b..f2b2eed17 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/io/TOutputStreamWriter.java +++ b/classlib/src/main/java/org/teavm/classlib/java/io/TOutputStreamWriter.java @@ -33,7 +33,7 @@ public class TOutputStreamWriter extends TWriter { private boolean closed; public TOutputStreamWriter(TOutputStream out) { - this(nullCheck(out), new TUTF8Charset()); + this(nullCheck(out), TUTF8Charset.INSTANCE); } public TOutputStreamWriter(TOutputStream out, final String enc) throws TUnsupportedEncodingException { diff --git a/classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java b/classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java index 0a543ba2a..431e80fd3 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java +++ b/classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java @@ -48,7 +48,7 @@ public class TPrintStream extends TFilterOutputStream { public TPrintStream(TOutputStream out, boolean autoFlush) { super(out); this.autoFlush = autoFlush; - this.charset = new TUTF8Charset(); + this.charset = TUTF8Charset.INSTANCE; } public TPrintStream(TOutputStream out) { diff --git a/classlib/src/main/java/org/teavm/classlib/java/lang/TString.java b/classlib/src/main/java/org/teavm/classlib/java/lang/TString.java index ae628df92..4332247b5 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/lang/TString.java +++ b/classlib/src/main/java/org/teavm/classlib/java/lang/TString.java @@ -68,7 +68,7 @@ public class TString extends TObject implements TSerializable, TComparable { private String canonicalName; @@ -143,7 +142,12 @@ public abstract class TCharset implements Comparable { private static final Map value = new HashMap<>(); static { - value.put("UTF-8", new TUTF8Charset()); + TCharset[] charsets = { TStandardCharsets.UTF_8, TStandardCharsets.US_ASCII, + TStandardCharsets.ISO_8859_1, TStandardCharsets.UTF_16, TStandardCharsets.UTF_16BE, + TStandardCharsets.UTF_16LE }; + for (TCharset charset : charsets) { + value.put(charset.name(), charset); + } } } } diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCoderResult.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCoderResult.java index be470d8b2..650d4a0a5 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCoderResult.java +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCoderResult.java @@ -15,6 +15,8 @@ */ package org.teavm.classlib.java.nio.charset; +import java.util.Objects; + public class TCoderResult { public static final TCoderResult UNDERFLOW = new TCoderResult((byte) 0, 0); public static final TCoderResult OVERFLOW = new TCoderResult((byte) 1, 0); @@ -90,4 +92,21 @@ public class TCoderResult { throw new AssertionError(); } } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TCoderResult that = (TCoderResult) o; + return kind == that.kind && length == that.length; + } + + @Override + public int hashCode() { + return Objects.hash(kind, length); + } } diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/TStandardCharsets.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/TStandardCharsets.java index d2a5dcb6c..6bb58428f 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/TStandardCharsets.java +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/TStandardCharsets.java @@ -15,11 +15,19 @@ */ package org.teavm.classlib.java.nio.charset; +import org.teavm.classlib.java.nio.charset.impl.TAsciiCharset; +import org.teavm.classlib.java.nio.charset.impl.TIso8859Charset; +import org.teavm.classlib.java.nio.charset.impl.TUTF16Charset; import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset; public final class TStandardCharsets { private TStandardCharsets() { } - public static final TCharset UTF_8 = new TUTF8Charset(); + public static final TCharset UTF_8 = TUTF8Charset.INSTANCE; + public static final TCharset US_ASCII = new TAsciiCharset(); + public static final TCharset ISO_8859_1 = new TIso8859Charset(); + public static final TCharset UTF_16 = new TUTF16Charset("UTF-16", true, false); + public static final TCharset UTF_16BE = new TUTF16Charset("UTF-16BE", false, false); + public static final TCharset UTF_16LE = new TUTF16Charset("UTF-16LE", false, true); } diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiCharset.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiCharset.java new file mode 100644 index 000000000..30e736e80 --- /dev/null +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiCharset.java @@ -0,0 +1,41 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCharsetDecoder; +import org.teavm.classlib.java.nio.charset.TCharsetEncoder; + +public class TAsciiCharset extends TCharset { + public TAsciiCharset() { + super("US-ASCII", new String[0]); + } + + @Override + public boolean contains(TCharset cs) { + return cs == this; + } + + @Override + public TCharsetDecoder newDecoder() { + return new TAsciiDecoder(this); + } + + @Override + public TCharsetEncoder newEncoder() { + return new TAsciiEncoder(this); + } +} diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiDecoder.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiDecoder.java new file mode 100644 index 000000000..db09ddda6 --- /dev/null +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiDecoder.java @@ -0,0 +1,46 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCoderResult; + +public class TAsciiDecoder extends TBufferedDecoder { + public TAsciiDecoder(TCharset cs) { + super(cs, 1, 1); + } + + @Override + protected TCoderResult arrayDecode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize, + Controller controller) { + TCoderResult result = null; + while (inPos < inSize && outPos < outSize) { + int b = inArray[inPos++] & 0xFf; + if ((b & 0x80) != 0) { + result = TCoderResult.malformedForLength(1); + --inPos; + break; + } else { + outArray[outPos++] = (char) b; + } + } + + controller.setInPosition(inPos); + controller.setOutPosition(outPos); + + return result; + } +} diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiEncoder.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiEncoder.java new file mode 100644 index 000000000..47cbd572d --- /dev/null +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TAsciiEncoder.java @@ -0,0 +1,65 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCoderResult; + +public class TAsciiEncoder extends TBufferedEncoder { + public TAsciiEncoder(TCharset cs) { + super(cs, 1, 1); + } + + @Override + protected TCoderResult arrayEncode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize, + Controller controller) { + TCoderResult result = null; + while (inPos < inSize && outPos < outSize) { + char c = inArray[inPos++]; + if (Character.isHighSurrogate(c)) { + if (inPos >= inSize) { + if (!controller.hasMoreInput(2)) { + result = TCoderResult.UNDERFLOW; + } else { + inPos--; + } + break; + } else { + char next = inArray[inPos]; + if (!Character.isLowSurrogate(next)) { + result = TCoderResult.malformedForLength(1); + } else { + --inPos; + result = TCoderResult.unmappableForLength(2); + } + break; + } + } else if (Character.isLowSurrogate(c)) { + result = TCoderResult.malformedForLength(1); + } + if (c < 128) { + outArray[outPos++] = (byte) c; + } else { + result = TCoderResult.unmappableForLength(1); + --inPos; + break; + } + } + controller.setInPosition(inPos); + controller.setOutPosition(outPos); + return result; + } +} diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TBufferedDecoder.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TBufferedDecoder.java index f81a2d1c6..7abcc9477 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TBufferedDecoder.java +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TBufferedDecoder.java @@ -22,6 +22,9 @@ import org.teavm.classlib.java.nio.charset.TCharsetDecoder; import org.teavm.classlib.java.nio.charset.TCoderResult; public abstract class TBufferedDecoder extends TCharsetDecoder { + private byte[] inArray = new byte[512]; + private char[] outArray = new char[512]; + public TBufferedDecoder(TCharset cs, float averageCharsPerByte, float maxCharsPerByte) { super(cs, averageCharsPerByte, maxCharsPerByte); } @@ -29,12 +32,11 @@ public abstract class TBufferedDecoder extends TCharsetDecoder { @Override protected TCoderResult decodeLoop(TByteBuffer in, TCharBuffer out) { // Use intermediate array to batch buffer operations - int outPos = 0; - byte[] inArray = new byte[Math.min(in.remaining(), 512)]; + byte[] inArray = this.inArray; int inPos = 0; int inSize = 0; - char[] outArray = new char[Math.min(out.remaining(), 512)]; - TCoderResult result = null; + char[] outArray = this.outArray; + TCoderResult result; while (true) { // If there were remaining bytes in input buffer, copy them to the beginning of input array @@ -55,7 +57,7 @@ public abstract class TBufferedDecoder extends TCharsetDecoder { } // Perform iteration - outPos = 0; + int outPos = 0; int outSize = Math.min(out.remaining(), outArray.length); Controller controller = new Controller(in, out); result = arrayDecode(inArray, inPos, inSize, outArray, outPos, outSize, controller); diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TBufferedEncoder.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TBufferedEncoder.java index 2ba08c5a9..7ceda06c0 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TBufferedEncoder.java +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TBufferedEncoder.java @@ -22,6 +22,9 @@ import org.teavm.classlib.java.nio.charset.TCharsetEncoder; import org.teavm.classlib.java.nio.charset.TCoderResult; public abstract class TBufferedEncoder extends TCharsetEncoder { + private char[] inArray = new char[512]; + private byte[] outArray = new byte[512]; + public TBufferedEncoder(TCharset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement) { super(cs, averageBytesPerChar, maxBytesPerChar, replacement); } @@ -33,12 +36,11 @@ public abstract class TBufferedEncoder extends TCharsetEncoder { @Override protected TCoderResult encodeLoop(TCharBuffer in, TByteBuffer out) { // Use intermediate array to batch buffer operations - int outPos = 0; - char[] inArray = new char[Math.min(in.remaining(), 512)]; + char[] inArray = this.inArray; int inPos = 0; int inSize = 0; - byte[] outArray = new byte[Math.min(out.remaining(), 512)]; - TCoderResult result = null; + byte[] outArray = this.outArray; + TCoderResult result; while (true) { // If there were remaining bytes in input buffer, copy them to the beginning of input array @@ -59,15 +61,19 @@ public abstract class TBufferedEncoder extends TCharsetEncoder { } // Perform iteration - outPos = 0; + int outPos = 0; int outSize = Math.min(out.remaining(), outArray.length); Controller controller = new Controller(in, out); result = arrayEncode(inArray, inPos, inSize, outArray, outPos, outSize, controller); inPos = controller.inPosition; - if (result == null && outPos == controller.outPosition) { - result = TCoderResult.UNDERFLOW; - } outPos = controller.outPosition; + if (result == null) { + if (!in.hasRemaining() && inPos >= inSize) { + result = TCoderResult.UNDERFLOW; + } else if (!out.hasRemaining() && inPos >= inSize) { + result = TCoderResult.OVERFLOW; + } + } // Write any output characters to out buffer out.put(outArray, 0, outPos); diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Charset.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Charset.java new file mode 100644 index 000000000..45e52822c --- /dev/null +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Charset.java @@ -0,0 +1,41 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCharsetDecoder; +import org.teavm.classlib.java.nio.charset.TCharsetEncoder; + +public class TIso8859Charset extends TCharset { + public TIso8859Charset() { + super("ISO-8859-1", new String[0]); + } + + @Override + public boolean contains(TCharset cs) { + return cs == this; + } + + @Override + public TCharsetDecoder newDecoder() { + return new TIso8859Decoder(this); + } + + @Override + public TCharsetEncoder newEncoder() { + return new TIso8859Encoder(this); + } +} diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Decoder.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Decoder.java new file mode 100644 index 000000000..1d4687115 --- /dev/null +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Decoder.java @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCoderResult; + +public class TIso8859Decoder extends TBufferedDecoder { + public TIso8859Decoder(TCharset cs) { + super(cs, 1, 1); + } + + @Override + protected TCoderResult arrayDecode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize, + Controller controller) { + TCoderResult result = null; + while (inPos < inSize && outPos < outSize) { + int b = inArray[inPos++] & 0xFf; + outArray[outPos++] = (char) b; + } + + controller.setInPosition(inPos); + controller.setOutPosition(outPos); + + return result; + } +} diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Encoder.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Encoder.java new file mode 100644 index 000000000..ca8c8333e --- /dev/null +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TIso8859Encoder.java @@ -0,0 +1,65 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCoderResult; + +public class TIso8859Encoder extends TBufferedEncoder { + public TIso8859Encoder(TCharset cs) { + super(cs, 1, 1); + } + + @Override + protected TCoderResult arrayEncode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize, + Controller controller) { + TCoderResult result = null; + while (inPos < inSize && outPos < outSize) { + char c = inArray[inPos++]; + if (Character.isHighSurrogate(c)) { + if (inPos == inSize) { + if (!controller.hasMoreInput(2)) { + result = TCoderResult.UNDERFLOW; + } else { + inPos--; + } + break; + } else { + char next = inArray[inPos]; + if (!Character.isLowSurrogate(next)) { + result = TCoderResult.malformedForLength(1); + } else { + --inPos; + result = TCoderResult.unmappableForLength(2); + } + break; + } + } else if (Character.isLowSurrogate(c)) { + result = TCoderResult.malformedForLength(1); + } + if (c < 256) { + outArray[outPos++] = (byte) c; + } else { + --inPos; + result = TCoderResult.unmappableForLength(1); + break; + } + } + controller.setInPosition(inPos); + controller.setOutPosition(outPos); + return result; + } +} diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Charset.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Charset.java new file mode 100644 index 000000000..12f59c573 --- /dev/null +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Charset.java @@ -0,0 +1,46 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCharsetDecoder; +import org.teavm.classlib.java.nio.charset.TCharsetEncoder; + +public class TUTF16Charset extends TCharset { + private boolean bom; + private boolean littleEndian; + + public TUTF16Charset(String canonicalName, boolean bom, boolean littleEndian) { + super(canonicalName, new String[0]); + this.bom = bom; + this.littleEndian = littleEndian; + } + + @Override + public boolean contains(TCharset cs) { + return cs == this; + } + + @Override + public TCharsetDecoder newDecoder() { + return new TUTF16Decoder(this, bom, littleEndian); + } + + @Override + public TCharsetEncoder newEncoder() { + return new TUTF16Encoder(this, bom, littleEndian); + } +} diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Decoder.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Decoder.java new file mode 100644 index 000000000..a62fb6879 --- /dev/null +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Decoder.java @@ -0,0 +1,167 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCoderResult; + +public class TUTF16Decoder extends TBufferedDecoder { + private boolean bom; + private boolean littleEndian; + + public TUTF16Decoder(TCharset cs, boolean bom, boolean littleEndian) { + super(cs, 0.5f, 0.5f); + this.bom = bom; + this.littleEndian = littleEndian; + } + + @Override + protected TCoderResult arrayDecode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize, + Controller controller) { + if (bom) { + if (inPos + 2 > inSize) { + return controller.hasMoreInput() ? null : TCoderResult.UNDERFLOW; + } + bom = false; + byte b = inArray[inPos++]; + if (b == (byte) 0xFF) { + if (inArray[inPos] == (byte) 0xFE) { + inPos++; + littleEndian = true; + } else { + inPos--; + } + } else if (b == (byte) 0xFE) { + if (inArray[inPos] == (byte) 0xFF) { + inPos++; + littleEndian = false; + } else { + inPos--; + } + } else { + inPos--; + } + } + + return littleEndian + ? decodeLE(inArray, inPos, inSize, outArray, outPos, outSize, controller) + : decodeBE(inArray, inPos, inSize, outArray, outPos, outSize, controller); + } + + private TCoderResult decodeLE(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize, + Controller controller) { + TCoderResult result = null; + while (inPos < inSize && outPos < outSize) { + if (inPos + 2 > inSize) { + if (!controller.hasMoreInput(2)) { + result = TCoderResult.UNDERFLOW; + } + break; + } + int b1 = inArray[inPos++] & 0xFF; + int b2 = inArray[inPos++] & 0xFF; + char c = (char) (b1 | (b2 << 8)); + if (Character.isHighSurrogate(c)) { + if (inPos + 2 >= inSize) { + if (!controller.hasMoreInput(4)) { + result = TCoderResult.UNDERFLOW; + } + inPos -= 2; + break; + } + b1 = inArray[inPos++] & 0xFF; + b2 = inArray[inPos++] & 0xFF; + char next = (char) (b1 | (b2 << 8)); + if (!Character.isLowSurrogate(next)) { + inPos -= 4; + result = TCoderResult.malformedForLength(4); + break; + } else { + if (outPos + 2 > outSize) { + if (!controller.hasMoreOutput(2)) { + result = TCoderResult.OVERFLOW; + } + break; + } else { + outArray[outPos++] = c; + outArray[outPos++] = next; + } + } + } else if (Character.isLowSurrogate(c)) { + inPos -= 2; + result = TCoderResult.malformedForLength(2); + break; + } else { + outArray[outPos++] = c; + } + } + controller.setInPosition(inPos); + controller.setOutPosition(outPos); + return result; + } + + private TCoderResult decodeBE(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize, + Controller controller) { + TCoderResult result = null; + while (inPos < inSize && outPos < outSize) { + if (inPos + 2 > inSize) { + if (!controller.hasMoreInput(2)) { + result = TCoderResult.UNDERFLOW; + } + break; + } + int b1 = inArray[inPos++] & 0xFF; + int b2 = inArray[inPos++] & 0xFF; + char c = (char) (b2 | (b1 << 8)); + if (Character.isHighSurrogate(c)) { + if (inPos + 2 >= inSize) { + if (!controller.hasMoreInput(4)) { + result = TCoderResult.UNDERFLOW; + } + inPos -= 2; + break; + } + b1 = inArray[inPos++] & 0xFF; + b2 = inArray[inPos++] & 0xFF; + char next = (char) (b2 | (b1 << 8)); + if (!Character.isLowSurrogate(next)) { + inPos -= 4; + result = TCoderResult.malformedForLength(4); + break; + } else { + if (outPos + 2 > outSize) { + if (!controller.hasMoreOutput(2)) { + result = TCoderResult.OVERFLOW; + } + break; + } else { + outArray[outPos++] = c; + outArray[outPos++] = next; + } + } + } else if (Character.isLowSurrogate(c)) { + inPos -= 2; + result = TCoderResult.malformedForLength(2); + break; + } else { + outArray[outPos++] = c; + } + } + controller.setInPosition(inPos); + controller.setOutPosition(outPos); + return result; + } +} diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Encoder.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Encoder.java new file mode 100644 index 000000000..c9d51b455 --- /dev/null +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF16Encoder.java @@ -0,0 +1,162 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCoderResult; + +public class TUTF16Encoder extends TBufferedEncoder { + private boolean bom; + private boolean littleEndian; + + public TUTF16Encoder(TCharset cs, boolean bom, boolean littleEndian) { + super(cs, 2, 4); + this.bom = bom; + this.littleEndian = littleEndian; + } + + @Override + protected TCoderResult arrayEncode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize, + Controller controller) { + if (bom) { + if (outPos + 2 > outSize) { + return controller.hasMoreOutput() ? null : TCoderResult.OVERFLOW; + } + bom = false; + if (littleEndian) { + outArray[outPos++] = (byte) 0xFF; + outArray[outPos++] = (byte) 0xFE; + } else { + outArray[outPos++] = (byte) 0xFE; + outArray[outPos++] = (byte) 0xFF; + } + } + + return littleEndian + ? arrayEncodeLE(inArray, inPos, inSize, outArray, outPos, outSize, controller) + : arrayEncodeBE(inArray, inPos, inSize, outArray, outPos, outSize, controller); + } + + private TCoderResult arrayEncodeLE(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize, + Controller controller) { + TCoderResult result = null; + while (inPos < inSize && outPos < outSize) { + char c = inArray[inPos++]; + if (Character.isHighSurrogate(c)) { + if (inPos == inSize) { + inPos--; + if (!controller.hasMoreInput(2)) { + result = TCoderResult.UNDERFLOW; + } + break; + } + char next = inArray[inPos++]; + if (Character.isLowSurrogate(next)) { + if (outPos + 4 <= outSize) { + outArray[outPos++] = (byte) (c & 0xFF); + outArray[outPos++] = (byte) (c >> 8); + outArray[outPos++] = (byte) (next & 0xFF); + outArray[outPos++] = (byte) (next >> 8); + } else { + inPos -= 2; + if (!controller.hasMoreOutput(4)) { + result = TCoderResult.OVERFLOW; + } + break; + } + } else { + inPos -= 2; + result = TCoderResult.malformedForLength(1); + break; + } + } else if (Character.isLowSurrogate(c)) { + inPos--; + result = TCoderResult.malformedForLength(1); + break; + } else { + if (outPos + 2 <= outSize) { + outArray[outPos++] = (byte) (c & 0xFF); + outArray[outPos++] = (byte) (c >> 8); + } else { + inPos--; + if (!controller.hasMoreOutput(2)) { + result = TCoderResult.OVERFLOW; + } + break; + } + } + } + + controller.setInPosition(inPos); + controller.setOutPosition(outPos); + return result; + } + + private TCoderResult arrayEncodeBE(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize, + Controller controller) { + TCoderResult result = null; + while (inPos < inSize && outPos < outSize) { + char c = inArray[inPos++]; + if (Character.isHighSurrogate(c)) { + if (inPos == inSize) { + inPos--; + if (!controller.hasMoreInput(2)) { + result = TCoderResult.UNDERFLOW; + } + break; + } + char next = inArray[inPos++]; + if (Character.isLowSurrogate(next)) { + if (outPos + 4 <= outSize) { + outArray[outPos++] = (byte) (c >> 8); + outArray[outPos++] = (byte) (c & 0xFF); + outArray[outPos++] = (byte) (next >> 8); + outArray[outPos++] = (byte) (next & 0xFF); + } else { + inPos -= 2; + if (!controller.hasMoreOutput(4)) { + result = TCoderResult.OVERFLOW; + } + break; + } + } else { + inPos -= 2; + result = TCoderResult.malformedForLength(1); + break; + } + } else if (Character.isLowSurrogate(c)) { + inPos--; + result = TCoderResult.malformedForLength(1); + break; + } else { + if (outPos + 2 <= outSize) { + outArray[outPos++] = (byte) (c >> 8); + outArray[outPos++] = (byte) (c & 0xFF); + } else { + inPos--; + if (!controller.hasMoreOutput(2)) { + result = TCoderResult.OVERFLOW; + } + break; + } + } + } + + controller.setInPosition(inPos); + controller.setOutPosition(outPos); + return result; + } +} diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF8Charset.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF8Charset.java index 3520a464e..259873ee7 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF8Charset.java +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF8Charset.java @@ -20,7 +20,9 @@ import org.teavm.classlib.java.nio.charset.TCharsetDecoder; import org.teavm.classlib.java.nio.charset.TCharsetEncoder; public class TUTF8Charset extends TCharset { - public TUTF8Charset() { + public static final TUTF8Charset INSTANCE = new TUTF8Charset(); + + private TUTF8Charset() { super("UTF-8", new String[0]); } diff --git a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF8Decoder.java b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF8Decoder.java index 0f2f38dc3..4fb9683d1 100644 --- a/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF8Decoder.java +++ b/classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/TUTF8Decoder.java @@ -34,7 +34,7 @@ public class TUTF8Decoder extends TBufferedDecoder { } else if ((b & 0xE0) == 0xC0) { if (inPos >= inSize) { --inPos; - if (!controller.hasMoreInput()) { + if (!controller.hasMoreInput(2)) { result = TCoderResult.UNDERFLOW; } break; @@ -49,7 +49,7 @@ public class TUTF8Decoder extends TBufferedDecoder { } else if ((b & 0xF0) == 0xE0) { if (inPos + 2 > inSize) { --inPos; - if (!controller.hasMoreInput()) { + if (!controller.hasMoreInput(3)) { result = TCoderResult.UNDERFLOW; } break; @@ -71,7 +71,7 @@ public class TUTF8Decoder extends TBufferedDecoder { } else if ((b & 0xF8) == 0xF0) { if (inPos + 3 > inSize) { --inPos; - if (!controller.hasMoreInput()) { + if (!controller.hasMoreInput(4)) { result = TCoderResult.UNDERFLOW; } break; diff --git a/tests/src/test/java/org/teavm/classlib/java/nio/charset/AsciiTest.java b/tests/src/test/java/org/teavm/classlib/java/nio/charset/AsciiTest.java new file mode 100644 index 000000000..4a048d97e --- /dev/null +++ b/tests/src/test/java/org/teavm/classlib/java/nio/charset/AsciiTest.java @@ -0,0 +1,89 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.teavm.junit.TeaVMTestRunner; +import org.teavm.junit.WholeClassCompilation; + +@RunWith(TeaVMTestRunner.class) +@WholeClassCompilation +public class AsciiTest { + private static String hex = CharsetTestCommon.bytesToHex(convert(CharsetTestCommon.asciiText)); + + private static byte[] convert(String value) { + byte[] result = new byte[value.length()]; + for (int i = 0; i < value.length(); ++i) { + result[i] = (byte) value.charAt(i); + } + return result; + } + + @Test + public void encode1() { + runEncode(600, 600); + } + + @Test + public void encode2() { + runEncode(600, 100); + } + + @Test + public void encode3() { + runEncode(100, 600); + } + + @Test + public void decode1() { + runDecode(600, 600); + } + + @Test + public void decode2() { + runDecode(600, 100); + } + + @Test + public void decode3() { + runDecode(100, 600); + } + + @Test + public void encodeUnmappable() { + Charset charset = StandardCharsets.US_ASCII; + CharsetTestCommon.checkUnmappable(charset, "ц", 1); + CharsetTestCommon.checkUnmappable(charset, "£", 1); + CharsetTestCommon.checkUnmappable(charset, "\uD800\uDC00b", 2); + } + + @Test + public void decodeMalformed() { + Charset charset = StandardCharsets.US_ASCII; + CharsetTestCommon.checkMalformed(charset, new byte[] { (byte) 0xA3 }, 1); + } + + private void runEncode(int inSize, int outSize) { + CharsetTestCommon.runEncode(hex, CharsetTestCommon.asciiText, StandardCharsets.US_ASCII, inSize, outSize); + } + + private void runDecode(int inSize, int outSize) { + CharsetTestCommon.runDecode(hex, CharsetTestCommon.asciiText, StandardCharsets.US_ASCII, inSize, outSize); + } +} diff --git a/tests/src/test/java/org/teavm/classlib/java/nio/charset/CharsetTestCommon.java b/tests/src/test/java/org/teavm/classlib/java/nio/charset/CharsetTestCommon.java new file mode 100644 index 000000000..a2afbb2ee --- /dev/null +++ b/tests/src/test/java/org/teavm/classlib/java/nio/charset/CharsetTestCommon.java @@ -0,0 +1,207 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset; + +import static org.junit.Assert.assertEquals; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.util.Arrays; + +final class CharsetTestCommon { + private static char[] hexDigits = "0123456789ABCDEF".toCharArray(); + + static final String text = "" + + "Здесь в моем объяснении я отмечаю все эти цифры и числа. Мне, конечно, всё равно будет, но теперь " + + "(и, может быть, только в эту минуту) я желаю, чтобы те, которые будут судить мой поступок, могли ясно " + + "видеть, из какой логической цепи выводов вышло мое „последнее убеждение“. Я написал сейчас выше, что " + + "окончательная решимость, которой недоставало мне для исполнения моего „последнего убеждения“, произошла " + + "во мне, кажется, вовсе не из логического вывода, а от какого-то странного толчка, от одного странного " + + "обстоятельства, может быть вовсе не связанного ничем с ходом дела. " + + "Дней десять назад зашел ко мне Рогожин, " + + "по одному своему делу, о котором здесь лишнее распространяться. Я никогда не видал Рогожина прежде, " + + "но слышал о нем очень многое. Я дал ему все нужные справки, и он скоро ушел, а так как он и приходил " + + "только за справками, то тем бы дело между нами и кончилось. Но он слишком заинтересовал меня, " + + "и весь этот день я был под влиянием странных мыслей, так что решился пойти к нему на другой день сам, " + + "отдать визит. Рогожин был мне очевидно не рад и даже „деликатно“ намекнул, что нам нечего продолжать " + + "знакомство; но все-таки я провел очень любопытный час, как, вероятно, и он. " + + "Между нами был такой контраст, " + + "который не мог не сказаться нам обоим, особенно мне: я был человек, уже сосчитавший дни свои, а он - " + + "живущий самою полною, непосредственною жизнью, настоящею минутой, без всякой заботы о „последних“ " + + "выводах, цифрах или о чем бы то ни было, не касающемся того, на чем... на чем... ну хоть на чем он " + + "помешан; пусть простит мне это выражение господин Рогожин, пожалуй хоть как плохому литератору, не " + + "умевшему выразить свою мысль. Несмотря на всю его нелюбезность, мне показалось, что он человек с умом и " + + "может многое понимать, хотя его мало что интересует из постороннего. Я не намекал ему о моем „последнем " + + "убеждении“, но мне почему-то показалось, что он, слушая меня, угадал его. " + + "Он промолчал, он ужасно молчалив. " + + "Я намекнул ему, уходя, что, несмотря на всю между нами разницу и на все противоположности, - " + + "les extrémités se touchent 1 (я растолковал ему это по-русски), так что, может быть, он и сам вовсе не " + + "так далек от моего „последнего убеждения“, как кажется. На это он ответил мне очень угрюмою и кислою " + + "гримасой, встал, сам сыскал мне мою фуражку, сделав вид, будто бы я сам ухожу, и просто-запросто вывел " + + "меня из своего мрачного дома под видом того, что провожает меня из учтивости. Дом его поразил меня; " + + "похож на кладбище, а ему, кажется, нравится, что, впрочем, понятно: такая полная, " + + "непосредственная жизнь, которою он живет, слишком полна сама по себе, чтобы нуждаться в обстановке."; + + static String asciiText = "" + + "Meanwhile, the various members of Sleary`s company gradually gathered together from the upper " + + "regions, where they were quartered, and, from standing about, talking in low voices to one another " + + "and to Mr. Childers, gradually insinuated themselves and him into the room. There were two or three " + + "handsome young women among them, with their two or three husbands, and their two or three mothers, " + + "and their eight or nine little children, who did the fairy business when required. " + + "The father of one of the families was in the habit of balancing the father of another of the families " + + "on the top of a great pole; the father of a third family often made a pyramid of both those fathers, " + + "with Master Kidderminster for the apex, and himself for the base; all the fathers could dance upon " + + "rolling casks, stand upon bottles, catch knives and balls, twirl hand-basins, ride upon anything, " + + "jump over everything, and stick at nothing. All the mothers could (and did) dance, upon the slack " + + "wire and the tight-rope, and perform rapid acts on bare-backed steeds; none of them were at all " + + "particular in respect of showing their legs; and one of them, alone in a Greek chariot, drove six " + + "in hand into every town they came to. They all assumed to be mighty rakish and knowing, they were " + + "not very tidy in their private dresses, they were not at all orderly in their domestic arrangements, " + + "and the combined literature of the whole company would have produced but a poor letter on any subject. " + + "Yet there was a remarkable gentleness and childishness about these people, a special inaptitude " + + "for any kind of sharp practice, and an untiring readiness to help and pity one another, deserving " + + "often of as much respect, and always of as much generous construction, as the every-day virtues of " + + "any class of people in the world."; + + private CharsetTestCommon() { + } + + static void runEncode(String hex, String text, Charset charset, int inSize, int outSize) { + char[] input = text.toCharArray(); + byte[] output = new byte[16384]; + int inPos = 0; + int outPos = 0; + CharsetEncoder encoder = charset.newEncoder(); + CoderResult result; + + do { + int inLen = Math.min(inSize, input.length - inPos); + CharBuffer in = CharBuffer.wrap(input, inPos, inLen); + int outLen = Math.min(outSize, output.length - outPos); + ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen); + result = encoder.encode(in, out, inPos + inLen >= input.length); + inPos = in.position(); + outPos = out.position(); + } while (!result.isError() && inPos < input.length); + + assertEquals("Should be UNDERFLOW after encoding", CoderResult.UNDERFLOW, result); + + do { + int outLen = Math.min(outSize, output.length - outPos); + ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen); + result = encoder.flush(out); + outPos = out.position(); + } while (!result.isUnderflow()); + + assertEquals("Should be UNDERFLOW after flushing", CoderResult.UNDERFLOW, result); + output = Arrays.copyOf(output, outPos); + assertEquals(hex, bytesToHex(output)); + } + + static void runDecode(String hex, String text, Charset charset, int inSize, int outSize) { + byte[] input = hexToBytes(hex); + char[] output = new char[16384]; + int inPos = 0; + int outPos = 0; + CharsetDecoder decoder = charset.newDecoder(); + CoderResult result; + + do { + int inLen = Math.min(inSize, input.length - inPos); + ByteBuffer in = ByteBuffer.wrap(input, inPos, inLen); + int outLen = Math.min(outSize, output.length - outPos); + CharBuffer out = CharBuffer.wrap(output, outPos, outLen); + result = decoder.decode(in, out, inPos + inLen >= input.length); + inPos = in.position(); + outPos = out.position(); + } while (!result.isError() && inPos < input.length); + + assertEquals("Should be UNDERFLOW after encoding", CoderResult.UNDERFLOW, result); + + do { + int outLen = Math.min(outSize, output.length - outPos); + CharBuffer out = CharBuffer.wrap(output, outPos, outLen); + result = decoder.flush(out); + outPos = out.position(); + } while (!result.isUnderflow()); + + assertEquals("Should be UNDERFLOW after flushing", CoderResult.UNDERFLOW, result); + output = Arrays.copyOf(output, outPos); + assertEquals(text, new String(output)); + } + + static String bytesToHex(byte[] bytes) { + char[] result = new char[bytes.length * 2]; + int j = 0; + for (int i = 0; i < bytes.length; ++i) { + int b = bytes[i] & 0xFF; + result[j++] = hexDigits[b >> 4]; + result[j++] = hexDigits[b & 0xF]; + } + return new String(result); + } + + static byte[] hexToBytes(String hex) { + char[] chars = hex.toCharArray(); + byte[] result = new byte[chars.length / 2]; + int j = 0; + for (int i = 0; i < chars.length; i += 2) { + char hi = chars[i]; + char lo = chars[i + 1]; + result[j++] = (byte) ((digit(hi) << 4) | digit(lo)); + } + return result; + } + + private static int digit(char c) { + if (c >= '0' && c <= '9') { + return c - '0'; + } + return c - 'A' + 10; + } + + static void checkUnmappable(Charset charset, String text, int length) { + CharBuffer input = CharBuffer.wrap(text); + byte[] result = new byte[100]; + ByteBuffer output = ByteBuffer.wrap(result); + CoderResult coderResult = charset.newEncoder().encode(input, output, true); + assertEquals(CoderResult.unmappableForLength(length), coderResult); + assertEquals(0, input.position()); + assertEquals(0, output.position()); + } + + static void checkMalformed(Charset charset, String text, int length) { + CharBuffer input = CharBuffer.wrap(text); + byte[] result = new byte[100]; + ByteBuffer output = ByteBuffer.wrap(result); + CoderResult coderResult = charset.newEncoder().encode(input, output, true); + assertEquals(CoderResult.malformedForLength(length), coderResult); + assertEquals(0, input.position()); + assertEquals(0, output.position()); + } + + static void checkMalformed(Charset charset, byte[] data, int length) { + ByteBuffer input = ByteBuffer.wrap(data); + CharBuffer output = CharBuffer.wrap(new char[100]); + CoderResult coderResult = charset.newDecoder().decode(input, output, true); + assertEquals(CoderResult.malformedForLength(length), coderResult); + assertEquals(0, input.position()); + assertEquals(0, output.position()); + } +} diff --git a/tests/src/test/java/org/teavm/classlib/java/nio/charset/Iso8859Test.java b/tests/src/test/java/org/teavm/classlib/java/nio/charset/Iso8859Test.java new file mode 100644 index 000000000..15ec97c57 --- /dev/null +++ b/tests/src/test/java/org/teavm/classlib/java/nio/charset/Iso8859Test.java @@ -0,0 +1,94 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.teavm.junit.TeaVMTestRunner; +import org.teavm.junit.WholeClassCompilation; + +@RunWith(TeaVMTestRunner.class) +@WholeClassCompilation +public class Iso8859Test { + private static String hex = CharsetTestCommon.bytesToHex(convert(CharsetTestCommon.asciiText)); + + private static byte[] convert(String value) { + byte[] result = new byte[value.length()]; + for (int i = 0; i < value.length(); ++i) { + result[i] = (byte) value.charAt(i); + } + return result; + } + + @Test + public void encode1() { + runEncode(600, 600); + } + + @Test + public void encode2() { + runEncode(600, 100); + } + + @Test + public void encode3() { + runEncode(100, 600); + } + + @Test + public void decode1() { + runDecode(600, 600); + } + + @Test + public void decode2() { + runDecode(600, 100); + } + + @Test + public void decode3() { + runDecode(100, 600); + } + + @Test + public void encodeUnmappable() { + Charset charset = StandardCharsets.ISO_8859_1; + CharsetTestCommon.checkUnmappable(charset, "ц", 1); + CharsetTestCommon.checkUnmappable(charset, "\uD800\uDC00b", 2); + } + + @Test + public void decodeNonAscii() { + assertEquals("£", new String(new byte[] { (byte) 0xA3 }, StandardCharsets.ISO_8859_1)); + } + + @Test + public void encodeNonAscii() { + assertArrayEquals(new byte[] { (byte) 0xA3 }, "£".getBytes(StandardCharsets.ISO_8859_1)); + } + + private void runEncode(int inSize, int outSize) { + CharsetTestCommon.runEncode(hex, CharsetTestCommon.asciiText, StandardCharsets.ISO_8859_1, inSize, outSize); + } + + private void runDecode(int inSize, int outSize) { + CharsetTestCommon.runDecode(hex, CharsetTestCommon.asciiText, StandardCharsets.ISO_8859_1, inSize, outSize); + } +} diff --git a/tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF16Test.java b/tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF16Test.java new file mode 100644 index 000000000..f22de041b --- /dev/null +++ b/tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF16Test.java @@ -0,0 +1,134 @@ +/* + * Copyright 2021 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.teavm.classlib.java.nio.charset; + +import static org.junit.Assert.assertEquals; +import java.nio.charset.StandardCharsets; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.teavm.junit.TeaVMTestRunner; +import org.teavm.junit.WholeClassCompilation; + +@RunWith(TeaVMTestRunner.class) +@WholeClassCompilation +public class UTF16Test { + private static String hexLE = CharsetTestCommon.bytesToHex(convertLE(CharsetTestCommon.text)); + private static String hexBE = CharsetTestCommon.bytesToHex(convertBE(CharsetTestCommon.text)); + private static String hexLEBom = "FFFE" + hexLE; + private static String hexBEBom = "FEFF" + hexBE; + private static boolean littleEndian = "0".getBytes(StandardCharsets.UTF_16)[0] == 0xFF; + + private static byte[] convertLE(String text) { + byte[] data = new byte[text.length() * 2]; + for (int i = 0; i < text.length(); ++i) { + char c = text.charAt(i); + data[i * 2] = (byte) (c & 0xFF); + data[i * 2 + 1] = (byte) ((c >> 8) & 0xFF); + } + return data; + } + + private static byte[] convertBE(String text) { + byte[] data = new byte[text.length() * 2]; + for (int i = 0; i < text.length(); ++i) { + char c = text.charAt(i); + data[i * 2] = (byte) ((c >> 8) & 0xFF); + data[i * 2 + 1] = (byte) (c & 0xFF); + } + return data; + } + + @Test + public void encode1() { + runEncode(600, 600); + } + + @Test + public void encode2() { + runEncode(600, 100); + } + + @Test + public void encode3() { + runEncode(100, 600); + } + + @Test + public void encode4() { + runEncode(600, 99); + } + + @Test + public void decode1() { + runDecode(600, 600); + } + + @Test + public void decode2() { + runDecode(600, 100); + } + + @Test + public void decode3() { + runDecode(100, 600); + } + + @Test + public void decode4() { + runDecode(99, 600); + } + + @Test + public void encodeMalformedSurrogate() { + CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16LE, "\uD800\uD800", 1); + CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16LE, "\uD800a", 1); + CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16LE, "\uDC00\uD800", 1); + } + + @Test + public void encodeSurrogate() { + String hex = CharsetTestCommon.bytesToHex("\uD800\uDC00".getBytes(StandardCharsets.UTF_16BE)); + assertEquals("D800DC00", hex); + } + + @Test + public void decodeSurrogate() { + String hex = CharsetTestCommon.bytesToHex("\uD800\uDC00".getBytes(StandardCharsets.UTF_16BE)); + assertEquals("D800DC00", hex); + } + + @Test + public void decodeMalformedSurrogate() { + CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16BE, CharsetTestCommon.hexToBytes("D800D800"), 4); + CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16BE, CharsetTestCommon.hexToBytes("D8000041"), 4); + CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16BE, CharsetTestCommon.hexToBytes("DC00D800"), 2); + } + + private void runEncode(int inSize, int outSize) { + CharsetTestCommon.runEncode(hexLE, CharsetTestCommon.text, StandardCharsets.UTF_16LE, inSize, outSize); + CharsetTestCommon.runEncode(hexBE, CharsetTestCommon.text, StandardCharsets.UTF_16BE, inSize, outSize); + CharsetTestCommon.runEncode(littleEndian ? hexLEBom : hexBEBom, CharsetTestCommon.text, + StandardCharsets.UTF_16, inSize, outSize); + } + + private void runDecode(int inSize, int outSize) { + CharsetTestCommon.runDecode(hexLE, CharsetTestCommon.text, StandardCharsets.UTF_16LE, inSize, outSize); + CharsetTestCommon.runDecode(hexBE, CharsetTestCommon.text, StandardCharsets.UTF_16BE, inSize, outSize); + CharsetTestCommon.runDecode(hexBEBom, CharsetTestCommon.text, StandardCharsets.UTF_16, inSize, outSize); + CharsetTestCommon.runDecode(hexLEBom, CharsetTestCommon.text, StandardCharsets.UTF_16, inSize, outSize); + CharsetTestCommon.runDecode(hexBE, CharsetTestCommon.text, StandardCharsets.UTF_16, inSize, outSize); + } +} diff --git a/tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF8Test.java b/tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF8Test.java index dca8d4a7d..b71005902 100644 --- a/tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF8Test.java +++ b/tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF8Test.java @@ -17,13 +17,11 @@ package org.teavm.classlib.java.nio.charset; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import org.junit.Test; import org.junit.runner.RunWith; @@ -33,54 +31,8 @@ import org.teavm.junit.WholeClassCompilation; @RunWith(TeaVMTestRunner.class) @WholeClassCompilation public class UTF8Test { - private static char[] hexDigits = "0123456789ABCDEF".toCharArray(); - // Fragment from "The Idiot" by F. Dostoevsky - private String text = - "" + "Здесь в моем объяснении я отмечаю все эти цифры и числа. Мне, конечно, всё равно будет, но теперь " - + "(и, может быть, только в эту минуту) я желаю, чтобы те, которые будут судить мой поступок, " - + "могли ясно " - + "видеть, из какой логической цепи выводов вышло мое „последнее убеждение“. Я написал сейчас " - + "выше, что " - + "окончательная решимость, которой недоставало мне для исполнения моего „последнего убеждения“, " - + "произошла " - + "во мне, кажется, вовсе не из логического вывода, а от какого-то странного толчка, от одного " - + "странного " + "обстоятельства, может быть вовсе не связанного ничем с ходом дела. " - + "Дней десять назад зашел ко мне Рогожин, " - + "по одному своему делу, о котором здесь лишнее распространяться. Я никогда не видал Рогожина " - + "прежде, " - + "но слышал о нем очень многое. Я дал ему все нужные справки, и он скоро ушел, а так как он и " - + "приходил " - + "только за справками, то тем бы дело между нами и кончилось. Но он слишком заинтересовал меня, " - + "и весь этот день я был под влиянием странных мыслей, так что решился пойти к нему на другой " - + "день сам, " - + "отдать визит. Рогожин был мне очевидно не рад и даже „деликатно“ намекнул, что нам нечего " - + "продолжать " + "знакомство; но все-таки я провел очень любопытный час, как, вероятно, и он. " - + "Между нами был такой контраст, " - + "который не мог не сказаться нам обоим, особенно мне: я был человек, уже сосчитавший дни свои, " - + "а он - " - + "живущий самою полною, непосредственною жизнью, настоящею минутой, без всякой заботы о " - + "„последних“ " - + "выводах, цифрах или о чем бы то ни было, не касающемся того, на чем... на чем... ну хоть на " - + "чем он " + "помешан; пусть простит мне это выражение господин Рогожин, пожалуй хоть как плохому " - + "литератору, не " - + "умевшему выразить свою мысль. Несмотря на всю его нелюбезность, мне показалось, что он человек" - + " с умом и " - + "может многое понимать, хотя его мало что интересует из постороннего. Я не намекал ему о моем " - + "„последнем " + "убеждении“, но мне почему-то показалось, что он, слушая меня, угадал его. " - + "Он промолчал, он ужасно молчалив. " - + "Я намекнул ему, уходя, что, несмотря на всю между нами разницу и на все противоположности, - " - + "les extrémités se touchent 1 (я растолковал ему это по-русски), так что, может быть, он и сам " - + "вовсе не " - + "так далек от моего „последнего убеждения“, как кажется. На это он ответил мне очень угрюмою и " - + "кислою " + "гримасой, встал, сам сыскал мне мою фуражку, сделав вид, будто бы я сам ухожу, и " - + "просто-запросто вывел " - + "меня из своего мрачного дома под видом того, что провожает меня из учтивости. Дом его поразил " - + "меня; " + "похож на кладбище, а ему, кажется, нравится, что, впрочем, понятно: такая полная, " - + "непосредственная " - + "жизнь, которою он живет, слишком полна сама по себе, чтобы нуждаться в обстановке."; private String hex = "" - + - "D097D0B4D0B5D181D18C20D0B220D0BCD0BED0B5D0BC20D0BED0B1D18AD18FD181D0BDD0B5D0BDD0B8D0B820D18F20D0BED" + + "D097D0B4D0B5D181D18C20D0B220D0BCD0BED0B5D0BC20D0BED0B1D18AD18FD181D0BDD0B5D0BDD0B8D0B820D18F20D0BED" + "182D0BCD0B5D187D0B0D18E20D0B2D181D0B520D18DD182D0B820D186D0B8D184D180D18B20D0B820D187D0B8D181D0BBD" + "0B02E20D09CD0BDD0B52C20D0BAD0BED0BDD0B5D187D0BDD0BE2C20D0B2D181D19120D180D0B0D0B2D0BDD0BE20D0B1D18" + "3D0B4D0B5D1822C20D0BDD0BE20D182D0B5D0BFD0B5D180D18C2028D0B82C20D0BCD0BED0B6D0B5D18220D0B1D18BD182D" @@ -210,16 +162,29 @@ public class UTF8Test { @Test public void replaceMalformedSurrogatePair() { - Charset charset = Charset.forName("UTF-8"); + Charset charset = StandardCharsets.UTF_8; ByteBuffer buffer = charset.encode("a\uD800\uD800b"); byte[] result = new byte[buffer.remaining()]; buffer.get(result); assertArrayEquals(new byte[] { 97, 63, 63, 98 }, result); } + @Test + public void malformedSurrogatePair() { + Charset charset = StandardCharsets.UTF_8; + byte[] result = new byte[100]; + CharBuffer input = CharBuffer.wrap("a\uD800\uD800b"); + ByteBuffer output = ByteBuffer.wrap(result); + CoderResult coderResult = charset.newEncoder().encode(input, output, true); + assertEquals(CoderResult.malformedForLength(1), coderResult); + result = Arrays.copyOf(result, output.position()); + assertArrayEquals(new byte[] { 97 }, result); + assertEquals(1, input.position()); + } + @Test public void encodeSurrogate() { - Charset charset = Charset.forName("UTF-8"); + Charset charset = StandardCharsets.UTF_8; ByteBuffer buffer = charset.encode("a\uD800\uDC00b"); byte[] result = new byte[buffer.remaining()]; buffer.get(result); @@ -228,7 +193,7 @@ public class UTF8Test { @Test public void encodeSupplementary() { - Charset charset = Charset.forName("UTF-8"); + Charset charset = StandardCharsets.UTF_8; StringBuilder sb = new StringBuilder(); sb.appendCodePoint(0xfedcb); ByteBuffer buffer = charset.encode(sb.toString()); @@ -239,7 +204,7 @@ public class UTF8Test { @Test public void replaceMalformedFirstByte() { - Charset charset = Charset.forName("UTF-8"); + Charset charset = StandardCharsets.UTF_8; CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte) 0xFF, 98 })); char[] result = new char[buffer.remaining()]; buffer.get(result); @@ -248,7 +213,7 @@ public class UTF8Test { @Test public void replaceMalformedMidByte() { - Charset charset = Charset.forName("UTF-8"); + Charset charset = StandardCharsets.UTF_8; CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte) 0xC0, 98, 98 })); char[] result = new char[buffer.remaining()]; buffer.get(result); @@ -265,115 +230,17 @@ public class UTF8Test { bytes[i++] = -78; bytes[i++] = -69; } - Charset charset = Charset.forName("UTF-8"); + Charset charset = StandardCharsets.UTF_8; CharBuffer buffer = charset.decode(ByteBuffer.wrap(bytes)); assertEquals('\uD8BB', buffer.get(8190)); assertEquals('\uDCBB', buffer.get(8191)); } private void runEncode(int inSize, int outSize) { - char[] input = text.toCharArray(); - byte[] output = new byte[16384]; - int inPos = 0; - int outPos = 0; - CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder(); - CoderResult result = CoderResult.UNDERFLOW; - - while (true) { - int inLen = Math.min(inSize, input.length - inPos); - CharBuffer in = CharBuffer.wrap(input, inPos, inLen); - int outLen = Math.min(outSize, output.length - outPos); - ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen); - result = encoder.encode(in, out, inPos + inLen >= input.length); - inPos = in.position(); - outPos = out.position(); - if (result.isError() || inPos >= input.length) { - break; - } - } - - assertTrue("Should be UNDERFLOW after encoding", result.isUnderflow()); - - while (true) { - int outLen = Math.min(outSize, output.length - outPos); - ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen); - result = encoder.flush(out); - outPos = out.position(); - if (result.isUnderflow()) { - break; - } - } - - assertTrue("Should be UNDERFLOW after flushing", result.isUnderflow()); - output = Arrays.copyOf(output, outPos); - assertEquals(hex, bytesToHex(output)); + CharsetTestCommon.runEncode(hex, CharsetTestCommon.text, StandardCharsets.UTF_8, inSize, outSize); } private void runDecode(int inSize, int outSize) { - byte[] input = hexToBytes(hex); - char[] output = new char[16384]; - int inPos = 0; - int outPos = 0; - CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder(); - CoderResult result = CoderResult.UNDERFLOW; - - while (true) { - int inLen = Math.min(inSize, input.length - inPos); - ByteBuffer in = ByteBuffer.wrap(input, inPos, inLen); - int outLen = Math.min(outSize, output.length - outPos); - CharBuffer out = CharBuffer.wrap(output, outPos, outLen); - result = decoder.decode(in, out, inPos + inLen >= input.length); - inPos = in.position(); - outPos = out.position(); - if (result.isError() || inPos >= input.length) { - break; - } - } - - assertTrue("Should be UNDERFLOW after encoding", result.isUnderflow()); - - while (true) { - int outLen = Math.min(outSize, output.length - outPos); - CharBuffer out = CharBuffer.wrap(output, outPos, outLen); - result = decoder.flush(out); - outPos = out.position(); - if (result.isUnderflow()) { - break; - } - } - - assertTrue("Should be UNDERFLOW after flushing", result.isUnderflow()); - output = Arrays.copyOf(output, outPos); - assertEquals(text, new String(output)); - } - - private String bytesToHex(byte[] bytes) { - char[] result = new char[bytes.length * 2]; - int j = 0; - for (int i = 0; i < bytes.length; ++i) { - int b = bytes[i] & 0xFF; - result[j++] = hexDigits[b >> 4]; - result[j++] = hexDigits[b & 0xF]; - } - return new String(result); - } - - private byte[] hexToBytes(String hex) { - char[] chars = hex.toCharArray(); - byte[] result = new byte[chars.length / 2]; - int j = 0; - for (int i = 0; i < chars.length; i += 2) { - char hi = chars[i]; - char lo = chars[i + 1]; - result[j++] = (byte) ((digit(hi) << 4) | digit(lo)); - } - return result; - } - - private static int digit(char c) { - if (c >= '0' && c <= '9') { - return c - '0'; - } - return c - 'A' + 10; + CharsetTestCommon.runDecode(hex, CharsetTestCommon.text, StandardCharsets.UTF_8, inSize, outSize); } }