mirror of
https://github.com/Eaglercraft-TeaVM-Fork/eagler-teavm.git
synced 2024-12-22 08:14:09 -08:00
classlib: implement missing charsets
This commit is contained in:
parent
9972fe0c29
commit
aa8a91d030
|
@ -45,7 +45,7 @@ public class TInputStreamReader extends TReader {
|
|||
}
|
||||
|
||||
public TInputStreamReader(TInputStream in) {
|
||||
this(in, new TUTF8Charset());
|
||||
this(in, TUTF8Charset.INSTANCE);
|
||||
}
|
||||
|
||||
public TInputStreamReader(TInputStream in, TCharsetDecoder decoder) {
|
||||
|
|
|
@ -33,7 +33,7 @@ public class TOutputStreamWriter extends TWriter {
|
|||
private boolean closed;
|
||||
|
||||
public TOutputStreamWriter(TOutputStream out) {
|
||||
this(nullCheck(out), new TUTF8Charset());
|
||||
this(nullCheck(out), TUTF8Charset.INSTANCE);
|
||||
}
|
||||
|
||||
public TOutputStreamWriter(TOutputStream out, final String enc) throws TUnsupportedEncodingException {
|
||||
|
|
|
@ -48,7 +48,7 @@ public class TPrintStream extends TFilterOutputStream {
|
|||
public TPrintStream(TOutputStream out, boolean autoFlush) {
|
||||
super(out);
|
||||
this.autoFlush = autoFlush;
|
||||
this.charset = new TUTF8Charset();
|
||||
this.charset = TUTF8Charset.INSTANCE;
|
||||
}
|
||||
|
||||
public TPrintStream(TOutputStream out) {
|
||||
|
|
|
@ -68,7 +68,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
|||
}
|
||||
|
||||
public TString(byte[] bytes, int offset, int length) {
|
||||
initWithBytes(bytes, offset, length, new TUTF8Charset());
|
||||
initWithBytes(bytes, offset, length, TUTF8Charset.INSTANCE);
|
||||
}
|
||||
|
||||
public TString(byte[] bytes) {
|
||||
|
@ -574,7 +574,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
|||
}
|
||||
|
||||
public byte[] getBytes() {
|
||||
return getBytes(new TUTF8Charset());
|
||||
return getBytes(TUTF8Charset.INSTANCE);
|
||||
}
|
||||
|
||||
public byte[] getBytes(TCharset charset) {
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.teavm.classlib.java.nio.charset;
|
|||
import java.util.*;
|
||||
import org.teavm.classlib.java.nio.TByteBuffer;
|
||||
import org.teavm.classlib.java.nio.TCharBuffer;
|
||||
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
|
||||
|
||||
public abstract class TCharset implements Comparable<TCharset> {
|
||||
private String canonicalName;
|
||||
|
@ -143,7 +142,12 @@ public abstract class TCharset implements Comparable<TCharset> {
|
|||
private static final Map<String, TCharset> value = new HashMap<>();
|
||||
|
||||
static {
|
||||
value.put("UTF-8", new TUTF8Charset());
|
||||
TCharset[] charsets = { TStandardCharsets.UTF_8, TStandardCharsets.US_ASCII,
|
||||
TStandardCharsets.ISO_8859_1, TStandardCharsets.UTF_16, TStandardCharsets.UTF_16BE,
|
||||
TStandardCharsets.UTF_16LE };
|
||||
for (TCharset charset : charsets) {
|
||||
value.put(charset.name(), charset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
*/
|
||||
package org.teavm.classlib.java.nio.charset;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public class TCoderResult {
|
||||
public static final TCoderResult UNDERFLOW = new TCoderResult((byte) 0, 0);
|
||||
public static final TCoderResult OVERFLOW = new TCoderResult((byte) 1, 0);
|
||||
|
@ -90,4 +92,21 @@ public class TCoderResult {
|
|||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
TCoderResult that = (TCoderResult) o;
|
||||
return kind == that.kind && length == that.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(kind, length);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,11 +15,19 @@
|
|||
*/
|
||||
package org.teavm.classlib.java.nio.charset;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.impl.TAsciiCharset;
|
||||
import org.teavm.classlib.java.nio.charset.impl.TIso8859Charset;
|
||||
import org.teavm.classlib.java.nio.charset.impl.TUTF16Charset;
|
||||
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
|
||||
|
||||
public final class TStandardCharsets {
|
||||
private TStandardCharsets() {
|
||||
}
|
||||
|
||||
public static final TCharset UTF_8 = new TUTF8Charset();
|
||||
public static final TCharset UTF_8 = TUTF8Charset.INSTANCE;
|
||||
public static final TCharset US_ASCII = new TAsciiCharset();
|
||||
public static final TCharset ISO_8859_1 = new TIso8859Charset();
|
||||
public static final TCharset UTF_16 = new TUTF16Charset("UTF-16", true, false);
|
||||
public static final TCharset UTF_16BE = new TUTF16Charset("UTF-16BE", false, false);
|
||||
public static final TCharset UTF_16LE = new TUTF16Charset("UTF-16LE", false, true);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
|
||||
import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
|
||||
|
||||
public class TAsciiCharset extends TCharset {
|
||||
public TAsciiCharset() {
|
||||
super("US-ASCII", new String[0]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(TCharset cs) {
|
||||
return cs == this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TCharsetDecoder newDecoder() {
|
||||
return new TAsciiDecoder(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TCharsetEncoder newEncoder() {
|
||||
return new TAsciiEncoder(this);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
public class TAsciiDecoder extends TBufferedDecoder {
|
||||
public TAsciiDecoder(TCharset cs) {
|
||||
super(cs, 1, 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TCoderResult arrayDecode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
TCoderResult result = null;
|
||||
while (inPos < inSize && outPos < outSize) {
|
||||
int b = inArray[inPos++] & 0xFf;
|
||||
if ((b & 0x80) != 0) {
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
--inPos;
|
||||
break;
|
||||
} else {
|
||||
outArray[outPos++] = (char) b;
|
||||
}
|
||||
}
|
||||
|
||||
controller.setInPosition(inPos);
|
||||
controller.setOutPosition(outPos);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
public class TAsciiEncoder extends TBufferedEncoder {
|
||||
public TAsciiEncoder(TCharset cs) {
|
||||
super(cs, 1, 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TCoderResult arrayEncode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
TCoderResult result = null;
|
||||
while (inPos < inSize && outPos < outSize) {
|
||||
char c = inArray[inPos++];
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (inPos >= inSize) {
|
||||
if (!controller.hasMoreInput(2)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
} else {
|
||||
inPos--;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
char next = inArray[inPos];
|
||||
if (!Character.isLowSurrogate(next)) {
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
} else {
|
||||
--inPos;
|
||||
result = TCoderResult.unmappableForLength(2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
}
|
||||
if (c < 128) {
|
||||
outArray[outPos++] = (byte) c;
|
||||
} else {
|
||||
result = TCoderResult.unmappableForLength(1);
|
||||
--inPos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
controller.setInPosition(inPos);
|
||||
controller.setOutPosition(outPos);
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -22,6 +22,9 @@ import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
|
|||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
public abstract class TBufferedDecoder extends TCharsetDecoder {
|
||||
private byte[] inArray = new byte[512];
|
||||
private char[] outArray = new char[512];
|
||||
|
||||
public TBufferedDecoder(TCharset cs, float averageCharsPerByte, float maxCharsPerByte) {
|
||||
super(cs, averageCharsPerByte, maxCharsPerByte);
|
||||
}
|
||||
|
@ -29,12 +32,11 @@ public abstract class TBufferedDecoder extends TCharsetDecoder {
|
|||
@Override
|
||||
protected TCoderResult decodeLoop(TByteBuffer in, TCharBuffer out) {
|
||||
// Use intermediate array to batch buffer operations
|
||||
int outPos = 0;
|
||||
byte[] inArray = new byte[Math.min(in.remaining(), 512)];
|
||||
byte[] inArray = this.inArray;
|
||||
int inPos = 0;
|
||||
int inSize = 0;
|
||||
char[] outArray = new char[Math.min(out.remaining(), 512)];
|
||||
TCoderResult result = null;
|
||||
char[] outArray = this.outArray;
|
||||
TCoderResult result;
|
||||
|
||||
while (true) {
|
||||
// If there were remaining bytes in input buffer, copy them to the beginning of input array
|
||||
|
@ -55,7 +57,7 @@ public abstract class TBufferedDecoder extends TCharsetDecoder {
|
|||
}
|
||||
|
||||
// Perform iteration
|
||||
outPos = 0;
|
||||
int outPos = 0;
|
||||
int outSize = Math.min(out.remaining(), outArray.length);
|
||||
Controller controller = new Controller(in, out);
|
||||
result = arrayDecode(inArray, inPos, inSize, outArray, outPos, outSize, controller);
|
||||
|
|
|
@ -22,6 +22,9 @@ import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
|
|||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
public abstract class TBufferedEncoder extends TCharsetEncoder {
|
||||
private char[] inArray = new char[512];
|
||||
private byte[] outArray = new byte[512];
|
||||
|
||||
public TBufferedEncoder(TCharset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement) {
|
||||
super(cs, averageBytesPerChar, maxBytesPerChar, replacement);
|
||||
}
|
||||
|
@ -33,12 +36,11 @@ public abstract class TBufferedEncoder extends TCharsetEncoder {
|
|||
@Override
|
||||
protected TCoderResult encodeLoop(TCharBuffer in, TByteBuffer out) {
|
||||
// Use intermediate array to batch buffer operations
|
||||
int outPos = 0;
|
||||
char[] inArray = new char[Math.min(in.remaining(), 512)];
|
||||
char[] inArray = this.inArray;
|
||||
int inPos = 0;
|
||||
int inSize = 0;
|
||||
byte[] outArray = new byte[Math.min(out.remaining(), 512)];
|
||||
TCoderResult result = null;
|
||||
byte[] outArray = this.outArray;
|
||||
TCoderResult result;
|
||||
|
||||
while (true) {
|
||||
// If there were remaining bytes in input buffer, copy them to the beginning of input array
|
||||
|
@ -59,15 +61,19 @@ public abstract class TBufferedEncoder extends TCharsetEncoder {
|
|||
}
|
||||
|
||||
// Perform iteration
|
||||
outPos = 0;
|
||||
int outPos = 0;
|
||||
int outSize = Math.min(out.remaining(), outArray.length);
|
||||
Controller controller = new Controller(in, out);
|
||||
result = arrayEncode(inArray, inPos, inSize, outArray, outPos, outSize, controller);
|
||||
inPos = controller.inPosition;
|
||||
if (result == null && outPos == controller.outPosition) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
outPos = controller.outPosition;
|
||||
if (result == null) {
|
||||
if (!in.hasRemaining() && inPos >= inSize) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
} else if (!out.hasRemaining() && inPos >= inSize) {
|
||||
result = TCoderResult.OVERFLOW;
|
||||
}
|
||||
}
|
||||
|
||||
// Write any output characters to out buffer
|
||||
out.put(outArray, 0, outPos);
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
|
||||
import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
|
||||
|
||||
public class TIso8859Charset extends TCharset {
|
||||
public TIso8859Charset() {
|
||||
super("ISO-8859-1", new String[0]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(TCharset cs) {
|
||||
return cs == this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TCharsetDecoder newDecoder() {
|
||||
return new TIso8859Decoder(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TCharsetEncoder newEncoder() {
|
||||
return new TIso8859Encoder(this);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
public class TIso8859Decoder extends TBufferedDecoder {
|
||||
public TIso8859Decoder(TCharset cs) {
|
||||
super(cs, 1, 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TCoderResult arrayDecode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
TCoderResult result = null;
|
||||
while (inPos < inSize && outPos < outSize) {
|
||||
int b = inArray[inPos++] & 0xFf;
|
||||
outArray[outPos++] = (char) b;
|
||||
}
|
||||
|
||||
controller.setInPosition(inPos);
|
||||
controller.setOutPosition(outPos);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
public class TIso8859Encoder extends TBufferedEncoder {
|
||||
public TIso8859Encoder(TCharset cs) {
|
||||
super(cs, 1, 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TCoderResult arrayEncode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
TCoderResult result = null;
|
||||
while (inPos < inSize && outPos < outSize) {
|
||||
char c = inArray[inPos++];
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (inPos == inSize) {
|
||||
if (!controller.hasMoreInput(2)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
} else {
|
||||
inPos--;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
char next = inArray[inPos];
|
||||
if (!Character.isLowSurrogate(next)) {
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
} else {
|
||||
--inPos;
|
||||
result = TCoderResult.unmappableForLength(2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
}
|
||||
if (c < 256) {
|
||||
outArray[outPos++] = (byte) c;
|
||||
} else {
|
||||
--inPos;
|
||||
result = TCoderResult.unmappableForLength(1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
controller.setInPosition(inPos);
|
||||
controller.setOutPosition(outPos);
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
|
||||
import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
|
||||
|
||||
public class TUTF16Charset extends TCharset {
|
||||
private boolean bom;
|
||||
private boolean littleEndian;
|
||||
|
||||
public TUTF16Charset(String canonicalName, boolean bom, boolean littleEndian) {
|
||||
super(canonicalName, new String[0]);
|
||||
this.bom = bom;
|
||||
this.littleEndian = littleEndian;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(TCharset cs) {
|
||||
return cs == this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TCharsetDecoder newDecoder() {
|
||||
return new TUTF16Decoder(this, bom, littleEndian);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TCharsetEncoder newEncoder() {
|
||||
return new TUTF16Encoder(this, bom, littleEndian);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,167 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
public class TUTF16Decoder extends TBufferedDecoder {
|
||||
private boolean bom;
|
||||
private boolean littleEndian;
|
||||
|
||||
public TUTF16Decoder(TCharset cs, boolean bom, boolean littleEndian) {
|
||||
super(cs, 0.5f, 0.5f);
|
||||
this.bom = bom;
|
||||
this.littleEndian = littleEndian;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TCoderResult arrayDecode(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
if (bom) {
|
||||
if (inPos + 2 > inSize) {
|
||||
return controller.hasMoreInput() ? null : TCoderResult.UNDERFLOW;
|
||||
}
|
||||
bom = false;
|
||||
byte b = inArray[inPos++];
|
||||
if (b == (byte) 0xFF) {
|
||||
if (inArray[inPos] == (byte) 0xFE) {
|
||||
inPos++;
|
||||
littleEndian = true;
|
||||
} else {
|
||||
inPos--;
|
||||
}
|
||||
} else if (b == (byte) 0xFE) {
|
||||
if (inArray[inPos] == (byte) 0xFF) {
|
||||
inPos++;
|
||||
littleEndian = false;
|
||||
} else {
|
||||
inPos--;
|
||||
}
|
||||
} else {
|
||||
inPos--;
|
||||
}
|
||||
}
|
||||
|
||||
return littleEndian
|
||||
? decodeLE(inArray, inPos, inSize, outArray, outPos, outSize, controller)
|
||||
: decodeBE(inArray, inPos, inSize, outArray, outPos, outSize, controller);
|
||||
}
|
||||
|
||||
private TCoderResult decodeLE(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
TCoderResult result = null;
|
||||
while (inPos < inSize && outPos < outSize) {
|
||||
if (inPos + 2 > inSize) {
|
||||
if (!controller.hasMoreInput(2)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
break;
|
||||
}
|
||||
int b1 = inArray[inPos++] & 0xFF;
|
||||
int b2 = inArray[inPos++] & 0xFF;
|
||||
char c = (char) (b1 | (b2 << 8));
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (inPos + 2 >= inSize) {
|
||||
if (!controller.hasMoreInput(4)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
inPos -= 2;
|
||||
break;
|
||||
}
|
||||
b1 = inArray[inPos++] & 0xFF;
|
||||
b2 = inArray[inPos++] & 0xFF;
|
||||
char next = (char) (b1 | (b2 << 8));
|
||||
if (!Character.isLowSurrogate(next)) {
|
||||
inPos -= 4;
|
||||
result = TCoderResult.malformedForLength(4);
|
||||
break;
|
||||
} else {
|
||||
if (outPos + 2 > outSize) {
|
||||
if (!controller.hasMoreOutput(2)) {
|
||||
result = TCoderResult.OVERFLOW;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
outArray[outPos++] = c;
|
||||
outArray[outPos++] = next;
|
||||
}
|
||||
}
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
inPos -= 2;
|
||||
result = TCoderResult.malformedForLength(2);
|
||||
break;
|
||||
} else {
|
||||
outArray[outPos++] = c;
|
||||
}
|
||||
}
|
||||
controller.setInPosition(inPos);
|
||||
controller.setOutPosition(outPos);
|
||||
return result;
|
||||
}
|
||||
|
||||
private TCoderResult decodeBE(byte[] inArray, int inPos, int inSize, char[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
TCoderResult result = null;
|
||||
while (inPos < inSize && outPos < outSize) {
|
||||
if (inPos + 2 > inSize) {
|
||||
if (!controller.hasMoreInput(2)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
break;
|
||||
}
|
||||
int b1 = inArray[inPos++] & 0xFF;
|
||||
int b2 = inArray[inPos++] & 0xFF;
|
||||
char c = (char) (b2 | (b1 << 8));
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (inPos + 2 >= inSize) {
|
||||
if (!controller.hasMoreInput(4)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
inPos -= 2;
|
||||
break;
|
||||
}
|
||||
b1 = inArray[inPos++] & 0xFF;
|
||||
b2 = inArray[inPos++] & 0xFF;
|
||||
char next = (char) (b2 | (b1 << 8));
|
||||
if (!Character.isLowSurrogate(next)) {
|
||||
inPos -= 4;
|
||||
result = TCoderResult.malformedForLength(4);
|
||||
break;
|
||||
} else {
|
||||
if (outPos + 2 > outSize) {
|
||||
if (!controller.hasMoreOutput(2)) {
|
||||
result = TCoderResult.OVERFLOW;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
outArray[outPos++] = c;
|
||||
outArray[outPos++] = next;
|
||||
}
|
||||
}
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
inPos -= 2;
|
||||
result = TCoderResult.malformedForLength(2);
|
||||
break;
|
||||
} else {
|
||||
outArray[outPos++] = c;
|
||||
}
|
||||
}
|
||||
controller.setInPosition(inPos);
|
||||
controller.setOutPosition(outPos);
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,162 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
public class TUTF16Encoder extends TBufferedEncoder {
|
||||
private boolean bom;
|
||||
private boolean littleEndian;
|
||||
|
||||
public TUTF16Encoder(TCharset cs, boolean bom, boolean littleEndian) {
|
||||
super(cs, 2, 4);
|
||||
this.bom = bom;
|
||||
this.littleEndian = littleEndian;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TCoderResult arrayEncode(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
if (bom) {
|
||||
if (outPos + 2 > outSize) {
|
||||
return controller.hasMoreOutput() ? null : TCoderResult.OVERFLOW;
|
||||
}
|
||||
bom = false;
|
||||
if (littleEndian) {
|
||||
outArray[outPos++] = (byte) 0xFF;
|
||||
outArray[outPos++] = (byte) 0xFE;
|
||||
} else {
|
||||
outArray[outPos++] = (byte) 0xFE;
|
||||
outArray[outPos++] = (byte) 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
return littleEndian
|
||||
? arrayEncodeLE(inArray, inPos, inSize, outArray, outPos, outSize, controller)
|
||||
: arrayEncodeBE(inArray, inPos, inSize, outArray, outPos, outSize, controller);
|
||||
}
|
||||
|
||||
private TCoderResult arrayEncodeLE(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
TCoderResult result = null;
|
||||
while (inPos < inSize && outPos < outSize) {
|
||||
char c = inArray[inPos++];
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (inPos == inSize) {
|
||||
inPos--;
|
||||
if (!controller.hasMoreInput(2)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
break;
|
||||
}
|
||||
char next = inArray[inPos++];
|
||||
if (Character.isLowSurrogate(next)) {
|
||||
if (outPos + 4 <= outSize) {
|
||||
outArray[outPos++] = (byte) (c & 0xFF);
|
||||
outArray[outPos++] = (byte) (c >> 8);
|
||||
outArray[outPos++] = (byte) (next & 0xFF);
|
||||
outArray[outPos++] = (byte) (next >> 8);
|
||||
} else {
|
||||
inPos -= 2;
|
||||
if (!controller.hasMoreOutput(4)) {
|
||||
result = TCoderResult.OVERFLOW;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
inPos -= 2;
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
break;
|
||||
}
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
inPos--;
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
break;
|
||||
} else {
|
||||
if (outPos + 2 <= outSize) {
|
||||
outArray[outPos++] = (byte) (c & 0xFF);
|
||||
outArray[outPos++] = (byte) (c >> 8);
|
||||
} else {
|
||||
inPos--;
|
||||
if (!controller.hasMoreOutput(2)) {
|
||||
result = TCoderResult.OVERFLOW;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
controller.setInPosition(inPos);
|
||||
controller.setOutPosition(outPos);
|
||||
return result;
|
||||
}
|
||||
|
||||
private TCoderResult arrayEncodeBE(char[] inArray, int inPos, int inSize, byte[] outArray, int outPos, int outSize,
|
||||
Controller controller) {
|
||||
TCoderResult result = null;
|
||||
while (inPos < inSize && outPos < outSize) {
|
||||
char c = inArray[inPos++];
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (inPos == inSize) {
|
||||
inPos--;
|
||||
if (!controller.hasMoreInput(2)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
break;
|
||||
}
|
||||
char next = inArray[inPos++];
|
||||
if (Character.isLowSurrogate(next)) {
|
||||
if (outPos + 4 <= outSize) {
|
||||
outArray[outPos++] = (byte) (c >> 8);
|
||||
outArray[outPos++] = (byte) (c & 0xFF);
|
||||
outArray[outPos++] = (byte) (next >> 8);
|
||||
outArray[outPos++] = (byte) (next & 0xFF);
|
||||
} else {
|
||||
inPos -= 2;
|
||||
if (!controller.hasMoreOutput(4)) {
|
||||
result = TCoderResult.OVERFLOW;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
inPos -= 2;
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
break;
|
||||
}
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
inPos--;
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
break;
|
||||
} else {
|
||||
if (outPos + 2 <= outSize) {
|
||||
outArray[outPos++] = (byte) (c >> 8);
|
||||
outArray[outPos++] = (byte) (c & 0xFF);
|
||||
} else {
|
||||
inPos--;
|
||||
if (!controller.hasMoreOutput(2)) {
|
||||
result = TCoderResult.OVERFLOW;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
controller.setInPosition(inPos);
|
||||
controller.setOutPosition(outPos);
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -20,7 +20,9 @@ import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
|
|||
import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
|
||||
|
||||
public class TUTF8Charset extends TCharset {
|
||||
public TUTF8Charset() {
|
||||
public static final TUTF8Charset INSTANCE = new TUTF8Charset();
|
||||
|
||||
private TUTF8Charset() {
|
||||
super("UTF-8", new String[0]);
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
|||
} else if ((b & 0xE0) == 0xC0) {
|
||||
if (inPos >= inSize) {
|
||||
--inPos;
|
||||
if (!controller.hasMoreInput()) {
|
||||
if (!controller.hasMoreInput(2)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
break;
|
||||
|
@ -49,7 +49,7 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
|||
} else if ((b & 0xF0) == 0xE0) {
|
||||
if (inPos + 2 > inSize) {
|
||||
--inPos;
|
||||
if (!controller.hasMoreInput()) {
|
||||
if (!controller.hasMoreInput(3)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
break;
|
||||
|
@ -71,7 +71,7 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
|||
} else if ((b & 0xF8) == 0xF0) {
|
||||
if (inPos + 3 > inSize) {
|
||||
--inPos;
|
||||
if (!controller.hasMoreInput()) {
|
||||
if (!controller.hasMoreInput(4)) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.teavm.junit.TeaVMTestRunner;
|
||||
import org.teavm.junit.WholeClassCompilation;
|
||||
|
||||
@RunWith(TeaVMTestRunner.class)
|
||||
@WholeClassCompilation
|
||||
public class AsciiTest {
|
||||
private static String hex = CharsetTestCommon.bytesToHex(convert(CharsetTestCommon.asciiText));
|
||||
|
||||
private static byte[] convert(String value) {
|
||||
byte[] result = new byte[value.length()];
|
||||
for (int i = 0; i < value.length(); ++i) {
|
||||
result[i] = (byte) value.charAt(i);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode1() {
|
||||
runEncode(600, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode2() {
|
||||
runEncode(600, 100);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode3() {
|
||||
runEncode(100, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode1() {
|
||||
runDecode(600, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode2() {
|
||||
runDecode(600, 100);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode3() {
|
||||
runDecode(100, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encodeUnmappable() {
|
||||
Charset charset = StandardCharsets.US_ASCII;
|
||||
CharsetTestCommon.checkUnmappable(charset, "ц", 1);
|
||||
CharsetTestCommon.checkUnmappable(charset, "£", 1);
|
||||
CharsetTestCommon.checkUnmappable(charset, "\uD800\uDC00b", 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decodeMalformed() {
|
||||
Charset charset = StandardCharsets.US_ASCII;
|
||||
CharsetTestCommon.checkMalformed(charset, new byte[] { (byte) 0xA3 }, 1);
|
||||
}
|
||||
|
||||
private void runEncode(int inSize, int outSize) {
|
||||
CharsetTestCommon.runEncode(hex, CharsetTestCommon.asciiText, StandardCharsets.US_ASCII, inSize, outSize);
|
||||
}
|
||||
|
||||
private void runDecode(int inSize, int outSize) {
|
||||
CharsetTestCommon.runDecode(hex, CharsetTestCommon.asciiText, StandardCharsets.US_ASCII, inSize, outSize);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.util.Arrays;
|
||||
|
||||
final class CharsetTestCommon {
|
||||
private static char[] hexDigits = "0123456789ABCDEF".toCharArray();
|
||||
|
||||
static final String text = ""
|
||||
+ "Здесь в моем объяснении я отмечаю все эти цифры и числа. Мне, конечно, всё равно будет, но теперь "
|
||||
+ "(и, может быть, только в эту минуту) я желаю, чтобы те, которые будут судить мой поступок, могли ясно "
|
||||
+ "видеть, из какой логической цепи выводов вышло мое „последнее убеждение“. Я написал сейчас выше, что "
|
||||
+ "окончательная решимость, которой недоставало мне для исполнения моего „последнего убеждения“, произошла "
|
||||
+ "во мне, кажется, вовсе не из логического вывода, а от какого-то странного толчка, от одного странного "
|
||||
+ "обстоятельства, может быть вовсе не связанного ничем с ходом дела. "
|
||||
+ "Дней десять назад зашел ко мне Рогожин, "
|
||||
+ "по одному своему делу, о котором здесь лишнее распространяться. Я никогда не видал Рогожина прежде, "
|
||||
+ "но слышал о нем очень многое. Я дал ему все нужные справки, и он скоро ушел, а так как он и приходил "
|
||||
+ "только за справками, то тем бы дело между нами и кончилось. Но он слишком заинтересовал меня, "
|
||||
+ "и весь этот день я был под влиянием странных мыслей, так что решился пойти к нему на другой день сам, "
|
||||
+ "отдать визит. Рогожин был мне очевидно не рад и даже „деликатно“ намекнул, что нам нечего продолжать "
|
||||
+ "знакомство; но все-таки я провел очень любопытный час, как, вероятно, и он. "
|
||||
+ "Между нами был такой контраст, "
|
||||
+ "который не мог не сказаться нам обоим, особенно мне: я был человек, уже сосчитавший дни свои, а он - "
|
||||
+ "живущий самою полною, непосредственною жизнью, настоящею минутой, без всякой заботы о „последних“ "
|
||||
+ "выводах, цифрах или о чем бы то ни было, не касающемся того, на чем... на чем... ну хоть на чем он "
|
||||
+ "помешан; пусть простит мне это выражение господин Рогожин, пожалуй хоть как плохому литератору, не "
|
||||
+ "умевшему выразить свою мысль. Несмотря на всю его нелюбезность, мне показалось, что он человек с умом и "
|
||||
+ "может многое понимать, хотя его мало что интересует из постороннего. Я не намекал ему о моем „последнем "
|
||||
+ "убеждении“, но мне почему-то показалось, что он, слушая меня, угадал его. "
|
||||
+ "Он промолчал, он ужасно молчалив. "
|
||||
+ "Я намекнул ему, уходя, что, несмотря на всю между нами разницу и на все противоположности, - "
|
||||
+ "les extrémités se touchent 1 (я растолковал ему это по-русски), так что, может быть, он и сам вовсе не "
|
||||
+ "так далек от моего „последнего убеждения“, как кажется. На это он ответил мне очень угрюмою и кислою "
|
||||
+ "гримасой, встал, сам сыскал мне мою фуражку, сделав вид, будто бы я сам ухожу, и просто-запросто вывел "
|
||||
+ "меня из своего мрачного дома под видом того, что провожает меня из учтивости. Дом его поразил меня; "
|
||||
+ "похож на кладбище, а ему, кажется, нравится, что, впрочем, понятно: такая полная, "
|
||||
+ "непосредственная жизнь, которою он живет, слишком полна сама по себе, чтобы нуждаться в обстановке.";
|
||||
|
||||
static String asciiText = ""
|
||||
+ "Meanwhile, the various members of Sleary`s company gradually gathered together from the upper "
|
||||
+ "regions, where they were quartered, and, from standing about, talking in low voices to one another "
|
||||
+ "and to Mr. Childers, gradually insinuated themselves and him into the room. There were two or three "
|
||||
+ "handsome young women among them, with their two or three husbands, and their two or three mothers, "
|
||||
+ "and their eight or nine little children, who did the fairy business when required. "
|
||||
+ "The father of one of the families was in the habit of balancing the father of another of the families "
|
||||
+ "on the top of a great pole; the father of a third family often made a pyramid of both those fathers, "
|
||||
+ "with Master Kidderminster for the apex, and himself for the base; all the fathers could dance upon "
|
||||
+ "rolling casks, stand upon bottles, catch knives and balls, twirl hand-basins, ride upon anything, "
|
||||
+ "jump over everything, and stick at nothing. All the mothers could (and did) dance, upon the slack "
|
||||
+ "wire and the tight-rope, and perform rapid acts on bare-backed steeds; none of them were at all "
|
||||
+ "particular in respect of showing their legs; and one of them, alone in a Greek chariot, drove six "
|
||||
+ "in hand into every town they came to. They all assumed to be mighty rakish and knowing, they were "
|
||||
+ "not very tidy in their private dresses, they were not at all orderly in their domestic arrangements, "
|
||||
+ "and the combined literature of the whole company would have produced but a poor letter on any subject. "
|
||||
+ "Yet there was a remarkable gentleness and childishness about these people, a special inaptitude "
|
||||
+ "for any kind of sharp practice, and an untiring readiness to help and pity one another, deserving "
|
||||
+ "often of as much respect, and always of as much generous construction, as the every-day virtues of "
|
||||
+ "any class of people in the world.";
|
||||
|
||||
// Holder of static test helpers only; the private constructor prevents instantiation.
private CharsetTestCommon() {
|
||||
}
|
||||
|
||||
static void runEncode(String hex, String text, Charset charset, int inSize, int outSize) {
|
||||
char[] input = text.toCharArray();
|
||||
byte[] output = new byte[16384];
|
||||
int inPos = 0;
|
||||
int outPos = 0;
|
||||
CharsetEncoder encoder = charset.newEncoder();
|
||||
CoderResult result;
|
||||
|
||||
do {
|
||||
int inLen = Math.min(inSize, input.length - inPos);
|
||||
CharBuffer in = CharBuffer.wrap(input, inPos, inLen);
|
||||
int outLen = Math.min(outSize, output.length - outPos);
|
||||
ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen);
|
||||
result = encoder.encode(in, out, inPos + inLen >= input.length);
|
||||
inPos = in.position();
|
||||
outPos = out.position();
|
||||
} while (!result.isError() && inPos < input.length);
|
||||
|
||||
assertEquals("Should be UNDERFLOW after encoding", CoderResult.UNDERFLOW, result);
|
||||
|
||||
do {
|
||||
int outLen = Math.min(outSize, output.length - outPos);
|
||||
ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen);
|
||||
result = encoder.flush(out);
|
||||
outPos = out.position();
|
||||
} while (!result.isUnderflow());
|
||||
|
||||
assertEquals("Should be UNDERFLOW after flushing", CoderResult.UNDERFLOW, result);
|
||||
output = Arrays.copyOf(output, outPos);
|
||||
assertEquals(hex, bytesToHex(output));
|
||||
}
|
||||
|
||||
static void runDecode(String hex, String text, Charset charset, int inSize, int outSize) {
|
||||
byte[] input = hexToBytes(hex);
|
||||
char[] output = new char[16384];
|
||||
int inPos = 0;
|
||||
int outPos = 0;
|
||||
CharsetDecoder decoder = charset.newDecoder();
|
||||
CoderResult result;
|
||||
|
||||
do {
|
||||
int inLen = Math.min(inSize, input.length - inPos);
|
||||
ByteBuffer in = ByteBuffer.wrap(input, inPos, inLen);
|
||||
int outLen = Math.min(outSize, output.length - outPos);
|
||||
CharBuffer out = CharBuffer.wrap(output, outPos, outLen);
|
||||
result = decoder.decode(in, out, inPos + inLen >= input.length);
|
||||
inPos = in.position();
|
||||
outPos = out.position();
|
||||
} while (!result.isError() && inPos < input.length);
|
||||
|
||||
assertEquals("Should be UNDERFLOW after encoding", CoderResult.UNDERFLOW, result);
|
||||
|
||||
do {
|
||||
int outLen = Math.min(outSize, output.length - outPos);
|
||||
CharBuffer out = CharBuffer.wrap(output, outPos, outLen);
|
||||
result = decoder.flush(out);
|
||||
outPos = out.position();
|
||||
} while (!result.isUnderflow());
|
||||
|
||||
assertEquals("Should be UNDERFLOW after flushing", CoderResult.UNDERFLOW, result);
|
||||
output = Arrays.copyOf(output, outPos);
|
||||
assertEquals(text, new String(output));
|
||||
}
|
||||
|
||||
static String bytesToHex(byte[] bytes) {
|
||||
char[] result = new char[bytes.length * 2];
|
||||
int j = 0;
|
||||
for (int i = 0; i < bytes.length; ++i) {
|
||||
int b = bytes[i] & 0xFF;
|
||||
result[j++] = hexDigits[b >> 4];
|
||||
result[j++] = hexDigits[b & 0xF];
|
||||
}
|
||||
return new String(result);
|
||||
}
|
||||
|
||||
static byte[] hexToBytes(String hex) {
|
||||
char[] chars = hex.toCharArray();
|
||||
byte[] result = new byte[chars.length / 2];
|
||||
int j = 0;
|
||||
for (int i = 0; i < chars.length; i += 2) {
|
||||
char hi = chars[i];
|
||||
char lo = chars[i + 1];
|
||||
result[j++] = (byte) ((digit(hi) << 4) | digit(lo));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static int digit(char c) {
|
||||
if (c >= '0' && c <= '9') {
|
||||
return c - '0';
|
||||
}
|
||||
return c - 'A' + 10;
|
||||
}
|
||||
|
||||
static void checkUnmappable(Charset charset, String text, int length) {
|
||||
CharBuffer input = CharBuffer.wrap(text);
|
||||
byte[] result = new byte[100];
|
||||
ByteBuffer output = ByteBuffer.wrap(result);
|
||||
CoderResult coderResult = charset.newEncoder().encode(input, output, true);
|
||||
assertEquals(CoderResult.unmappableForLength(length), coderResult);
|
||||
assertEquals(0, input.position());
|
||||
assertEquals(0, output.position());
|
||||
}
|
||||
|
||||
static void checkMalformed(Charset charset, String text, int length) {
|
||||
CharBuffer input = CharBuffer.wrap(text);
|
||||
byte[] result = new byte[100];
|
||||
ByteBuffer output = ByteBuffer.wrap(result);
|
||||
CoderResult coderResult = charset.newEncoder().encode(input, output, true);
|
||||
assertEquals(CoderResult.malformedForLength(length), coderResult);
|
||||
assertEquals(0, input.position());
|
||||
assertEquals(0, output.position());
|
||||
}
|
||||
|
||||
static void checkMalformed(Charset charset, byte[] data, int length) {
|
||||
ByteBuffer input = ByteBuffer.wrap(data);
|
||||
CharBuffer output = CharBuffer.wrap(new char[100]);
|
||||
CoderResult coderResult = charset.newDecoder().decode(input, output, true);
|
||||
assertEquals(CoderResult.malformedForLength(length), coderResult);
|
||||
assertEquals(0, input.position());
|
||||
assertEquals(0, output.position());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset;
|
||||
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.teavm.junit.TeaVMTestRunner;
|
||||
import org.teavm.junit.WholeClassCompilation;
|
||||
|
||||
@RunWith(TeaVMTestRunner.class)
|
||||
@WholeClassCompilation
|
||||
public class Iso8859Test {
|
||||
private static String hex = CharsetTestCommon.bytesToHex(convert(CharsetTestCommon.asciiText));
|
||||
|
||||
private static byte[] convert(String value) {
|
||||
byte[] result = new byte[value.length()];
|
||||
for (int i = 0; i < value.length(); ++i) {
|
||||
result[i] = (byte) value.charAt(i);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode1() {
|
||||
runEncode(600, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode2() {
|
||||
runEncode(600, 100);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode3() {
|
||||
runEncode(100, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode1() {
|
||||
runDecode(600, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode2() {
|
||||
runDecode(600, 100);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode3() {
|
||||
runDecode(100, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encodeUnmappable() {
|
||||
Charset charset = StandardCharsets.ISO_8859_1;
|
||||
CharsetTestCommon.checkUnmappable(charset, "ц", 1);
|
||||
CharsetTestCommon.checkUnmappable(charset, "\uD800\uDC00b", 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decodeNonAscii() {
|
||||
assertEquals("£", new String(new byte[] { (byte) 0xA3 }, StandardCharsets.ISO_8859_1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encodeNonAscii() {
|
||||
assertArrayEquals(new byte[] { (byte) 0xA3 }, "£".getBytes(StandardCharsets.ISO_8859_1));
|
||||
}
|
||||
|
||||
private void runEncode(int inSize, int outSize) {
|
||||
CharsetTestCommon.runEncode(hex, CharsetTestCommon.asciiText, StandardCharsets.ISO_8859_1, inSize, outSize);
|
||||
}
|
||||
|
||||
private void runDecode(int inSize, int outSize) {
|
||||
CharsetTestCommon.runDecode(hex, CharsetTestCommon.asciiText, StandardCharsets.ISO_8859_1, inSize, outSize);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Copyright 2021 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.teavm.junit.TeaVMTestRunner;
|
||||
import org.teavm.junit.WholeClassCompilation;
|
||||
|
||||
@RunWith(TeaVMTestRunner.class)
|
||||
@WholeClassCompilation
|
||||
public class UTF16Test {
|
||||
private static String hexLE = CharsetTestCommon.bytesToHex(convertLE(CharsetTestCommon.text));
|
||||
private static String hexBE = CharsetTestCommon.bytesToHex(convertBE(CharsetTestCommon.text));
|
||||
private static String hexLEBom = "FFFE" + hexLE;
|
||||
private static String hexBEBom = "FEFF" + hexBE;
|
||||
private static boolean littleEndian = "0".getBytes(StandardCharsets.UTF_16)[0] == 0xFF;
|
||||
|
||||
private static byte[] convertLE(String text) {
|
||||
byte[] data = new byte[text.length() * 2];
|
||||
for (int i = 0; i < text.length(); ++i) {
|
||||
char c = text.charAt(i);
|
||||
data[i * 2] = (byte) (c & 0xFF);
|
||||
data[i * 2 + 1] = (byte) ((c >> 8) & 0xFF);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
private static byte[] convertBE(String text) {
|
||||
byte[] data = new byte[text.length() * 2];
|
||||
for (int i = 0; i < text.length(); ++i) {
|
||||
char c = text.charAt(i);
|
||||
data[i * 2] = (byte) ((c >> 8) & 0xFF);
|
||||
data[i * 2 + 1] = (byte) (c & 0xFF);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode1() {
|
||||
runEncode(600, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode2() {
|
||||
runEncode(600, 100);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode3() {
|
||||
runEncode(100, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encode4() {
|
||||
runEncode(600, 99);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode1() {
|
||||
runDecode(600, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode2() {
|
||||
runDecode(600, 100);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode3() {
|
||||
runDecode(100, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decode4() {
|
||||
runDecode(99, 600);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encodeMalformedSurrogate() {
|
||||
CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16LE, "\uD800\uD800", 1);
|
||||
CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16LE, "\uD800a", 1);
|
||||
CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16LE, "\uDC00\uD800", 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encodeSurrogate() {
|
||||
String hex = CharsetTestCommon.bytesToHex("\uD800\uDC00".getBytes(StandardCharsets.UTF_16BE));
|
||||
assertEquals("D800DC00", hex);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decodeSurrogate() {
|
||||
String hex = CharsetTestCommon.bytesToHex("\uD800\uDC00".getBytes(StandardCharsets.UTF_16BE));
|
||||
assertEquals("D800DC00", hex);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decodeMalformedSurrogate() {
|
||||
CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16BE, CharsetTestCommon.hexToBytes("D800D800"), 4);
|
||||
CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16BE, CharsetTestCommon.hexToBytes("D8000041"), 4);
|
||||
CharsetTestCommon.checkMalformed(StandardCharsets.UTF_16BE, CharsetTestCommon.hexToBytes("DC00D800"), 2);
|
||||
}
|
||||
|
||||
private void runEncode(int inSize, int outSize) {
|
||||
CharsetTestCommon.runEncode(hexLE, CharsetTestCommon.text, StandardCharsets.UTF_16LE, inSize, outSize);
|
||||
CharsetTestCommon.runEncode(hexBE, CharsetTestCommon.text, StandardCharsets.UTF_16BE, inSize, outSize);
|
||||
CharsetTestCommon.runEncode(littleEndian ? hexLEBom : hexBEBom, CharsetTestCommon.text,
|
||||
StandardCharsets.UTF_16, inSize, outSize);
|
||||
}
|
||||
|
||||
private void runDecode(int inSize, int outSize) {
|
||||
CharsetTestCommon.runDecode(hexLE, CharsetTestCommon.text, StandardCharsets.UTF_16LE, inSize, outSize);
|
||||
CharsetTestCommon.runDecode(hexBE, CharsetTestCommon.text, StandardCharsets.UTF_16BE, inSize, outSize);
|
||||
CharsetTestCommon.runDecode(hexBEBom, CharsetTestCommon.text, StandardCharsets.UTF_16, inSize, outSize);
|
||||
CharsetTestCommon.runDecode(hexLEBom, CharsetTestCommon.text, StandardCharsets.UTF_16, inSize, outSize);
|
||||
CharsetTestCommon.runDecode(hexBE, CharsetTestCommon.text, StandardCharsets.UTF_16, inSize, outSize);
|
||||
}
|
||||
}
|
|
@ -17,13 +17,11 @@ package org.teavm.classlib.java.nio.charset;
|
|||
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
@ -33,54 +31,8 @@ import org.teavm.junit.WholeClassCompilation;
|
|||
@RunWith(TeaVMTestRunner.class)
|
||||
@WholeClassCompilation
|
||||
public class UTF8Test {
|
||||
private static char[] hexDigits = "0123456789ABCDEF".toCharArray();
|
||||
// Fragment from "The Idiot" by F. Dostoevsky
|
||||
private String text =
|
||||
"" + "Здесь в моем объяснении я отмечаю все эти цифры и числа. Мне, конечно, всё равно будет, но теперь "
|
||||
+ "(и, может быть, только в эту минуту) я желаю, чтобы те, которые будут судить мой поступок, "
|
||||
+ "могли ясно "
|
||||
+ "видеть, из какой логической цепи выводов вышло мое „последнее убеждение“. Я написал сейчас "
|
||||
+ "выше, что "
|
||||
+ "окончательная решимость, которой недоставало мне для исполнения моего „последнего убеждения“, "
|
||||
+ "произошла "
|
||||
+ "во мне, кажется, вовсе не из логического вывода, а от какого-то странного толчка, от одного "
|
||||
+ "странного " + "обстоятельства, может быть вовсе не связанного ничем с ходом дела. "
|
||||
+ "Дней десять назад зашел ко мне Рогожин, "
|
||||
+ "по одному своему делу, о котором здесь лишнее распространяться. Я никогда не видал Рогожина "
|
||||
+ "прежде, "
|
||||
+ "но слышал о нем очень многое. Я дал ему все нужные справки, и он скоро ушел, а так как он и "
|
||||
+ "приходил "
|
||||
+ "только за справками, то тем бы дело между нами и кончилось. Но он слишком заинтересовал меня, "
|
||||
+ "и весь этот день я был под влиянием странных мыслей, так что решился пойти к нему на другой "
|
||||
+ "день сам, "
|
||||
+ "отдать визит. Рогожин был мне очевидно не рад и даже „деликатно“ намекнул, что нам нечего "
|
||||
+ "продолжать " + "знакомство; но все-таки я провел очень любопытный час, как, вероятно, и он. "
|
||||
+ "Между нами был такой контраст, "
|
||||
+ "который не мог не сказаться нам обоим, особенно мне: я был человек, уже сосчитавший дни свои, "
|
||||
+ "а он - "
|
||||
+ "живущий самою полною, непосредственною жизнью, настоящею минутой, без всякой заботы о "
|
||||
+ "„последних“ "
|
||||
+ "выводах, цифрах или о чем бы то ни было, не касающемся того, на чем... на чем... ну хоть на "
|
||||
+ "чем он " + "помешан; пусть простит мне это выражение господин Рогожин, пожалуй хоть как плохому "
|
||||
+ "литератору, не "
|
||||
+ "умевшему выразить свою мысль. Несмотря на всю его нелюбезность, мне показалось, что он человек"
|
||||
+ " с умом и "
|
||||
+ "может многое понимать, хотя его мало что интересует из постороннего. Я не намекал ему о моем "
|
||||
+ "„последнем " + "убеждении“, но мне почему-то показалось, что он, слушая меня, угадал его. "
|
||||
+ "Он промолчал, он ужасно молчалив. "
|
||||
+ "Я намекнул ему, уходя, что, несмотря на всю между нами разницу и на все противоположности, - "
|
||||
+ "les extrémités se touchent 1 (я растолковал ему это по-русски), так что, может быть, он и сам "
|
||||
+ "вовсе не "
|
||||
+ "так далек от моего „последнего убеждения“, как кажется. На это он ответил мне очень угрюмою и "
|
||||
+ "кислою " + "гримасой, встал, сам сыскал мне мою фуражку, сделав вид, будто бы я сам ухожу, и "
|
||||
+ "просто-запросто вывел "
|
||||
+ "меня из своего мрачного дома под видом того, что провожает меня из учтивости. Дом его поразил "
|
||||
+ "меня; " + "похож на кладбище, а ему, кажется, нравится, что, впрочем, понятно: такая полная, "
|
||||
+ "непосредственная "
|
||||
+ "жизнь, которою он живет, слишком полна сама по себе, чтобы нуждаться в обстановке.";
|
||||
private String hex = ""
|
||||
+
|
||||
"D097D0B4D0B5D181D18C20D0B220D0BCD0BED0B5D0BC20D0BED0B1D18AD18FD181D0BDD0B5D0BDD0B8D0B820D18F20D0BED"
|
||||
+ "D097D0B4D0B5D181D18C20D0B220D0BCD0BED0B5D0BC20D0BED0B1D18AD18FD181D0BDD0B5D0BDD0B8D0B820D18F20D0BED"
|
||||
+ "182D0BCD0B5D187D0B0D18E20D0B2D181D0B520D18DD182D0B820D186D0B8D184D180D18B20D0B820D187D0B8D181D0BBD"
|
||||
+ "0B02E20D09CD0BDD0B52C20D0BAD0BED0BDD0B5D187D0BDD0BE2C20D0B2D181D19120D180D0B0D0B2D0BDD0BE20D0B1D18"
|
||||
+ "3D0B4D0B5D1822C20D0BDD0BE20D182D0B5D0BFD0B5D180D18C2028D0B82C20D0BCD0BED0B6D0B5D18220D0B1D18BD182D"
|
||||
|
@ -210,16 +162,29 @@ public class UTF8Test {
|
|||
|
||||
@Test
|
||||
public void replaceMalformedSurrogatePair() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
Charset charset = StandardCharsets.UTF_8;
|
||||
ByteBuffer buffer = charset.encode("a\uD800\uD800b");
|
||||
byte[] result = new byte[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
assertArrayEquals(new byte[] { 97, 63, 63, 98 }, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void malformedSurrogatePair() {
|
||||
Charset charset = StandardCharsets.UTF_8;
|
||||
byte[] result = new byte[100];
|
||||
CharBuffer input = CharBuffer.wrap("a\uD800\uD800b");
|
||||
ByteBuffer output = ByteBuffer.wrap(result);
|
||||
CoderResult coderResult = charset.newEncoder().encode(input, output, true);
|
||||
assertEquals(CoderResult.malformedForLength(1), coderResult);
|
||||
result = Arrays.copyOf(result, output.position());
|
||||
assertArrayEquals(new byte[] { 97 }, result);
|
||||
assertEquals(1, input.position());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void encodeSurrogate() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
Charset charset = StandardCharsets.UTF_8;
|
||||
ByteBuffer buffer = charset.encode("a\uD800\uDC00b");
|
||||
byte[] result = new byte[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
|
@ -228,7 +193,7 @@ public class UTF8Test {
|
|||
|
||||
@Test
|
||||
public void encodeSupplementary() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
Charset charset = StandardCharsets.UTF_8;
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.appendCodePoint(0xfedcb);
|
||||
ByteBuffer buffer = charset.encode(sb.toString());
|
||||
|
@ -239,7 +204,7 @@ public class UTF8Test {
|
|||
|
||||
@Test
|
||||
public void replaceMalformedFirstByte() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
Charset charset = StandardCharsets.UTF_8;
|
||||
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte) 0xFF, 98 }));
|
||||
char[] result = new char[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
|
@ -248,7 +213,7 @@ public class UTF8Test {
|
|||
|
||||
@Test
|
||||
public void replaceMalformedMidByte() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
Charset charset = StandardCharsets.UTF_8;
|
||||
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte) 0xC0, 98, 98 }));
|
||||
char[] result = new char[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
|
@ -265,115 +230,17 @@ public class UTF8Test {
|
|||
bytes[i++] = -78;
|
||||
bytes[i++] = -69;
|
||||
}
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
Charset charset = StandardCharsets.UTF_8;
|
||||
CharBuffer buffer = charset.decode(ByteBuffer.wrap(bytes));
|
||||
assertEquals('\uD8BB', buffer.get(8190));
|
||||
assertEquals('\uDCBB', buffer.get(8191));
|
||||
}
|
||||
|
||||
private void runEncode(int inSize, int outSize) {
|
||||
char[] input = text.toCharArray();
|
||||
byte[] output = new byte[16384];
|
||||
int inPos = 0;
|
||||
int outPos = 0;
|
||||
CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder();
|
||||
CoderResult result = CoderResult.UNDERFLOW;
|
||||
|
||||
while (true) {
|
||||
int inLen = Math.min(inSize, input.length - inPos);
|
||||
CharBuffer in = CharBuffer.wrap(input, inPos, inLen);
|
||||
int outLen = Math.min(outSize, output.length - outPos);
|
||||
ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen);
|
||||
result = encoder.encode(in, out, inPos + inLen >= input.length);
|
||||
inPos = in.position();
|
||||
outPos = out.position();
|
||||
if (result.isError() || inPos >= input.length) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertTrue("Should be UNDERFLOW after encoding", result.isUnderflow());
|
||||
|
||||
while (true) {
|
||||
int outLen = Math.min(outSize, output.length - outPos);
|
||||
ByteBuffer out = ByteBuffer.wrap(output, outPos, outLen);
|
||||
result = encoder.flush(out);
|
||||
outPos = out.position();
|
||||
if (result.isUnderflow()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertTrue("Should be UNDERFLOW after flushing", result.isUnderflow());
|
||||
output = Arrays.copyOf(output, outPos);
|
||||
assertEquals(hex, bytesToHex(output));
|
||||
CharsetTestCommon.runEncode(hex, CharsetTestCommon.text, StandardCharsets.UTF_8, inSize, outSize);
|
||||
}
|
||||
|
||||
private void runDecode(int inSize, int outSize) {
|
||||
byte[] input = hexToBytes(hex);
|
||||
char[] output = new char[16384];
|
||||
int inPos = 0;
|
||||
int outPos = 0;
|
||||
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
|
||||
CoderResult result = CoderResult.UNDERFLOW;
|
||||
|
||||
while (true) {
|
||||
int inLen = Math.min(inSize, input.length - inPos);
|
||||
ByteBuffer in = ByteBuffer.wrap(input, inPos, inLen);
|
||||
int outLen = Math.min(outSize, output.length - outPos);
|
||||
CharBuffer out = CharBuffer.wrap(output, outPos, outLen);
|
||||
result = decoder.decode(in, out, inPos + inLen >= input.length);
|
||||
inPos = in.position();
|
||||
outPos = out.position();
|
||||
if (result.isError() || inPos >= input.length) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertTrue("Should be UNDERFLOW after encoding", result.isUnderflow());
|
||||
|
||||
while (true) {
|
||||
int outLen = Math.min(outSize, output.length - outPos);
|
||||
CharBuffer out = CharBuffer.wrap(output, outPos, outLen);
|
||||
result = decoder.flush(out);
|
||||
outPos = out.position();
|
||||
if (result.isUnderflow()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertTrue("Should be UNDERFLOW after flushing", result.isUnderflow());
|
||||
output = Arrays.copyOf(output, outPos);
|
||||
assertEquals(text, new String(output));
|
||||
}
|
||||
|
||||
private String bytesToHex(byte[] bytes) {
|
||||
char[] result = new char[bytes.length * 2];
|
||||
int j = 0;
|
||||
for (int i = 0; i < bytes.length; ++i) {
|
||||
int b = bytes[i] & 0xFF;
|
||||
result[j++] = hexDigits[b >> 4];
|
||||
result[j++] = hexDigits[b & 0xF];
|
||||
}
|
||||
return new String(result);
|
||||
}
|
||||
|
||||
private byte[] hexToBytes(String hex) {
|
||||
char[] chars = hex.toCharArray();
|
||||
byte[] result = new byte[chars.length / 2];
|
||||
int j = 0;
|
||||
for (int i = 0; i < chars.length; i += 2) {
|
||||
char hi = chars[i];
|
||||
char lo = chars[i + 1];
|
||||
result[j++] = (byte) ((digit(hi) << 4) | digit(lo));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static int digit(char c) {
|
||||
if (c >= '0' && c <= '9') {
|
||||
return c - '0';
|
||||
}
|
||||
return c - 'A' + 10;
|
||||
CharsetTestCommon.runDecode(hex, CharsetTestCommon.text, StandardCharsets.UTF_8, inSize, outSize);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user