diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/ByteBuffer.java b/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/ByteBuffer.java deleted file mode 100644 index 0f07a668c..000000000 --- a/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/ByteBuffer.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2013 Alexey Andreev. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.teavm.classlib.impl.charset; - -/** - * - * @author Alexey Andreev - */ -public class ByteBuffer { - private byte[] data; - private int end; - private int pos; - - public ByteBuffer(byte[] data) { - this(data, 0, data.length); - } - - public ByteBuffer(byte[] data, int start, int end) { - this.data = data; - this.end = end; - this.pos = start; - } - - public void put(byte b) { - data[pos++] = b; - } - - public void rewind(int start) { - this.pos = start; - } - - public int available() { - return end - pos; - } - - public void back(int count) { - pos -= count; - } - - public boolean end() { - return pos == end; - } - - public void skip(int count) { - pos += count; - } - - public byte get() { - return data[pos++]; - } - - public int position() { - return pos; - } - - public void put(ByteBuffer buffer) { - while (buffer.pos < buffer.end) { - data[pos++] = buffer.data[buffer.pos++]; - } - } -} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/CharBuffer.java b/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/CharBuffer.java deleted file mode 100644 index 78ef94423..000000000 --- a/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/CharBuffer.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2013 Alexey Andreev. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.teavm.classlib.impl.charset; - -/** - * - * @author Alexey Andreev - */ -public class CharBuffer { - private char[] data; - private int end; - private int pos; - - public CharBuffer(char[] data, int start, int end) { - this.data = data; - this.end = end; - this.pos = start; - } - - public CharBuffer(char[] data) { - this(data, 0, data.length); - } - - public void put(char b) { - data[pos++] = b; - } - - public void rewind(int start) { - this.pos = start; - } - - public int available() { - return end - pos; - } - - public void back(int count) { - pos -= count; - } - - public boolean end() { - return pos == end; - } - - public char get() { - return data[pos++]; - } - - public int position() { - return pos; - } - - public void skip(int count) { - pos += count; - } - - public void put(CharBuffer buffer) { - int sz = Math.min(buffer.end - buffer.pos, end - pos); - for (int i = 0; i < sz; ++i) { - data[pos++] = buffer.data[buffer.pos++]; - } - } -} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/Charset.java b/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/Charset.java deleted file mode 100644 index ad47c0de0..000000000 --- a/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/Charset.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2013 Alexey Andreev. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.teavm.classlib.impl.charset; - -/** - * - * @author Alexey Andreev - */ -public abstract class Charset { - public abstract void encode(CharBuffer source, ByteBuffer dest); - - public abstract void decode(ByteBuffer source, CharBuffer dest); - - public static Charset get(String name) { - if (name.toUpperCase().equals("UTF-8")) { - return new UTF8Charset(); - } - return null; - } -} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/UTF8Charset.java b/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/UTF8Charset.java deleted file mode 100644 index 1eddfa602..000000000 --- a/teavm-classlib/src/main/java/org/teavm/classlib/impl/charset/UTF8Charset.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2013 Alexey Andreev. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.teavm.classlib.impl.charset; - -/** - * - * @author Alexey Andreev - */ -public class UTF8Charset extends Charset { - @Override - public void encode(CharBuffer source, ByteBuffer dest) { - while (!source.end() && dest.available() >= 4) { - char ch = source.get(); - if (ch < 0x80) { - dest.put((byte)ch); - } else if (ch < 0x400) { - dest.put((byte)(0xC0 | (ch >> 6))); - dest.put((byte)(0x80 | (ch & 0x3F))); - } else if (!Character.isSurrogate(ch)) { - dest.put((byte)(0xE0 | (ch >> 12))); - dest.put((byte)(0x80 | ((ch >> 6) & 0x3F))); - dest.put((byte)(0x80 | (ch & 0x3F))); - } else if (Character.isHighSurrogate(ch)) { - char low = source.get(); - if (!Character.isLowSurrogate(low)) { - source.back(1); - dest.put((byte)'?'); - } else { - int codePoint = Character.toCodePoint(ch, low); - dest.put((byte)(0xF0 | (codePoint >> 18))); - dest.put((byte)(0x80 | ((codePoint >> 12) & 0x3F))); - dest.put((byte)(0x80 | ((codePoint >> 6) & 0x3F))); - dest.put((byte)(0x80 | (codePoint & 0x3F))); - } - } else { - dest.put((byte)'?'); - } - } - } - - @Override - public void decode(ByteBuffer source, CharBuffer dest) { - while (!source.end() && dest.available() >= 2) { - int b = source.get() & 0xFF; - if ((b & 0x80) == 0) { - dest.put((char)b); - } else if ((b & 0xE0) == 0xC0) { - if (source.end()) { - dest.put((char)b); - return; - } - dest.put((char)(((b & 0x1F) << 6) | (source.get() & 0x3F))); - } else if ((b & 0xF0) == 0xE0) { - if (source.available() < 2) { - source.skip(source.available()); - dest.put((char)b); - return; - } - byte b2 = source.get(); - byte b3 = source.get(); - char c = (char)(((b & 0x0F) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3F)); - dest.put(!Character.isHighSurrogate(c) ? c : '?'); - } else if ((b & 0xF8) == 0xF0) { - if (source.available() < 3) { - source.skip(source.available()); - dest.put((char)b); - return; - } - byte b2 = source.get(); - byte b3 = source.get(); - byte b4 = source.get(); - int code = ((b & 0x07) << 18) | ((b2 & 0x3f) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F); - dest.put(Character.highSurrogate(code)); - dest.put(Character.lowSurrogate(code)); - } - } - } -} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TInputStreamReader.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TInputStreamReader.java index 08b6054e1..6535ed6e6 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TInputStreamReader.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TInputStreamReader.java @@ -21,6 +21,7 @@ import org.teavm.classlib.java.nio.TCharBuffer; import org.teavm.classlib.java.nio.charset.TCharset; import org.teavm.classlib.java.nio.charset.TCharsetDecoder; import org.teavm.classlib.java.nio.charset.TCodingErrorAction; +import org.teavm.classlib.java.nio.charset.TUnsupportedCharsetException; import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset; /** @@ -29,8 +30,7 @@ import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset; */ public class TInputStreamReader extends TReader { private TInputStream stream; - private TCharset charset; - private TString charsetName; + private TCharsetDecoder decoder; private byte[] inData = new byte[8192]; private TByteBuffer inBuffer = TByteBuffer.wrap(inData); private char[] outData = new char[1024]; @@ -38,25 +38,37 @@ public class TInputStreamReader extends TReader { private boolean streamEof; private boolean eof; - public TInputStreamReader(TInputStream in, TString charsetName) { - this(in, TCharset.forName(charsetName.toString())); - this.charsetName = charsetName; + public TInputStreamReader(TInputStream in, TString charsetName) throws TUnsupportedEncodingException { + this(in, getCharset(charsetName)); + } + + public TInputStreamReader(TInputStream in, TCharset charset) { + this(in, charset.newDecoder() + .onMalformedInput(TCodingErrorAction.REPLACE) + .onUnmappableCharacter(TCodingErrorAction.REPLACE)); } public TInputStreamReader(TInputStream in) { this(in, new TUTF8Charset()); - charsetName = TString.wrap("UTF-8"); } - public TInputStreamReader(TInputStream in, TCharset charset) { + public TInputStreamReader(TInputStream in, TCharsetDecoder decoder) { this.stream = in; - this.charset = charset; + this.decoder = decoder; outBuffer.position(outBuffer.limit()); inBuffer.position(inBuffer.limit()); } + private static TCharset getCharset(TString charsetName) throws TUnsupportedEncodingException { + try { + return TCharset.forName(charsetName.toString()); + } catch (TUnsupportedCharsetException e) { + throw new TUnsupportedEncodingException(charsetName); + } + } + public TString getEncoding() { - return charsetName; + return TString.wrap(decoder.charset().name()); } @Override @@ -98,18 +110,17 @@ public class TInputStreamReader extends TReader { return false; } outBuffer.compact(); - TCharsetDecoder decoder = charset.newDecoder() - .onMalformedInput(TCodingErrorAction.REPLACE) - .onUnmappableCharacter(TCodingErrorAction.IGNORE); while (true) { if (!inBuffer.hasRemaining() && !fillReadBuffer()) { - eof = true; break; } - if (decoder.decode(inBuffer, outBuffer, eof).isOverflow()) { + if (decoder.decode(inBuffer, outBuffer, streamEof).isOverflow()) { break; } } + if (!inBuffer.hasRemaining() && streamEof && decoder.flush(outBuffer).isUnderflow()) { + eof = true; + } outBuffer.flip(); return true; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TOutputStreamWriter.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TOutputStreamWriter.java index 9063ff528..9ced7e453 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TOutputStreamWriter.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TOutputStreamWriter.java @@ -15,40 +15,61 @@ */ package org.teavm.classlib.java.io; -import org.teavm.classlib.impl.charset.ByteBuffer; -import org.teavm.classlib.impl.charset.CharBuffer; -import org.teavm.classlib.impl.charset.Charset; import org.teavm.classlib.java.lang.TString; +import org.teavm.classlib.java.nio.TByteBuffer; +import org.teavm.classlib.java.nio.TCharBuffer; +import org.teavm.classlib.java.nio.charset.*; +import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset; public class TOutputStreamWriter extends TWriter { private TOutputStream out; - private String encoding; - private Charset charset; + private TCharsetEncoder encoder; private byte[] bufferData = new byte[512]; - private ByteBuffer buffer = new ByteBuffer(bufferData); + private TByteBuffer buffer = TByteBuffer.wrap(bufferData); + private boolean closed; public TOutputStreamWriter(TOutputStream out) { - this(out, "UTF-8"); + this(nullCheck(out), new TUTF8Charset()); } public TOutputStreamWriter(TOutputStream out, final String enc) throws TUnsupportedEncodingException { - super(out); - if (enc == null) { + this(nullCheck(out), getCharset(enc)); + } + + public TOutputStreamWriter(TOutputStream out, TCharset charset) { + this(nullCheck(out), charset.newEncoder() + .onMalformedInput(TCodingErrorAction.REPLACE) + .onUnmappableCharacter(TCodingErrorAction.REPLACE)); + } + + public TOutputStreamWriter(TOutputStream out, TCharsetEncoder encoder) { + this.out = nullCheck(out); + this.encoder = encoder; + } + + private static TOutputStream nullCheck(TOutputStream stream) { + if (stream == null) { throw new NullPointerException(); } - this.out = out; - charset = Charset.get(enc); - if (charset == null) { - throw new TUnsupportedEncodingException(TString.wrap(enc)); + return stream; + } + + private static TCharset getCharset(String charsetName) throws TUnsupportedEncodingException { + if (charsetName == null) { + throw new NullPointerException(); + } + try { + return TCharset.forName(charsetName); + } catch (TUnsupportedCharsetException | TIllegalCharsetNameException e) { + throw new TUnsupportedEncodingException(TString.wrap(charsetName)); } - encoding = enc; } @Override public void close() throws TIOException { - if (charset != null) { + if (!closed) { flush(); - charset = null; + closed = true; out.flush(); out.close(); } @@ -59,19 +80,19 @@ public class TOutputStreamWriter extends TWriter { checkStatus(); if (buffer.position() > 0) { out.write(bufferData, 0, buffer.position()); - buffer.rewind(0); + buffer.clear(); } out.flush(); } private void checkStatus() throws TIOException { - if (charset == null) { + if (closed) { throw new TIOException(TString.wrap("Writer already closed")); } } public String getEncoding() { - return encoding; + return encoder.charset().name(); } @Override @@ -84,30 +105,20 @@ public class TOutputStreamWriter extends TWriter { if (offset < 0 || offset > buf.length - count || count < 0) { throw new IndexOutOfBoundsException(); } - CharBuffer input = new CharBuffer(buf, offset, offset + count); - while (!input.end()) { - if (buffer.available() < 6) { + TCharBuffer input = TCharBuffer.wrap(buf, offset, count); + while (input.hasRemaining()) { + if (encoder.encode(input, buffer, false).isOverflow()) { out.write(bufferData, 0, buffer.position()); - buffer.rewind(0); + buffer.clear(); } - charset.encode(input, buffer); } } } @Override public void write(int oneChar) throws TIOException { - synchronized (lock) { - checkStatus(); - CharBuffer input = new CharBuffer(new char[] { (char)oneChar }, 0, 1); - while (!input.end()) { - if (buffer.available() < 6) { - out.write(bufferData, 0, buffer.position()); - buffer.rewind(0); - } - charset.encode(input, buffer); - } - } + char[] array = { (char)oneChar }; + write(array, 0, array.length); } @Override diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java index c36764ada..417f9376c 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/io/TPrintStream.java @@ -15,13 +15,14 @@ */ package org.teavm.classlib.java.io; -import org.teavm.classlib.impl.charset.ByteBuffer; -import org.teavm.classlib.impl.charset.CharBuffer; -import org.teavm.classlib.impl.charset.Charset; import org.teavm.classlib.java.lang.TMath; import org.teavm.classlib.java.lang.TObject; import org.teavm.classlib.java.lang.TString; import org.teavm.classlib.java.lang.TStringBuilder; +import org.teavm.classlib.java.nio.TByteBuffer; +import org.teavm.classlib.java.nio.TCharBuffer; +import org.teavm.classlib.java.nio.charset.*; +import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset; /** * @@ -32,21 +33,22 @@ public class TPrintStream extends TFilterOutputStream { private boolean errorState; private TStringBuilder sb = new TStringBuilder(); private char[] buffer = new char[32]; - private Charset charset; + private TCharset charset; public TPrintStream(TOutputStream out, boolean autoFlush, TString encoding) throws TUnsupportedEncodingException { super(out); this.autoFlush = autoFlush; - charset = Charset.get(encoding.toString()); - if (charset == null) { - throw new TUnsupportedEncodingException(TString.wrap("Unsupported encoding: ").concat(encoding)); + try { + charset = TCharset.forName(encoding.toString()); + } catch (TUnsupportedCharsetException | TIllegalCharsetNameException e) { + throw new TUnsupportedEncodingException(encoding); } } public TPrintStream(TOutputStream out, boolean autoFlush) { super(out); this.autoFlush = autoFlush; - this.charset = Charset.get("UTF-8"); + this.charset = new TUTF8Charset(); } public TPrintStream(TOutputStream out) { @@ -131,13 +133,27 @@ public class TPrintStream extends TFilterOutputStream { } private void print(char[] s, int begin, int end) { - CharBuffer src = new CharBuffer(s, begin, end); + TCharBuffer src = TCharBuffer.wrap(s, begin, end - begin); byte[] destBytes = new byte[TMath.max(16, TMath.min(s.length, 1024))]; - ByteBuffer dest = new ByteBuffer(destBytes); - while (!src.end()) { - charset.encode(src, dest); + TByteBuffer dest = TByteBuffer.wrap(destBytes); + TCharsetEncoder encoder = charset.newEncoder() + .onMalformedInput(TCodingErrorAction.REPLACE) + .onUnmappableCharacter(TCodingErrorAction.REPLACE); + while (true) { + boolean overflow = encoder.encode(src, dest, true).isOverflow(); write(destBytes, 0, dest.position()); - dest.rewind(0); + dest.clear(); + if (!overflow) { + break; + } + } + while (true) { + boolean overflow = encoder.flush(dest).isOverflow(); + write(destBytes, 0, dest.position()); + dest.clear(); + if (!overflow) { + break; + } } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetDecoder.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetDecoder.java index f89e58094..05e86add0 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetDecoder.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetDecoder.java @@ -191,7 +191,7 @@ public abstract class TCharsetDecoder { reset(); } - TCharBuffer out = TCharBuffer.allocate((int)(averageCharsPerByte * in.remaining())); + TCharBuffer out = TCharBuffer.allocate(Math.max(8, (int)(in.remaining() * averageCharsPerByte))); TCoderResult result; while (true) { result = decode(in, out, false); @@ -237,7 +237,7 @@ public abstract class TCharsetDecoder { private TCharBuffer expand(TCharBuffer buffer) { char[] array = buffer.array(); - array = Arrays.copyOf(array, array.length * 2); + array = Arrays.copyOf(array, Math.max(8, array.length * 2)); TCharBuffer result = TCharBuffer.wrap(array); result.position(buffer.position()); return result; diff --git a/teavm-tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF8Test.java b/teavm-tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF8Test.java index 77b950c95..f56636e22 100644 --- a/teavm-tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF8Test.java +++ b/teavm-tests/src/test/java/org/teavm/classlib/java/nio/charset/UTF8Test.java @@ -64,6 +64,17 @@ public class UTF8Test { assertArrayEquals(new byte[] { 97, -16, -112, -128, -128, 98 }, result); } + @Test + public void encodeSupplementary() { + Charset charset = Charset.forName("UTF-8"); + StringBuilder sb = new StringBuilder(); + sb.appendCodePoint(0xfedcb); + ByteBuffer buffer = charset.encode(sb.toString()); + byte[] result = new byte[buffer.remaining()]; + buffer.get(result); + assertArrayEquals(new byte[] { -13, -66, -73, -117 }, result); + } + @Test public void replaceMalformedFirstByte() { Charset charset = Charset.forName("UTF-8");