Replace old charset with NIO charset

This commit is contained in:
konsoletyper 2015-03-22 19:40:10 +03:00
parent aa2451c3e3
commit f5d09be35e
9 changed files with 113 additions and 337 deletions

View File

@ -1,74 +0,0 @@
/*
* Copyright 2013 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.impl.charset;
/**
 * A simple read/write cursor over a window of a byte array.
 *
 * <p>The window is delimited by a current position and an exclusive end index.
 * Single-element {@link #get()} and {@link #put(byte)} advance the position
 * without checking it against the end index; callers are expected to consult
 * {@link #available()} or {@link #end()} first.</p>
 *
 * @author Alexey Andreev
 */
public class ByteBuffer {
    private final byte[] data;
    private final int end;
    private int pos;

    /**
     * Creates a buffer whose window spans the whole array.
     *
     * @param data backing array; it is used directly, not copied
     */
    public ByteBuffer(byte[] data) {
        this(data, 0, data.length);
    }

    /**
     * Creates a buffer over part of an array.
     *
     * @param data backing array; it is used directly, not copied
     * @param start initial position
     * @param end exclusive end index of the window
     */
    public ByteBuffer(byte[] data, int start, int end) {
        this.data = data;
        this.end = end;
        this.pos = start;
    }

    /** Stores one byte at the current position and advances by one. */
    public void put(byte b) {
        data[pos++] = b;
    }

    /** Moves the position to the given absolute index. */
    public void rewind(int start) {
        this.pos = start;
    }

    /** Returns the number of elements between the position and the end index. */
    public int available() {
        return end - pos;
    }

    /** Moves the position backwards by {@code count} elements. */
    public void back(int count) {
        pos -= count;
    }

    /** Returns {@code true} when the position is exactly at the end index. */
    public boolean end() {
        return pos == end;
    }

    /** Moves the position forwards by {@code count} elements. */
    public void skip(int count) {
        pos += count;
    }

    /** Reads the byte at the current position and advances by one. */
    public byte get() {
        return data[pos++];
    }

    /** Returns the current position as an index into the backing array. */
    public int position() {
        return pos;
    }

    /**
     * Transfers bytes from {@code buffer} into this buffer, advancing both positions.
     *
     * <p>At most {@code min(buffer.available(), this.available())} bytes are moved,
     * so the copy never writes past this buffer's end index; whatever does not fit
     * stays unread in {@code buffer}.</p>
     *
     * @param buffer source of the bytes
     */
    public void put(ByteBuffer buffer) {
        // Bound by both windows: draining the source unconditionally could run
        // past this buffer's end index (or off the end of the array).
        int count = Math.min(buffer.end - buffer.pos, end - pos);
        for (int i = 0; i < count; ++i) {
            data[pos++] = buffer.data[buffer.pos++];
        }
    }
}

View File

@ -1,75 +0,0 @@
/*
* Copyright 2013 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.impl.charset;
/**
 * A simple read/write cursor over a window of a char array, delimited by a
 * current position and an exclusive end index.
 *
 * @author Alexey Andreev
 */
public class CharBuffer {
    private char[] data;
    private int end;
    private int pos;

    /** Creates a buffer whose window spans the whole array. */
    public CharBuffer(char[] data) {
        this(data, 0, data.length);
    }

    /**
     * Creates a buffer over part of an array.
     *
     * @param data backing array; it is used directly, not copied
     * @param start initial position
     * @param end exclusive end index of the window
     */
    public CharBuffer(char[] data, int start, int end) {
        this.pos = start;
        this.end = end;
        this.data = data;
    }

    /** Returns the current position as an index into the backing array. */
    public int position() {
        return pos;
    }

    /** Returns the number of elements between the position and the end index. */
    public int available() {
        return end - pos;
    }

    /** Returns {@code true} when the position is exactly at the end index. */
    public boolean end() {
        return end == pos;
    }

    /** Moves the position to the given absolute index. */
    public void rewind(int start) {
        pos = start;
    }

    /** Moves the position forwards by {@code count} elements. */
    public void skip(int count) {
        pos = pos + count;
    }

    /** Moves the position backwards by {@code count} elements. */
    public void back(int count) {
        pos = pos - count;
    }

    /** Reads the char at the current position and advances by one. */
    public char get() {
        return data[pos++];
    }

    /** Stores one char at the current position and advances by one. */
    public void put(char b) {
        data[pos++] = b;
    }

    /**
     * Transfers chars from {@code buffer} into this buffer, advancing both
     * positions. The amount moved is limited by whichever buffer has fewer
     * elements available.
     */
    public void put(CharBuffer buffer) {
        int remaining = Math.min(buffer.end - buffer.pos, end - pos);
        while (remaining > 0) {
            data[pos++] = buffer.data[buffer.pos++];
            --remaining;
        }
    }
}

View File

@ -1,33 +0,0 @@
/*
* Copyright 2013 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.impl.charset;
/**
 * Base type for character encodings that convert between {@link CharBuffer}
 * and {@link ByteBuffer} contents.
 *
 * @author Alexey Andreev
 */
public abstract class Charset {
    /**
     * Looks up a charset by name. Only UTF-8 is recognised; the name is
     * upper-cased before comparison, so the match ignores case.
     *
     * @param name charset name, must not be {@code null}
     * @return a new {@link UTF8Charset}, or {@code null} for any other name
     */
    public static Charset get(String name) {
        String normalized = name.toUpperCase();
        return "UTF-8".equals(normalized) ? new UTF8Charset() : null;
    }

    /** Encodes chars read from {@code source} into bytes written to {@code dest}. */
    public abstract void encode(CharBuffer source, ByteBuffer dest);

    /** Decodes bytes read from {@code source} into chars written to {@code dest}. */
    public abstract void decode(ByteBuffer source, CharBuffer dest);
}

View File

@ -1,91 +0,0 @@
/*
* Copyright 2013 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.impl.charset;
/**
 * UTF-8 implementation of {@link Charset}.
 *
 * <p>Both directions work in a loop that stops when the source is drained or
 * the destination no longer has room for the largest unit a single step can
 * produce (four bytes when encoding, two chars when decoding). Callers that
 * need the whole input converted must call again with a fresh destination.</p>
 *
 * @author Alexey Andreev
 */
public class UTF8Charset extends Charset {
    /**
     * Encodes UTF-16 chars from {@code source} as UTF-8 bytes into {@code dest}.
     *
     * <p>An unpaired surrogate (a low surrogate on its own, a high surrogate not
     * followed by a low one, or a high surrogate that is the last char of the
     * input) is replaced with {@code '?'}.</p>
     *
     * @param source chars to encode; its position is advanced past what was consumed
     * @param dest receives the encoded bytes
     */
    @Override
    public void encode(CharBuffer source, ByteBuffer dest) {
        // One iteration writes at most four bytes, so demand that much room up front.
        while (!source.end() && dest.available() >= 4) {
            char ch = source.get();
            if (ch < 0x80) {
                dest.put((byte) ch);
            } else if (ch < 0x800) {
                // Two-byte form covers U+0080..U+07FF (11 payload bits). A lower
                // threshold would push U+0400..U+07FF into overlong three-byte forms.
                dest.put((byte) (0xC0 | (ch >> 6)));
                dest.put((byte) (0x80 | (ch & 0x3F)));
            } else if (!Character.isSurrogate(ch)) {
                dest.put((byte) (0xE0 | (ch >> 12)));
                dest.put((byte) (0x80 | ((ch >> 6) & 0x3F)));
                dest.put((byte) (0x80 | (ch & 0x3F)));
            } else if (Character.isHighSurrogate(ch) && !source.end()) {
                char low = source.get();
                if (!Character.isLowSurrogate(low)) {
                    // Not a pair: give the char back so it is encoded on its own.
                    source.back(1);
                    dest.put((byte) '?');
                } else {
                    int codePoint = Character.toCodePoint(ch, low);
                    dest.put((byte) (0xF0 | (codePoint >> 18)));
                    dest.put((byte) (0x80 | ((codePoint >> 12) & 0x3F)));
                    dest.put((byte) (0x80 | ((codePoint >> 6) & 0x3F)));
                    dest.put((byte) (0x80 | (codePoint & 0x3F)));
                }
            } else {
                // Lone low surrogate, or a high surrogate with nothing after it.
                // The latter must not read ahead: that would step past the end
                // index and the loop condition would never see end() again.
                dest.put((byte) '?');
            }
        }
    }

    /**
     * Decodes UTF-8 bytes from {@code source} into UTF-16 chars in {@code dest}.
     *
     * <p>When the input ends in the middle of a multi-byte sequence, the rest of
     * the source is consumed, the lead byte is stored as a char, and decoding
     * stops. A three-byte sequence that decodes to a surrogate is replaced with
     * {@code '?'}. Bytes that match no lead-byte pattern are consumed without
     * producing output. Continuation bytes are not validated.</p>
     *
     * @param source bytes to decode; its position is advanced past what was consumed
     * @param dest receives the decoded chars
     */
    @Override
    public void decode(ByteBuffer source, CharBuffer dest) {
        // One iteration writes at most two chars (a surrogate pair).
        while (!source.end() && dest.available() >= 2) {
            int b = source.get() & 0xFF;
            if ((b & 0x80) == 0) {
                dest.put((char) b);
            } else if ((b & 0xE0) == 0xC0) {
                if (source.end()) {
                    dest.put((char) b);
                    return;
                }
                dest.put((char) (((b & 0x1F) << 6) | (source.get() & 0x3F)));
            } else if ((b & 0xF0) == 0xE0) {
                if (source.available() < 2) {
                    source.skip(source.available());
                    dest.put((char) b);
                    return;
                }
                byte b2 = source.get();
                byte b3 = source.get();
                char c = (char) (((b & 0x0F) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3F));
                // Surrogates are not valid scalar values in UTF-8, whether high or low.
                dest.put(!Character.isSurrogate(c) ? c : '?');
            } else if ((b & 0xF8) == 0xF0) {
                if (source.available() < 3) {
                    source.skip(source.available());
                    dest.put((char) b);
                    return;
                }
                byte b2 = source.get();
                byte b3 = source.get();
                byte b4 = source.get();
                int code = ((b & 0x07) << 18) | ((b2 & 0x3f) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F);
                dest.put(Character.highSurrogate(code));
                dest.put(Character.lowSurrogate(code));
            }
        }
    }
}

View File

@ -21,6 +21,7 @@ import org.teavm.classlib.java.nio.TCharBuffer;
import org.teavm.classlib.java.nio.charset.TCharset; import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCharsetDecoder; import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
import org.teavm.classlib.java.nio.charset.TCodingErrorAction; import org.teavm.classlib.java.nio.charset.TCodingErrorAction;
import org.teavm.classlib.java.nio.charset.TUnsupportedCharsetException;
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset; import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
/** /**
@ -29,8 +30,7 @@ import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
*/ */
public class TInputStreamReader extends TReader { public class TInputStreamReader extends TReader {
private TInputStream stream; private TInputStream stream;
private TCharset charset; private TCharsetDecoder decoder;
private TString charsetName;
private byte[] inData = new byte[8192]; private byte[] inData = new byte[8192];
private TByteBuffer inBuffer = TByteBuffer.wrap(inData); private TByteBuffer inBuffer = TByteBuffer.wrap(inData);
private char[] outData = new char[1024]; private char[] outData = new char[1024];
@ -38,25 +38,37 @@ public class TInputStreamReader extends TReader {
private boolean streamEof; private boolean streamEof;
private boolean eof; private boolean eof;
public TInputStreamReader(TInputStream in, TString charsetName) { public TInputStreamReader(TInputStream in, TString charsetName) throws TUnsupportedEncodingException {
this(in, TCharset.forName(charsetName.toString())); this(in, getCharset(charsetName));
this.charsetName = charsetName; }
public TInputStreamReader(TInputStream in, TCharset charset) {
this(in, charset.newDecoder()
.onMalformedInput(TCodingErrorAction.REPLACE)
.onUnmappableCharacter(TCodingErrorAction.REPLACE));
} }
public TInputStreamReader(TInputStream in) { public TInputStreamReader(TInputStream in) {
this(in, new TUTF8Charset()); this(in, new TUTF8Charset());
charsetName = TString.wrap("UTF-8");
} }
public TInputStreamReader(TInputStream in, TCharset charset) { public TInputStreamReader(TInputStream in, TCharsetDecoder decoder) {
this.stream = in; this.stream = in;
this.charset = charset; this.decoder = decoder;
outBuffer.position(outBuffer.limit()); outBuffer.position(outBuffer.limit());
inBuffer.position(inBuffer.limit()); inBuffer.position(inBuffer.limit());
} }
private static TCharset getCharset(TString charsetName) throws TUnsupportedEncodingException {
try {
return TCharset.forName(charsetName.toString());
} catch (TUnsupportedCharsetException e) {
throw new TUnsupportedEncodingException(charsetName);
}
}
public TString getEncoding() { public TString getEncoding() {
return charsetName; return TString.wrap(decoder.charset().name());
} }
@Override @Override
@ -98,17 +110,16 @@ public class TInputStreamReader extends TReader {
return false; return false;
} }
outBuffer.compact(); outBuffer.compact();
TCharsetDecoder decoder = charset.newDecoder()
.onMalformedInput(TCodingErrorAction.REPLACE)
.onUnmappableCharacter(TCodingErrorAction.IGNORE);
while (true) { while (true) {
if (!inBuffer.hasRemaining() && !fillReadBuffer()) { if (!inBuffer.hasRemaining() && !fillReadBuffer()) {
break;
}
if (decoder.decode(inBuffer, outBuffer, streamEof).isOverflow()) {
break;
}
}
if (!inBuffer.hasRemaining() && streamEof && decoder.flush(outBuffer).isUnderflow()) {
eof = true; eof = true;
break;
}
if (decoder.decode(inBuffer, outBuffer, eof).isOverflow()) {
break;
}
} }
outBuffer.flip(); outBuffer.flip();
return true; return true;

View File

@ -15,40 +15,61 @@
*/ */
package org.teavm.classlib.java.io; package org.teavm.classlib.java.io;
import org.teavm.classlib.impl.charset.ByteBuffer;
import org.teavm.classlib.impl.charset.CharBuffer;
import org.teavm.classlib.impl.charset.Charset;
import org.teavm.classlib.java.lang.TString; import org.teavm.classlib.java.lang.TString;
import org.teavm.classlib.java.nio.TByteBuffer;
import org.teavm.classlib.java.nio.TCharBuffer;
import org.teavm.classlib.java.nio.charset.*;
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
public class TOutputStreamWriter extends TWriter { public class TOutputStreamWriter extends TWriter {
private TOutputStream out; private TOutputStream out;
private String encoding; private TCharsetEncoder encoder;
private Charset charset;
private byte[] bufferData = new byte[512]; private byte[] bufferData = new byte[512];
private ByteBuffer buffer = new ByteBuffer(bufferData); private TByteBuffer buffer = TByteBuffer.wrap(bufferData);
private boolean closed;
public TOutputStreamWriter(TOutputStream out) { public TOutputStreamWriter(TOutputStream out) {
this(out, "UTF-8"); this(nullCheck(out), new TUTF8Charset());
} }
public TOutputStreamWriter(TOutputStream out, final String enc) throws TUnsupportedEncodingException { public TOutputStreamWriter(TOutputStream out, final String enc) throws TUnsupportedEncodingException {
super(out); this(nullCheck(out), getCharset(enc));
if (enc == null) { }
public TOutputStreamWriter(TOutputStream out, TCharset charset) {
this(nullCheck(out), charset.newEncoder()
.onMalformedInput(TCodingErrorAction.REPLACE)
.onUnmappableCharacter(TCodingErrorAction.REPLACE));
}
public TOutputStreamWriter(TOutputStream out, TCharsetEncoder encoder) {
this.out = nullCheck(out);
this.encoder = encoder;
}
private static TOutputStream nullCheck(TOutputStream stream) {
if (stream == null) {
throw new NullPointerException(); throw new NullPointerException();
} }
this.out = out; return stream;
charset = Charset.get(enc); }
if (charset == null) {
throw new TUnsupportedEncodingException(TString.wrap(enc)); private static TCharset getCharset(String charsetName) throws TUnsupportedEncodingException {
if (charsetName == null) {
throw new NullPointerException();
}
try {
return TCharset.forName(charsetName);
} catch (TUnsupportedCharsetException | TIllegalCharsetNameException e) {
throw new TUnsupportedEncodingException(TString.wrap(charsetName));
} }
encoding = enc;
} }
@Override @Override
public void close() throws TIOException { public void close() throws TIOException {
if (charset != null) { if (!closed) {
flush(); flush();
charset = null; closed = true;
out.flush(); out.flush();
out.close(); out.close();
} }
@ -59,19 +80,19 @@ public class TOutputStreamWriter extends TWriter {
checkStatus(); checkStatus();
if (buffer.position() > 0) { if (buffer.position() > 0) {
out.write(bufferData, 0, buffer.position()); out.write(bufferData, 0, buffer.position());
buffer.rewind(0); buffer.clear();
} }
out.flush(); out.flush();
} }
private void checkStatus() throws TIOException { private void checkStatus() throws TIOException {
if (charset == null) { if (closed) {
throw new TIOException(TString.wrap("Writer already closed")); throw new TIOException(TString.wrap("Writer already closed"));
} }
} }
public String getEncoding() { public String getEncoding() {
return encoding; return encoder.charset().name();
} }
@Override @Override
@ -84,30 +105,20 @@ public class TOutputStreamWriter extends TWriter {
if (offset < 0 || offset > buf.length - count || count < 0) { if (offset < 0 || offset > buf.length - count || count < 0) {
throw new IndexOutOfBoundsException(); throw new IndexOutOfBoundsException();
} }
CharBuffer input = new CharBuffer(buf, offset, offset + count); TCharBuffer input = TCharBuffer.wrap(buf, offset, count);
while (!input.end()) { while (input.hasRemaining()) {
if (buffer.available() < 6) { if (encoder.encode(input, buffer, false).isOverflow()) {
out.write(bufferData, 0, buffer.position()); out.write(bufferData, 0, buffer.position());
buffer.rewind(0); buffer.clear();
} }
charset.encode(input, buffer);
} }
} }
} }
@Override @Override
public void write(int oneChar) throws TIOException { public void write(int oneChar) throws TIOException {
synchronized (lock) { char[] array = { (char)oneChar };
checkStatus(); write(array, 0, array.length);
CharBuffer input = new CharBuffer(new char[] { (char)oneChar }, 0, 1);
while (!input.end()) {
if (buffer.available() < 6) {
out.write(bufferData, 0, buffer.position());
buffer.rewind(0);
}
charset.encode(input, buffer);
}
}
} }
@Override @Override

View File

@ -15,13 +15,14 @@
*/ */
package org.teavm.classlib.java.io; package org.teavm.classlib.java.io;
import org.teavm.classlib.impl.charset.ByteBuffer;
import org.teavm.classlib.impl.charset.CharBuffer;
import org.teavm.classlib.impl.charset.Charset;
import org.teavm.classlib.java.lang.TMath; import org.teavm.classlib.java.lang.TMath;
import org.teavm.classlib.java.lang.TObject; import org.teavm.classlib.java.lang.TObject;
import org.teavm.classlib.java.lang.TString; import org.teavm.classlib.java.lang.TString;
import org.teavm.classlib.java.lang.TStringBuilder; import org.teavm.classlib.java.lang.TStringBuilder;
import org.teavm.classlib.java.nio.TByteBuffer;
import org.teavm.classlib.java.nio.TCharBuffer;
import org.teavm.classlib.java.nio.charset.*;
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
/** /**
* *
@ -32,21 +33,22 @@ public class TPrintStream extends TFilterOutputStream {
private boolean errorState; private boolean errorState;
private TStringBuilder sb = new TStringBuilder(); private TStringBuilder sb = new TStringBuilder();
private char[] buffer = new char[32]; private char[] buffer = new char[32];
private Charset charset; private TCharset charset;
public TPrintStream(TOutputStream out, boolean autoFlush, TString encoding) throws TUnsupportedEncodingException { public TPrintStream(TOutputStream out, boolean autoFlush, TString encoding) throws TUnsupportedEncodingException {
super(out); super(out);
this.autoFlush = autoFlush; this.autoFlush = autoFlush;
charset = Charset.get(encoding.toString()); try {
if (charset == null) { charset = TCharset.forName(encoding.toString());
throw new TUnsupportedEncodingException(TString.wrap("Unsupported encoding: ").concat(encoding)); } catch (TUnsupportedCharsetException | TIllegalCharsetNameException e) {
throw new TUnsupportedEncodingException(encoding);
} }
} }
public TPrintStream(TOutputStream out, boolean autoFlush) { public TPrintStream(TOutputStream out, boolean autoFlush) {
super(out); super(out);
this.autoFlush = autoFlush; this.autoFlush = autoFlush;
this.charset = Charset.get("UTF-8"); this.charset = new TUTF8Charset();
} }
public TPrintStream(TOutputStream out) { public TPrintStream(TOutputStream out) {
@ -131,13 +133,27 @@ public class TPrintStream extends TFilterOutputStream {
} }
private void print(char[] s, int begin, int end) { private void print(char[] s, int begin, int end) {
CharBuffer src = new CharBuffer(s, begin, end); TCharBuffer src = TCharBuffer.wrap(s, begin, end - begin);
byte[] destBytes = new byte[TMath.max(16, TMath.min(s.length, 1024))]; byte[] destBytes = new byte[TMath.max(16, TMath.min(s.length, 1024))];
ByteBuffer dest = new ByteBuffer(destBytes); TByteBuffer dest = TByteBuffer.wrap(destBytes);
while (!src.end()) { TCharsetEncoder encoder = charset.newEncoder()
charset.encode(src, dest); .onMalformedInput(TCodingErrorAction.REPLACE)
.onUnmappableCharacter(TCodingErrorAction.REPLACE);
while (true) {
boolean overflow = encoder.encode(src, dest, true).isOverflow();
write(destBytes, 0, dest.position()); write(destBytes, 0, dest.position());
dest.rewind(0); dest.clear();
if (!overflow) {
break;
}
}
while (true) {
boolean overflow = encoder.flush(dest).isOverflow();
write(destBytes, 0, dest.position());
dest.clear();
if (!overflow) {
break;
}
} }
} }

View File

@ -191,7 +191,7 @@ public abstract class TCharsetDecoder {
reset(); reset();
} }
TCharBuffer out = TCharBuffer.allocate((int)(averageCharsPerByte * in.remaining())); TCharBuffer out = TCharBuffer.allocate(Math.max(8, (int)(in.remaining() * averageCharsPerByte)));
TCoderResult result; TCoderResult result;
while (true) { while (true) {
result = decode(in, out, false); result = decode(in, out, false);
@ -237,7 +237,7 @@ public abstract class TCharsetDecoder {
private TCharBuffer expand(TCharBuffer buffer) { private TCharBuffer expand(TCharBuffer buffer) {
char[] array = buffer.array(); char[] array = buffer.array();
array = Arrays.copyOf(array, array.length * 2); array = Arrays.copyOf(array, Math.max(8, array.length * 2));
TCharBuffer result = TCharBuffer.wrap(array); TCharBuffer result = TCharBuffer.wrap(array);
result.position(buffer.position()); result.position(buffer.position());
return result; return result;

View File

@ -64,6 +64,17 @@ public class UTF8Test {
assertArrayEquals(new byte[] { 97, -16, -112, -128, -128, 98 }, result); assertArrayEquals(new byte[] { 97, -16, -112, -128, -128, 98 }, result);
} }
@Test
public void encodeSupplementary() {
    // U+FEDCB is outside the BMP, so the String holds it as a surrogate pair.
    String text = new StringBuilder().appendCodePoint(0xfedcb).toString();
    Charset utf8 = Charset.forName("UTF-8");
    ByteBuffer encoded = utf8.encode(text);
    byte[] actual = new byte[encoded.remaining()];
    encoded.get(actual);
    // Expected four-byte sequence F3 BE B7 8B, written as signed bytes.
    byte[] expected = { -13, -66, -73, -117 };
    assertArrayEquals(expected, actual);
}
@Test @Test
public void replaceMalformedFirstByte() { public void replaceMalformedFirstByte() {
Charset charset = Charset.forName("UTF-8"); Charset charset = Charset.forName("UTF-8");