Further implementation of nio charsets

This commit is contained in:
konsoletyper 2015-03-21 22:46:16 +03:00
parent 59b2d0b21e
commit 0be769f74d
8 changed files with 521 additions and 55 deletions

View File

@ -106,6 +106,7 @@
<argument>java.math</argument> <argument>java.math</argument>
<argument>java.net</argument> <argument>java.net</argument>
<argument>java.nio</argument> <argument>java.nio</argument>
<argument>java.nio.charset</argument>
<argument>java.text</argument> <argument>java.text</argument>
<argument>java.util</argument> <argument>java.util</argument>
<argument>java.util.logging</argument> <argument>java.util.logging</argument>

View File

@ -18,6 +18,9 @@ package org.teavm.classlib.java.nio.charset;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.teavm.classlib.java.nio.TByteBuffer;
import org.teavm.classlib.java.nio.TCharBuffer;
import org.teavm.classlib.java.nio.charset.impl.UTF8Charset;
/** /**
* *
@ -29,12 +32,54 @@ public abstract class TCharset implements Comparable<TCharset> {
private Set<String> aliasSet; private Set<String> aliasSet;
protected TCharset(String canonicalName, String[] aliases) { protected TCharset(String canonicalName, String[] aliases) {
checkCanonicalName(canonicalName);
for (String alias : aliases) {
checkCanonicalName(alias);
}
this.canonicalName = canonicalName; this.canonicalName = canonicalName;
this.aliases = aliases.clone(); this.aliases = aliases.clone();
} }
/**
 * Validates a charset name or alias.
 *
 * <p>The name is rejected when it is empty, when its first character is not accepted by
 * {@code isValidCharsetStart}, or when any later character is neither one of
 * {@code - + . : _} nor accepted by {@code isValidCharsetStart}.
 *
 * @param name the name to validate
 * @throws TIllegalCharsetNameException if the name is rejected
 */
private static void checkCanonicalName(String name) {
    boolean legal = !name.isEmpty() && isValidCharsetStart(name.charAt(0));
    for (int index = 1; legal && index < name.length(); ++index) {
        char current = name.charAt(index);
        // Punctuation is allowed anywhere except the first position.
        legal = "-+.:_".indexOf(current) >= 0 || isValidCharsetStart(current);
    }
    if (!legal) {
        throw new TIllegalCharsetNameException(name);
    }
}
/**
 * Tells whether a character may start a charset name, i.e. whether it is an ASCII
 * digit or an ASCII letter.
 *
 * @param c the character to test
 * @return {@code true} for {@code 0-9}, {@code a-z} and {@code A-Z}
 */
private static boolean isValidCharsetStart(char c) {
    // The upper-case range must be a conjunction; "c >= 'A' || c <= 'Z'" holds for every char.
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
public static TCharset forName(String charsetName) { public static TCharset forName(String charsetName) {
return null; if (charsetName == null) {
throw new IllegalArgumentException("charsetName is null");
}
checkCanonicalName(charsetName);
switch (charsetName.toUpperCase()) {
case "UTF-8":
return new UTF8Charset();
default:
throw new TUnsupportedCharsetException(charsetName);
}
} }
public final String name() { public final String name() {
@ -56,6 +101,8 @@ public abstract class TCharset implements Comparable<TCharset> {
return canonicalName; return canonicalName;
} }
public abstract boolean contains(TCharset cs);
public abstract TCharsetDecoder newDecoder(); public abstract TCharsetDecoder newDecoder();
public abstract TCharsetEncoder newEncoder(); public abstract TCharsetEncoder newEncoder();
@ -63,4 +110,35 @@ public abstract class TCharset implements Comparable<TCharset> {
public boolean canEncode() { public boolean canEncode() {
return true; return true;
} }
/**
 * Decodes the given bytes using a new decoder of this charset that is configured to
 * replace both malformed input and unmappable characters.
 *
 * @param bb the bytes to decode
 * @return the decoded characters
 */
public final TCharBuffer decode(TByteBuffer bb) {
    TCharsetDecoder decoder = newDecoder();
    decoder.onMalformedInput(TCodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(TCodingErrorAction.REPLACE);
    try {
        return decoder.decode(bb);
    } catch (TCharacterCodingException e) {
        // Both error actions are REPLACE, so the decoder is not expected to report errors.
        throw new AssertionError("Should never been thrown", e);
    }
}
/**
 * Encodes the given characters using a new encoder of this charset that is configured
 * to replace both malformed input and unmappable characters.
 *
 * @param cb the characters to encode
 * @return the encoded bytes
 */
public final TByteBuffer encode(TCharBuffer cb) {
    try {
        TByteBuffer encoded = newEncoder()
                .onMalformedInput(TCodingErrorAction.REPLACE)
                .onUnmappableCharacter(TCodingErrorAction.REPLACE)
                .encode(cb);
        return encoded;
    } catch (TCharacterCodingException e) {
        // Both error actions are REPLACE, so the encoder is not expected to report errors.
        throw new AssertionError("Should never been thrown", e);
    }
}
/**
 * Encodes a string by wrapping it in a char buffer and delegating to
 * {@code encode(TCharBuffer)}.
 *
 * @param str the string to encode
 * @return the encoded bytes
 */
public final TByteBuffer encode(String str) {
    TCharBuffer chars = TCharBuffer.wrap(str);
    return encode(chars);
}
/**
 * Orders charsets by canonical name, ignoring case.
 */
@Override
public final int compareTo(TCharset that) {
    String ownName = canonicalName;
    String otherName = that.canonicalName;
    return ownName.compareToIgnoreCase(otherName);
}
} }

View File

@ -15,10 +15,230 @@
*/ */
package org.teavm.classlib.java.nio.charset; package org.teavm.classlib.java.nio.charset;
import java.util.Arrays;
import org.teavm.classlib.java.nio.TByteBuffer;
import org.teavm.classlib.java.nio.TCharBuffer;
/** /**
* *
* @author Alexey Andreev * @author Alexey Andreev
*/ */
public class TCharsetDecoder { public abstract class TCharsetDecoder {
// Decoder life-cycle states stored in the "state" field.
/** Initial state; also the state restored by reset(). */
private static final int INIT = 0;
/** Set by decode(in, out, false). */
private static final int IN_PROGRESS = 1;
/** Set by decode(in, out, true). */
private static final int END = 2;
/**
 * Set by flush(). Must be distinct from END: the state checks in decode() and flush()
 * compare against both values and cannot tell them apart when they share a number.
 */
private static final int FLUSH = 3;
private TCharset charset;
private float averageCharsPerByte;
private float maxCharsPerByte;
/** Text written in place of bad input when the corresponding action is REPLACE. */
private String replacement = "?";
private TCodingErrorAction malformedAction = TCodingErrorAction.REPORT;
private TCodingErrorAction unmappableAction = TCodingErrorAction.REPORT;
/** One of INIT, IN_PROGRESS, END or FLUSH; starts as INIT (0). */
private int state;
/**
 * Creates a decoder for the given charset.
 *
 * @param cs the charset reported by {@code charset()}
 * @param averageCharsPerByte value reported by {@code averageCharsPerByte()}; must be positive
 * @param maxCharsPerByte value reported by {@code maxCharsPerByte()}; must be positive
 * @throws IllegalArgumentException if either ratio is zero or negative
 */
protected TCharsetDecoder(TCharset cs, float averageCharsPerByte, float maxCharsPerByte) {
    if (averageCharsPerByte <= 0) {
        String message = "averageCharsPerByte must be positive. Actual value is " + averageCharsPerByte;
        throw new IllegalArgumentException(message);
    }
    if (maxCharsPerByte <= 0) {
        String message = "maxCharsPerByte must be positive. Actual value is " + maxCharsPerByte;
        throw new IllegalArgumentException(message);
    }
    this.charset = cs;
    this.averageCharsPerByte = averageCharsPerByte;
    this.maxCharsPerByte = maxCharsPerByte;
}
/**
 * Returns the charset this decoder was constructed with.
 */
public final TCharset charset() {
    return this.charset;
}
/**
 * Returns the current replacement string, as last set by {@code replaceWith}.
 */
public final String replacement() {
    return this.replacement;
}
/**
 * Changes the replacement string and notifies the subclass through
 * {@code implReplaceWith}.
 *
 * @param newReplacement the new replacement; must be neither null nor empty
 * @return this decoder
 * @throws IllegalArgumentException if the replacement is null or empty
 */
public final TCharsetDecoder replaceWith(String newReplacement) {
    boolean usable = newReplacement != null && !newReplacement.isEmpty();
    if (!usable) {
        throw new IllegalArgumentException("New replacement is null or empty");
    }
    replacement = newReplacement;
    implReplaceWith(newReplacement);
    return this;
}
/**
 * Hook called by {@code replaceWith} after the new replacement has been stored.
 * The default implementation does nothing.
 *
 * @param newReplacement the replacement that was just set
 */
protected void implReplaceWith(@SuppressWarnings("unused") String newReplacement) {
}
/**
 * Returns the action currently applied to malformed input (REPORT until changed).
 */
public TCodingErrorAction malformedInputAction() {
    return this.malformedAction;
}
/**
 * Sets the action applied to malformed input and notifies the subclass through
 * {@code implOnMalformedInput}.
 *
 * @param newAction the new action; must not be null
 * @return this decoder
 * @throws IllegalArgumentException if {@code newAction} is null
 */
public final TCharsetDecoder onMalformedInput(TCodingErrorAction newAction) {
    if (null == newAction) {
        throw new IllegalArgumentException("newAction must be non-null");
    }
    malformedAction = newAction;
    implOnMalformedInput(newAction);
    return this;
}
/**
 * Hook called by {@code onMalformedInput} after the new action has been stored.
 * The default implementation does nothing.
 *
 * @param newAction the action that was just set
 */
protected void implOnMalformedInput(@SuppressWarnings("unused") TCodingErrorAction newAction) {
}
/**
 * Returns the action currently applied to unmappable characters (REPORT until changed).
 */
public TCodingErrorAction unmappableCharacterAction() {
    return this.unmappableAction;
}
/**
 * Sets the action applied to unmappable characters and notifies the subclass through
 * {@code implOnUnmappableCharacter}.
 *
 * @param newAction the new action; must not be null
 * @return this decoder
 * @throws IllegalArgumentException if {@code newAction} is null
 */
public final TCharsetDecoder onUnmappableCharacter(TCodingErrorAction newAction) {
    if (null == newAction) {
        throw new IllegalArgumentException("newAction must be non-null");
    }
    unmappableAction = newAction;
    implOnUnmappableCharacter(newAction);
    return this;
}
/**
 * Hook called by {@code onUnmappableCharacter} after the new action has been stored.
 * The default implementation does nothing.
 *
 * @param newAction the action that was just set
 */
protected void implOnUnmappableCharacter(@SuppressWarnings("unused") TCodingErrorAction newAction) {
}
/**
 * Returns the average chars-per-byte ratio supplied to the constructor.
 */
public final float averageCharsPerByte() {
    return this.averageCharsPerByte;
}
/**
 * Returns the maximum chars-per-byte ratio supplied to the constructor.
 */
public final float maxCharsPerByte() {
    return this.maxCharsPerByte;
}
/**
 * Decodes as many bytes as possible from {@code in} into {@code out}, applying the
 * configured actions to malformed and unmappable input.
 *
 * <p>Results returned by {@code decodeLoop} are handled as follows:
 * <ul>
 * <li>overflow is returned to the caller;</li>
 * <li>underflow is returned to the caller, unless {@code endOfInput} is set and bytes
 * are still pending, in which case the pending bytes are treated as one malformed
 * sequence;</li>
 * <li>an error is returned when its action is REPORT; otherwise the offending bytes are
 * skipped (after writing the replacement when the action is REPLACE) and decoding
 * continues.</li>
 * </ul>
 *
 * @param in source bytes
 * @param out destination characters
 * @param endOfInput whether the caller has no more bytes to supply after {@code in}
 * @return the result that stopped decoding
 * @throws IllegalStateException if called in a state that does not permit decoding
 * @throws TCoderMalfunctionError if {@code decodeLoop} throws a runtime exception
 */
public final TCoderResult decode(TByteBuffer in, TCharBuffer out, boolean endOfInput) {
    if (state == END && !endOfInput || state == FLUSH) {
        throw new IllegalStateException();
    }
    state = !endOfInput ? IN_PROGRESS : END;
    while (true) {
        TCoderResult result;
        try {
            result = decodeLoop(in, out);
        } catch (RuntimeException e) {
            throw new TCoderMalfunctionError(e);
        }
        if (result.isOverflow()) {
            return result;
        }
        if (result.isUnderflow()) {
            // Only leftover bytes at the very end of the input are an error; a fully
            // consumed buffer must not be reported as a zero-length malformed sequence.
            if (!endOfInput || !in.hasRemaining()) {
                return result;
            }
            result = TCoderResult.malformedForLength(in.remaining());
        }
        TCodingErrorAction action = result.isUnmappable() ? unmappableAction : malformedAction;
        if (action == TCodingErrorAction.REPORT) {
            return result;
        }
        if (action == TCodingErrorAction.REPLACE) {
            if (out.remaining() < replacement.length()) {
                return TCoderResult.OVERFLOW;
            }
            out.put(replacement);
        }
        // IGNORE and REPLACE both skip the offending bytes and keep decoding.
        in.position(in.position() + result.length());
    }
}
/**
 * Moves the decoder into the FLUSH state and delegates to {@code implFlush}.
 *
 * @param out destination characters
 * @return the result of {@code implFlush}
 * @throws IllegalStateException if the decoder is in neither the END nor the FLUSH state
 */
public final TCoderResult flush(TCharBuffer out) {
    boolean flushable = state == FLUSH || state == END;
    if (!flushable) {
        throw new IllegalStateException();
    }
    state = FLUSH;
    return implFlush(out);
}
/**
 * Returns the decoder to the INIT state and then invokes {@code implReset}.
 *
 * @return this decoder
 */
public final TCharsetDecoder reset() {
    this.state = INIT;
    this.implReset();
    return this;
}
/**
 * Decodes the whole remaining content of {@code in} into a newly allocated buffer,
 * growing that buffer as needed, then finishes and flushes the decoder.
 *
 * @param in source bytes
 * @return the decoded characters, flipped and ready for reading
 * @throws IllegalStateException if a decoding operation is already in progress
 * @throws TCharacterCodingException if decoding reports an error
 */
public final TCharBuffer decode(TByteBuffer in) throws TCharacterCodingException {
    if (state != INIT && state != FLUSH) {
        throw new IllegalStateException();
    }
    if (in.remaining() == 0) {
        return TCharBuffer.allocate(0);
    }
    if (state != INIT) {
        reset();
    }

    // The estimate may truncate to zero; start with at least one char so that growing
    // the buffer by doubling can make progress.
    int initialCapacity = Math.max(1, (int) (averageCharsPerByte * in.remaining()));
    TCharBuffer out = TCharBuffer.allocate(initialCapacity);
    TCoderResult result;
    while (true) {
        result = decode(in, out, false);
        if (result.isUnderflow()) {
            break;
        }
        if (result.isOverflow()) {
            out = expand(out);
        } else if (result.isError()) {
            result.throwException();
        }
    }

    result = decode(in, out, true);
    if (result.isError()) {
        result.throwException();
    }

    while (true) {
        result = flush(out);
        if (result.isUnderflow()) {
            break;
        }
        if (result.isOverflow()) {
            out = expand(out);
        } else if (result.isError()) {
            // Growing the buffer cannot cure an error; report it instead of looping.
            result.throwException();
        }
    }

    out.flip();
    return out;
}
/**
 * Tells whether this decoder detects the charset of its input.
 * The default implementation returns {@code false}.
 */
public boolean isAutoDetecting() {
return false;
}
/**
 * The default implementation always throws {@link UnsupportedOperationException}.
 */
public boolean isCharsetDetected() {
throw new UnsupportedOperationException();
}
/**
 * The default implementation always throws {@link UnsupportedOperationException}.
 */
public TCharset detectedCharset() {
throw new UnsupportedOperationException();
}
/**
 * Returns a buffer with twice the backing-array length of {@code buffer} (at least one
 * char), holding the same leading content and positioned where {@code buffer} was.
 *
 * @param buffer the buffer that ran out of space
 * @return a larger buffer wrapping a copy of the original array
 */
private TCharBuffer expand(TCharBuffer buffer) {
    char[] array = buffer.array();
    // Doubling a zero-length array would not grow it, so enforce a minimum of one.
    int newLength = Math.max(1, array.length * 2);
    array = Arrays.copyOf(array, newLength);
    TCharBuffer result = TCharBuffer.wrap(array);
    result.position(buffer.position());
    return result;
}
/**
 * Charset-specific decoding step invoked repeatedly by
 * {@code decode(TByteBuffer, TCharBuffer, boolean)}, which interprets the returned
 * result (underflow, overflow, malformed or unmappable).
 *
 * @param in source bytes
 * @param out destination characters
 * @return the result describing why the step stopped
 */
protected abstract TCoderResult decodeLoop(TByteBuffer in, TCharBuffer out);
/**
 * Hook called by {@code flush}. The default implementation writes nothing and returns
 * {@code TCoderResult.UNDERFLOW}.
 *
 * @param out destination characters
 */
protected TCoderResult implFlush(@SuppressWarnings("unused") TCharBuffer out) {
return TCoderResult.UNDERFLOW;
}
/**
 * Hook called by {@code reset} after the state has been set back to INIT.
 * The default implementation does nothing.
 */
protected void implReset() {
}
} }

View File

@ -15,6 +15,7 @@
*/ */
package org.teavm.classlib.java.nio.charset; package org.teavm.classlib.java.nio.charset;
import java.util.Arrays;
import org.teavm.classlib.java.nio.TByteBuffer; import org.teavm.classlib.java.nio.TByteBuffer;
import org.teavm.classlib.java.nio.TCharBuffer; import org.teavm.classlib.java.nio.TCharBuffer;
@ -23,11 +24,11 @@ import org.teavm.classlib.java.nio.TCharBuffer;
* @author Alexey Andreev * @author Alexey Andreev
*/ */
public abstract class TCharsetEncoder { public abstract class TCharsetEncoder {
private static final int READY = 0; private static final int READY = 4;
private static final int ONGOING = 1; private static final int ONGOING = 1;
private static final int END = 2; private static final int END = 2;
private static final int FLUSH = 3; private static final int FLUSH = 3;
private static final int INIT = 4; private static final int INIT = 0;
private TCharset charset; private TCharset charset;
private byte[] replacement; private byte[] replacement;
private float averageBytesPerChar; private float averageBytesPerChar;
@ -35,7 +36,6 @@ public abstract class TCharsetEncoder {
private TCodingErrorAction malformedAction = TCodingErrorAction.REPORT; private TCodingErrorAction malformedAction = TCodingErrorAction.REPORT;
private TCodingErrorAction unmappableAction = TCodingErrorAction.REPORT; private TCodingErrorAction unmappableAction = TCodingErrorAction.REPORT;
private int status; private int status;
private boolean finished;
protected TCharsetEncoder(TCharset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement) { protected TCharsetEncoder(TCharset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement) {
checkReplacement(replacement); checkReplacement(replacement);
@ -43,7 +43,6 @@ public abstract class TCharsetEncoder {
this.replacement = replacement.clone(); this.replacement = replacement.clone();
this.averageBytesPerChar = averageBytesPerChar; this.averageBytesPerChar = averageBytesPerChar;
this.maxBytesPerChar = maxBytesPerChar; this.maxBytesPerChar = maxBytesPerChar;
status = INIT;
} }
protected TCharsetEncoder(TCharset cs, float averageBytesPerChar, float maxBytesPerChar) { protected TCharsetEncoder(TCharset cs, float averageBytesPerChar, float maxBytesPerChar) {
@ -115,25 +114,19 @@ public abstract class TCharsetEncoder {
} }
public final TCoderResult encode(TCharBuffer in, TByteBuffer out, boolean endOfInput) { public final TCoderResult encode(TCharBuffer in, TByteBuffer out, boolean endOfInput) {
if (status == READY && finished && !endOfInput) {
throw new IllegalStateException();
}
if (status == FLUSH || !endOfInput && status == END) { if (status == FLUSH || !endOfInput && status == END) {
throw new IllegalStateException(); throw new IllegalStateException();
} }
status = endOfInput ? END : ONGOING;
TCoderResult result; TCoderResult result;
while (true) { while (true) {
try { try {
result = encodeLoop(in, out); result = encodeLoop(in, out);
} catch (TBufferOverflowException e) { } catch (RuntimeException e) {
throw new TCoderMalfunctionError(e);
} catch (TBufferUnderflowException e) {
throw new TCoderMalfunctionError(e); throw new TCoderMalfunctionError(e);
} }
if (result == TCoderResult.UNDERFLOW) { if (result.isUnderflow()) {
status = endOfInput ? END : ONGOING;
if (endOfInput) { if (endOfInput) {
int remaining = in.remaining(); int remaining = in.remaining();
if (remaining > 0) { if (remaining > 0) {
@ -144,14 +137,10 @@ public abstract class TCharsetEncoder {
} else { } else {
return result; return result;
} }
} else if (result == TCoderResult.OVERFLOW) { } else if (result.isOverflow()) {
status = endOfInput ? END : ONGOING;
return result; return result;
} }
TCodingErrorAction action = malformedAction; TCodingErrorAction action = result.isUnmappable() ? unmappableAction : malformedAction;
if (result.isUnmappable()) {
action = unmappableAction;
}
if (action == TCodingErrorAction.REPLACE) { if (action == TCodingErrorAction.REPLACE) {
if (out.remaining() < replacement.length) { if (out.remaining() < replacement.length) {
return TCoderResult.OVERFLOW; return TCoderResult.OVERFLOW;
@ -171,9 +160,9 @@ public abstract class TCharsetEncoder {
return TByteBuffer.allocate(0); return TByteBuffer.allocate(0);
} }
reset(); reset();
int length = (int)(in.remaining() * averageBytesPerChar); TByteBuffer output = TByteBuffer.allocate((int)(in.remaining() * averageBytesPerChar));
TByteBuffer output = TByteBuffer.allocate(length);
TCoderResult result = null; TCoderResult result;
while (true) { while (true) {
result = encode(in, output, false); result = encode(in, output, false);
if (result == TCoderResult.UNDERFLOW) { if (result == TCoderResult.UNDERFLOW) {
@ -182,31 +171,25 @@ public abstract class TCharsetEncoder {
output = allocateMore(output); output = allocateMore(output);
continue; continue;
} }
checkCoderResult(result); if (result.isError()) {
result.throwException();
}
} }
result = encode(in, output, true); result = encode(in, output, true);
checkCoderResult(result); if (result.isError()) {
result.throwException();
}
while (true) { while (true) {
result = flush(output); result = flush(output);
if (result == TCoderResult.UNDERFLOW) { if (result.isUnderflow()) {
output.flip();
break; break;
} else if (result == TCoderResult.OVERFLOW) { } else if (result.isOverflow()) {
output = allocateMore(output); output = allocateMore(output);
continue;
} }
checkCoderResult(result);
output.flip();
if (result.isMalformed()) {
throw new TMalformedInputException(result.length());
} else if (result.isUnmappable()) {
throw new TUnmappableCharacterException(result.length());
}
break;
} }
status = READY; output.flip();
finished = true;
return output; return output;
} }
@ -249,21 +232,11 @@ public abstract class TCharsetEncoder {
return implCanEncode(cb); return implCanEncode(cb);
} }
private void checkCoderResult(TCoderResult result) throws TCharacterCodingException { private TByteBuffer allocateMore(TByteBuffer buffer) {
if (malformedAction == TCodingErrorAction.REPORT && result.isMalformed()) { byte[] array = buffer.array();
throw new TMalformedInputException(result.length()); array = Arrays.copyOf(array, array.length * 2);
} else if (unmappableAction == TCodingErrorAction.REPORT && result.isUnmappable()) { TByteBuffer result = TByteBuffer.wrap(array);
throw new TUnmappableCharacterException(result.length()); result.position(buffer.position());
}
}
private TByteBuffer allocateMore(TByteBuffer output) {
if (output.capacity() == 0) {
return TByteBuffer.allocate(1);
}
TByteBuffer result = TByteBuffer.allocate(output.capacity() * 2);
output.flip();
result.put(output);
return result; return result;
} }

View File

@ -0,0 +1,33 @@
/*
* Copyright 2015 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset;
/**
 * Thrown when a string is not a legal charset name.
 *
 * @author Alexey Andreev
 */
public class TIllegalCharsetNameException extends IllegalArgumentException {
    private static final long serialVersionUID = -6848948447254188722L;
    private final String charsetName;

    /**
     * @param charsetName the offending name; it is also used as the exception message
     */
    public TIllegalCharsetNameException(String charsetName) {
        // Pass the name as the message so stack traces and logs show what was rejected.
        super(String.valueOf(charsetName));
        this.charsetName = charsetName;
    }

    /**
     * Returns the name passed to the constructor.
     */
    public String getCharsetName() {
        return charsetName;
    }
}

View File

@ -0,0 +1,33 @@
/*
* Copyright 2015 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset;
/**
 * Thrown when no support is available for a requested charset.
 *
 * @author Alexey Andreev
 */
public class TUnsupportedCharsetException extends IllegalArgumentException {
    private static final long serialVersionUID = 2668607022458967777L;
    private final String charsetName;

    /**
     * @param charsetName the unsupported name; it is also used as the exception message
     */
    public TUnsupportedCharsetException(String charsetName) {
        // Pass the name as the message so stack traces and logs show what was requested.
        super(String.valueOf(charsetName));
        this.charsetName = charsetName;
    }

    /**
     * Returns the name passed to the constructor.
     */
    public String getCharsetName() {
        return charsetName;
    }
}

View File

@ -0,0 +1,45 @@
/*
* Copyright 2015 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
import org.teavm.classlib.java.nio.charset.TCharsetEncoder;
/**
 * The UTF-8 charset, registered under the canonical name {@code "UTF-8"} with no aliases.
 *
 * @author Alexey Andreev
 */
public class UTF8Charset extends TCharset {
    public UTF8Charset() {
        super("UTF-8", new String[0]);
    }

    /**
     * Tells whether this charset contains the given one. Any {@code UTF8Charset}
     * instance is contained, not just {@code this}: instances are created on demand,
     * so an identity comparison would make UTF-8 fail to contain UTF-8.
     */
    @Override
    public boolean contains(TCharset cs) {
        return cs instanceof UTF8Charset;
    }

    @Override
    public TCharsetDecoder newDecoder() {
        return new UTF8Decoder(this);
    }

    /**
     * Not implemented yet: returns {@code null}, so callers must not rely on encoding
     * through this charset.
     */
    @Override
    public TCharsetEncoder newEncoder() {
        return null;
    }
}

View File

@ -0,0 +1,83 @@
/*
* Copyright 2015 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.nio.charset.impl;
import org.teavm.classlib.impl.charset.UTF16Helper;
import org.teavm.classlib.java.nio.TByteBuffer;
import org.teavm.classlib.java.nio.TCharBuffer;
import org.teavm.classlib.java.nio.charset.TCharset;
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
import org.teavm.classlib.java.nio.charset.TCoderResult;
/**
 * Decoder that turns UTF-8 byte sequences into UTF-16 chars.
 *
 * <p>When a malformed sequence is found, the input position is left at the first byte
 * of that sequence and the returned result carries the number of bytes to skip.
 *
 * @author Alexey Andreev
 */
public class UTF8Decoder extends TCharsetDecoder {
    public UTF8Decoder(TCharset cs) {
        // A single ASCII byte yields one char, so the ratio can reach 1.
        super(cs, 1f, 1f);
    }

    @Override
    protected TCoderResult decodeLoop(TByteBuffer in, TCharBuffer out) {
        while (in.hasRemaining()) {
            if (!out.hasRemaining()) {
                return TCoderResult.OVERFLOW;
            }
            int start = in.position();
            int b = in.get() & 0xFF;
            if ((b & 0x80) == 0) {
                out.put((char) b);
            } else if ((b & 0xE0) == 0xC0) {
                if (!in.hasRemaining()) {
                    in.position(start);
                    return TCoderResult.UNDERFLOW;
                }
                byte b2 = in.get();
                // Lead bytes 0xC0 and 0xC1 can only produce overlong encodings.
                if (b < 0xC2 || !isContinuation(b2)) {
                    in.position(start);
                    return TCoderResult.malformedForLength(1);
                }
                out.put((char) (((b & 0x1F) << 6) | (b2 & 0x3F)));
            } else if ((b & 0xF0) == 0xE0) {
                if (in.remaining() < 2) {
                    in.position(start);
                    return TCoderResult.UNDERFLOW;
                }
                byte b2 = in.get();
                byte b3 = in.get();
                if (!isContinuation(b2)) {
                    in.position(start);
                    return TCoderResult.malformedForLength(1);
                }
                if (!isContinuation(b3)) {
                    in.position(start);
                    return TCoderResult.malformedForLength(2);
                }
                char c = (char) (((b & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F));
                // Reject overlong forms and encoded surrogates.
                if (c < 0x800 || Character.isSurrogate(c)) {
                    in.position(start);
                    return TCoderResult.malformedForLength(3);
                }
                out.put(c);
            } else if ((b & 0xF8) == 0xF0) {
                if (in.remaining() < 3) {
                    in.position(start);
                    return TCoderResult.UNDERFLOW;
                }
                // A supplementary code point is written as a surrogate pair: two chars.
                if (out.remaining() < 2) {
                    in.position(start);
                    return TCoderResult.OVERFLOW;
                }
                byte b2 = in.get();
                byte b3 = in.get();
                byte b4 = in.get();
                if (!isContinuation(b2)) {
                    in.position(start);
                    return TCoderResult.malformedForLength(1);
                }
                if (!isContinuation(b3)) {
                    in.position(start);
                    return TCoderResult.malformedForLength(2);
                }
                if (!isContinuation(b4)) {
                    in.position(start);
                    return TCoderResult.malformedForLength(3);
                }
                int code = ((b & 0x07) << 18) | ((b2 & 0x3F) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F);
                // Reject overlong forms and values beyond the Unicode range.
                if (code < 0x10000 || code > 0x10FFFF) {
                    in.position(start);
                    return TCoderResult.malformedForLength(4);
                }
                out.put(UTF16Helper.highSurrogate(code));
                out.put(UTF16Helper.lowSurrogate(code));
            } else {
                // Stray continuation byte or a lead byte of 0xF8 and above.
                in.position(start);
                return TCoderResult.malformedForLength(1);
            }
        }
        return TCoderResult.UNDERFLOW;
    }

    /** Tells whether the byte has the {@code 10xxxxxx} continuation form. */
    private static boolean isContinuation(byte value) {
        return (value & 0xC0) == 0x80;
    }
}