diff --git a/teavm-classlib/pom.xml b/teavm-classlib/pom.xml index 2624f6ff9..2b09f1141 100644 --- a/teavm-classlib/pom.xml +++ b/teavm-classlib/pom.xml @@ -106,6 +106,7 @@ java.math java.net java.nio + java.nio.charset java.text java.util java.util.logging diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharset.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharset.java index 26bc0c1ef..22ae9cad8 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharset.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharset.java @@ -18,6 +18,9 @@ package org.teavm.classlib.java.nio.charset; import java.util.Collections; import java.util.HashSet; import java.util.Set; +import org.teavm.classlib.java.nio.TByteBuffer; +import org.teavm.classlib.java.nio.TCharBuffer; +import org.teavm.classlib.java.nio.charset.impl.UTF8Charset; /** * @@ -29,12 +32,54 @@ public abstract class TCharset implements Comparable { private Set aliasSet; protected TCharset(String canonicalName, String[] aliases) { + checkCanonicalName(canonicalName); + for (String alias : aliases) { + checkCanonicalName(alias); + } this.canonicalName = canonicalName; this.aliases = aliases.clone(); } + private static void checkCanonicalName(String name) { + if (name.isEmpty()) { + throw new TIllegalCharsetNameException(name); + } + if (!isValidCharsetStart(name.charAt(0))) { + throw new TIllegalCharsetNameException(name); + } + for (int i = 1; i < name.length(); ++i) { + char c = name.charAt(i); + switch (c) { + case '-': + case '+': + case '.': + case ':': + case '_': + break; + default: + if (!isValidCharsetStart(c)) { + throw new TIllegalCharsetNameException(name); + } + break; + } + } + } + + private static boolean isValidCharsetStart(char c) { + return c >= '0' && c <= '9' || c >= 'a' && c <= 'z' || c >= 'A' || c <= 'Z'; + } + public static TCharset forName(String charsetName) { - return null; + if 
(charsetName == null) { + throw new IllegalArgumentException("charsetName is null"); + } + checkCanonicalName(charsetName); + switch (charsetName.toUpperCase()) { + case "UTF-8": + return new UTF8Charset(); + default: + throw new TUnsupportedCharsetException(charsetName); + } } public final String name() { @@ -56,6 +101,8 @@ public abstract class TCharset implements Comparable { return canonicalName; } + public abstract boolean contains(TCharset cs); + public abstract TCharsetDecoder newDecoder(); public abstract TCharsetEncoder newEncoder(); @@ -63,4 +110,35 @@ public abstract class TCharset implements Comparable { public boolean canEncode() { return true; } + + public final TCharBuffer decode(TByteBuffer bb) { + try { + return newDecoder() + .onMalformedInput(TCodingErrorAction.REPLACE) + .onUnmappableCharacter(TCodingErrorAction.REPLACE) + .decode(bb); + } catch (TCharacterCodingException e) { + throw new AssertionError("Should never been thrown", e); + } + } + + public final TByteBuffer encode(TCharBuffer cb) { + try { + return newEncoder() + .onMalformedInput(TCodingErrorAction.REPLACE) + .onUnmappableCharacter(TCodingErrorAction.REPLACE) + .encode(cb); + } catch (TCharacterCodingException e) { + throw new AssertionError("Should never been thrown", e); + } + } + + public final TByteBuffer encode(String str) { + return encode(TCharBuffer.wrap(str)); + } + + @Override + public final int compareTo(TCharset that) { + return canonicalName.compareToIgnoreCase(that.canonicalName); + } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetDecoder.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetDecoder.java index e3587a204..3447037ca 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetDecoder.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetDecoder.java @@ -15,10 +15,230 @@ */ package org.teavm.classlib.java.nio.charset; +import 
java.util.Arrays; +import org.teavm.classlib.java.nio.TByteBuffer; +import org.teavm.classlib.java.nio.TCharBuffer; + /** * * @author Alexey Andreev */ -public class TCharsetDecoder { +public abstract class TCharsetDecoder { + private static final int INIT = 0; + private static final int IN_PROGRESS = 1; + private static final int END = 2; + private static final int FLUSH = 2; + private TCharset charset; + private float averageCharsPerByte; + private float maxCharsPerByte; + private String replacement = "?"; + private TCodingErrorAction malformedAction = TCodingErrorAction.REPORT; + private TCodingErrorAction unmappableAction = TCodingErrorAction.REPORT; + private int state; + protected TCharsetDecoder(TCharset cs, float averageCharsPerByte, float maxCharsPerByte) { + if (averageCharsPerByte <= 0) { + throw new IllegalArgumentException("averageCharsPerByte must be positive. Actual value is " + + averageCharsPerByte); + } + if (maxCharsPerByte <= 0) { + throw new IllegalArgumentException("maxCharsPerByte must be positive. 
Actual value is " + + maxCharsPerByte); + } + this.charset = cs; + this.averageCharsPerByte = averageCharsPerByte; + this.maxCharsPerByte = maxCharsPerByte; + } + + public final TCharset charset() { + return charset; + } + + public final String replacement() { + return replacement; + } + + public final TCharsetDecoder replaceWith(String newReplacement) { + if (newReplacement == null || newReplacement.isEmpty()) { + throw new IllegalArgumentException("New replacement is null or empty"); + } + this.replacement = newReplacement; + implReplaceWith(newReplacement); + return this; + } + + protected void implReplaceWith(@SuppressWarnings("unused") String newReplacement) { + } + + public TCodingErrorAction malformedInputAction() { + return malformedAction; + } + + public final TCharsetDecoder onMalformedInput(TCodingErrorAction newAction) { + if (newAction == null) { + throw new IllegalArgumentException("newAction must be non-null"); + } + this.malformedAction = newAction; + implOnMalformedInput(newAction); + return this; + } + + protected void implOnMalformedInput(@SuppressWarnings("unused") TCodingErrorAction newAction) { + } + + public TCodingErrorAction unmappableCharacterAction() { + return unmappableAction; + } + + public final TCharsetDecoder onUnmappableCharacter(TCodingErrorAction newAction) { + if (newAction == null) { + throw new IllegalArgumentException("newAction must be non-null"); + } + this.unmappableAction = newAction; + implOnUnmappableCharacter(newAction); + return this; + } + + protected void implOnUnmappableCharacter(@SuppressWarnings("unused") TCodingErrorAction newAction) { + } + + public final float averageCharsPerByte() { + return averageCharsPerByte; + } + + public final float maxCharsPerByte() { + return maxCharsPerByte; + } + + public final TCoderResult decode(TByteBuffer in, TCharBuffer out, boolean endOfInput) { + if (state == END && !endOfInput || state == FLUSH) { + throw new IllegalStateException(); + } + state = !endOfInput ? 
IN_PROGRESS : END; + while (true) { + TCoderResult result; + try { + result = decodeLoop(in, out); + } catch (RuntimeException e) { + throw new TCoderMalfunctionError(e); + } + if (result.isOverflow()) { + return result; + } else if (result.isUnderflow()) { + if (endOfInput) { + state = END; + return TCoderResult.malformedForLength(in.remaining()); + } + return result; + } else if (result.isMalformed()) { + if (malformedAction == TCodingErrorAction.REPORT) { + return result; + } + if (malformedAction == TCodingErrorAction.REPLACE) { + if (out.remaining() < replacement.length()) { + return TCoderResult.OVERFLOW; + } + out.put(replacement); + } + in.position(in.position() + result.length()); + } else if (result.isUnmappable()) { + if (unmappableAction == TCodingErrorAction.REPORT) { + return result; + } + if (unmappableAction == TCodingErrorAction.REPLACE) { + if (out.remaining() < replacement.length()) { + return TCoderResult.OVERFLOW; + } + out.put(replacement); + } + in.position(in.position() + result.length()); + } + } + } + + public final TCoderResult flush(TCharBuffer out) { + if (state != FLUSH && state != END) { + throw new IllegalStateException(); + } + state = FLUSH; + return implFlush(out); + } + + public final TCharsetDecoder reset() { + state = INIT; + implReset(); + return this; + } + + public final TCharBuffer decode(TByteBuffer in) throws TCharacterCodingException { + if (state != INIT && state != FLUSH) { + throw new IllegalStateException(); + } + if (in.remaining() == 0) { + return TCharBuffer.allocate(0); + } + if (state != INIT) { + reset(); + } + + TCharBuffer out = TCharBuffer.allocate((int)(averageCharsPerByte * in.remaining())); + TCoderResult result; + while (true) { + result = decode(in, out, false); + if (result.isUnderflow()) { + break; + } else if (result.isOverflow()) { + out = expand(out); + } + if (result.isError()) { + result.throwException(); + } + } + + result = decode(in, out, true); + if (result.isError()) { + 
result.throwException(); + } + + while (true) { + result = flush(out); + if (result.isUnderflow()) { + break; + } else { + out = expand(out); + } + } + + out.flip(); + return out; + } + + public boolean isAutoDetecting() { + return false; + } + + public boolean isCharsetDetected() { + throw new UnsupportedOperationException(); + } + + public TCharset detectedCharset() { + throw new UnsupportedOperationException(); + } + + private TCharBuffer expand(TCharBuffer buffer) { + char[] array = buffer.array(); + array = Arrays.copyOf(array, array.length * 2); + TCharBuffer result = TCharBuffer.wrap(array); + result.position(buffer.position()); + return result; + } + + protected abstract TCoderResult decodeLoop(TByteBuffer in, TCharBuffer out); + + protected TCoderResult implFlush(@SuppressWarnings("unused") TCharBuffer out) { + return TCoderResult.UNDERFLOW; + } + + protected void implReset() { + } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetEncoder.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetEncoder.java index f09f10d8f..65282e5eb 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetEncoder.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TCharsetEncoder.java @@ -15,6 +15,7 @@ */ package org.teavm.classlib.java.nio.charset; +import java.util.Arrays; import org.teavm.classlib.java.nio.TByteBuffer; import org.teavm.classlib.java.nio.TCharBuffer; @@ -23,11 +24,11 @@ import org.teavm.classlib.java.nio.TCharBuffer; * @author Alexey Andreev */ public abstract class TCharsetEncoder { - private static final int READY = 0; + private static final int READY = 4; private static final int ONGOING = 1; private static final int END = 2; private static final int FLUSH = 3; - private static final int INIT = 4; + private static final int INIT = 0; private TCharset charset; private byte[] replacement; private float averageBytesPerChar; @@ -35,7 +36,6 @@ 
public abstract class TCharsetEncoder { private TCodingErrorAction malformedAction = TCodingErrorAction.REPORT; private TCodingErrorAction unmappableAction = TCodingErrorAction.REPORT; private int status; - private boolean finished; protected TCharsetEncoder(TCharset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement) { checkReplacement(replacement); @@ -43,7 +43,6 @@ public abstract class TCharsetEncoder { this.replacement = replacement.clone(); this.averageBytesPerChar = averageBytesPerChar; this.maxBytesPerChar = maxBytesPerChar; - status = INIT; } protected TCharsetEncoder(TCharset cs, float averageBytesPerChar, float maxBytesPerChar) { @@ -115,25 +114,19 @@ public abstract class TCharsetEncoder { } public final TCoderResult encode(TCharBuffer in, TByteBuffer out, boolean endOfInput) { - if (status == READY && finished && !endOfInput) { - throw new IllegalStateException(); - } - if (status == FLUSH || !endOfInput && status == END) { throw new IllegalStateException(); } + status = endOfInput ? END : ONGOING; TCoderResult result; while (true) { try { result = encodeLoop(in, out); - } catch (TBufferOverflowException e) { - throw new TCoderMalfunctionError(e); - } catch (TBufferUnderflowException e) { + } catch (RuntimeException e) { throw new TCoderMalfunctionError(e); } - if (result == TCoderResult.UNDERFLOW) { - status = endOfInput ? END : ONGOING; + if (result.isUnderflow()) { if (endOfInput) { int remaining = in.remaining(); if (remaining > 0) { @@ -144,14 +137,10 @@ public abstract class TCharsetEncoder { } else { return result; } - } else if (result == TCoderResult.OVERFLOW) { - status = endOfInput ? END : ONGOING; + } else if (result.isOverflow()) { return result; } - TCodingErrorAction action = malformedAction; - if (result.isUnmappable()) { - action = unmappableAction; - } + TCodingErrorAction action = result.isUnmappable() ? 
unmappableAction : malformedAction; if (action == TCodingErrorAction.REPLACE) { if (out.remaining() < replacement.length) { return TCoderResult.OVERFLOW; @@ -171,9 +160,9 @@ public abstract class TCharsetEncoder { return TByteBuffer.allocate(0); } reset(); - int length = (int)(in.remaining() * averageBytesPerChar); - TByteBuffer output = TByteBuffer.allocate(length); - TCoderResult result = null; + TByteBuffer output = TByteBuffer.allocate((int)(in.remaining() * averageBytesPerChar)); + + TCoderResult result; while (true) { result = encode(in, output, false); if (result == TCoderResult.UNDERFLOW) { @@ -182,31 +171,25 @@ public abstract class TCharsetEncoder { output = allocateMore(output); continue; } - checkCoderResult(result); + if (result.isError()) { + result.throwException(); + } } + result = encode(in, output, true); - checkCoderResult(result); + if (result.isError()) { + result.throwException(); + } while (true) { result = flush(output); - if (result == TCoderResult.UNDERFLOW) { - output.flip(); + if (result.isUnderflow()) { break; - } else if (result == TCoderResult.OVERFLOW) { + } else if (result.isOverflow()) { output = allocateMore(output); - continue; } - checkCoderResult(result); - output.flip(); - if (result.isMalformed()) { - throw new TMalformedInputException(result.length()); - } else if (result.isUnmappable()) { - throw new TUnmappableCharacterException(result.length()); - } - break; } - status = READY; - finished = true; + output.flip(); return output; } @@ -249,21 +232,11 @@ public abstract class TCharsetEncoder { return implCanEncode(cb); } - private void checkCoderResult(TCoderResult result) throws TCharacterCodingException { - if (malformedAction == TCodingErrorAction.REPORT && result.isMalformed()) { - throw new TMalformedInputException(result.length()); - } else if (unmappableAction == TCodingErrorAction.REPORT && result.isUnmappable()) { - throw new TUnmappableCharacterException(result.length()); - } - } - - private TByteBuffer 
allocateMore(TByteBuffer output) { - if (output.capacity() == 0) { - return TByteBuffer.allocate(1); - } - TByteBuffer result = TByteBuffer.allocate(output.capacity() * 2); - output.flip(); - result.put(output); + private TByteBuffer allocateMore(TByteBuffer buffer) { + byte[] array = buffer.array(); + array = Arrays.copyOf(array, array.length * 2); + TByteBuffer result = TByteBuffer.wrap(array); + result.position(buffer.position()); return result; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TIllegalCharsetNameException.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TIllegalCharsetNameException.java new file mode 100644 index 000000000..8915fbcda --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/TIllegalCharsetNameException.java @@ -0,0 +1,33 @@ +/* + * Copyright 2015 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Thrown when a string is not a legal charset name.
 *
 * @author Alexey Andreev
 */
public class TIllegalCharsetNameException extends IllegalArgumentException {
    private static final long serialVersionUID = -6848948447254188722L;
    private final String charsetName;

    /**
     * Creates the exception.
     *
     * @param charsetName the offending name; it also becomes the exception message
     */
    public TIllegalCharsetNameException(String charsetName) {
        // String.valueOf keeps the message non-null even when the name itself is null.
        super(String.valueOf(charsetName));
        this.charsetName = charsetName;
    }

    /**
     * Returns the offending name exactly as it was passed to the constructor.
     *
     * @return the illegal charset name, possibly {@code null}
     */
    public String getCharsetName() {
        return charsetName;
    }
}
/**
 * Thrown when a charset name is legal but no charset with that name is available.
 *
 * @author Alexey Andreev
 */
public class TUnsupportedCharsetException extends IllegalArgumentException {
    private static final long serialVersionUID = 2668607022458967777L;
    private final String charsetName;

    /**
     * Creates the exception.
     *
     * @param charsetName the name of the unsupported charset; it also becomes the exception message
     */
    public TUnsupportedCharsetException(String charsetName) {
        // String.valueOf keeps the message non-null even when the name itself is null.
        super(String.valueOf(charsetName));
        this.charsetName = charsetName;
    }

    /**
     * Returns the requested name exactly as it was passed to the constructor.
     *
     * @return the name of the unsupported charset, possibly {@code null}
     */
    public String getCharsetName() {
        return charsetName;
    }
}
+ */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCharsetDecoder; +import org.teavm.classlib.java.nio.charset.TCharsetEncoder; + +/** + * + * @author Alexey Andreev + */ +public class UTF8Charset extends TCharset { + public UTF8Charset() { + super("UTF-8", new String[0]); + } + + @Override + public boolean contains(TCharset cs) { + return cs == this; + } + + @Override + public TCharsetDecoder newDecoder() { + return new UTF8Decoder(this); + } + + @Override + public TCharsetEncoder newEncoder() { + return null; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/UTF8Decoder.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/UTF8Decoder.java new file mode 100644 index 000000000..b7c93bb17 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/nio/charset/impl/UTF8Decoder.java @@ -0,0 +1,83 @@ +/* + * Copyright 2015 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.teavm.classlib.java.nio.charset.impl; + +import org.teavm.classlib.impl.charset.UTF16Helper; +import org.teavm.classlib.java.nio.TByteBuffer; +import org.teavm.classlib.java.nio.TCharBuffer; +import org.teavm.classlib.java.nio.charset.TCharset; +import org.teavm.classlib.java.nio.charset.TCharsetDecoder; +import org.teavm.classlib.java.nio.charset.TCoderResult; + +/** + * + * @author Alexey Andreev + */ +public class UTF8Decoder extends TCharsetDecoder { + public UTF8Decoder(TCharset cs) { + super(cs, 1f / 3, 0.5f); + } + + @Override + protected TCoderResult decodeLoop(TByteBuffer in, TCharBuffer out) { + while (true) { + if (in.remaining() < 4) { + return TCoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return TCoderResult.OVERFLOW; + } + int b = in.get() & 0xFF; + if ((b & 0x80) == 0) { + out.put((char)b); + } else if ((b & 0xE0) == 0xC0) { + if (!in.hasRemaining()) { + in.position(in.position() - 1); + return TCoderResult.UNDERFLOW; + } + out.put((char)(((b & 0x1F) << 6) | (in.get() & 0x3F))); + } else if ((b & 0xF0) == 0xE0) { + if (in.remaining() < 2) { + in.position(in.position() - 1); + return TCoderResult.UNDERFLOW; + } + byte b2 = in.get(); + byte b3 = in.get(); + char c = (char)(((b & 0x0F) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3F)); + if (Character.isSurrogate(c)) { + in.position(in.position() - 2); + return TCoderResult.malformedForLength(3); + } + out.put(c); + } else if ((b & 0xF8) == 0xF0) { + if (in.remaining() < 3) { + in.position(in.position() - 1); + return TCoderResult.UNDERFLOW; + } + if (out.remaining() < 3) { + in.position(in.position() - 1); + return TCoderResult.OVERFLOW; + } + byte b2 = in.get(); + byte b3 = in.get(); + byte b4 = in.get(); + int code = ((b & 0x07) << 18) | ((b2 & 0x3f) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F); + out.put(UTF16Helper.highSurrogate(code)); + out.put(UTF16Helper.lowSurrogate(code)); + } + } + } +}