/* * Copyright (C) 2012 The Guava Authors * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package com.google.common.io; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkPositionIndexes; import static com.google.common.base.Preconditions.checkState; import static com.google.common.io.GwtWorkarounds.asCharInput; import static com.google.common.io.GwtWorkarounds.asCharOutput; import static com.google.common.io.GwtWorkarounds.asInputStream; import static com.google.common.io.GwtWorkarounds.asOutputStream; import static com.google.common.io.GwtWorkarounds.stringBuilderOutput; import static com.google.common.math.IntMath.divide; import static com.google.common.math.IntMath.log2; import static java.math.RoundingMode.CEILING; import static java.math.RoundingMode.FLOOR; import static java.math.RoundingMode.UNNECESSARY; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Reader; import java.io.Writer; import java.util.Arrays; import javax.annotation.CheckReturnValue; import javax.annotation.Nullable; import com.google.common.annotations.Beta; import com.google.common.annotations.GwtCompatible; import com.google.common.annotations.GwtIncompatible; import com.google.common.base.Ascii; import com.google.common.base.CharMatcher; import com.google.common.io.GwtWorkarounds.ByteInput; import 
com.google.common.io.GwtWorkarounds.ByteOutput; import com.google.common.io.GwtWorkarounds.CharInput; import com.google.common.io.GwtWorkarounds.CharOutput; /** * A binary encoding scheme for reversibly translating between byte sequences * and printable ASCII strings. This class includes several constants for * encoding schemes specified by * RFC 4648. For example, the * expression: * *
* <pre>   {@code
*   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
*
* returns the string {@code "MZXW6==="}, and * *
* <pre>   {@code
*   byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
*
* ...returns the ASCII bytes of the string {@code "foo"}. * *
* By default, {@code BaseEncoding}'s behavior is relatively strict and in * accordance with RFC 4648. Decoding rejects characters in the wrong case, * though padding is optional. To modify encoding and decoding behavior, use * configuration methods to obtain a new encoding with modified behavior: * *
* <pre>   {@code
*   BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
*
* Warning: BaseEncoding instances are immutable. Invoking a configuration * method has no effect on the receiving instance; you must store and use the * new encoding instance it returns, instead. * *
* <pre>   {@code
*   // Do NOT do this
*   BaseEncoding hex = BaseEncoding.base16();
*   hex.lowerCase(); // does nothing!
*   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
*
* It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always * equal to {@code x}, but the reverse does not necessarily hold. * *
* <table>
* <tr>
* <th>Encoding
* <th>Alphabet
* <th>{@code char:byte} ratio
* <th>Default padding
* <th>Comments
* <tr>
* <td>{@link #base16()}
* <td>0-9 A-F
* <td>2.00
* <td>N/A
* <td>Traditional hexadecimal. Defaults to upper case.
* <tr>
* <td>{@link #base32()}
* <td>A-Z 2-7
* <td>1.60
* <td>=
* <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper
* case.
* <tr>
* <td>{@link #base32Hex()}
* <td>0-9 A-V
* <td>1.60
* <td>=
* <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults
* to upper case.
* <tr>
* <td>{@link #base64()}
* <td>A-Z a-z 0-9 + /
* <td>1.33
* <td>=
* <td>
* <tr>
* <td>{@link #base64Url()}
* <td>A-Z a-z 0-9 - _
* <td>1.33
* <td>=
* <td>Safe to use as filenames, or to pass in URLs without escaping
* </table>
*
* <p>All instances of this class are immutable, so they may be stored safely as
* static constants.
*
* @author Louis Wasserman
* @since 14.0
*/
@Beta
@GwtCompatible(emulated = true)
public abstract class BaseEncoding {
  // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int])

  /** Package-private: only the implementations nested in this file extend this class. */
  BaseEncoding() {
  }

  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    DecodingException(String message) {
      super(message);
    }

    DecodingException(Throwable cause) {
      super(cause);
    }
  }

  /**
   * Encodes the specified byte array, and returns the encoded {@code String}.
   *
   * @param bytes the bytes to encode; must not be null
   */
  public String encode(byte[] bytes) {
    return encode(checkNotNull(bytes), 0, bytes.length);
  }

  /**
   * Encodes the specified range of the specified byte array, and returns the
   * encoded {@code String}.
   *
   * @param bytes the array holding the bytes to encode; must not be null
   * @param off index of the first byte to encode
   * @param len number of bytes to encode; {@code off} and {@code off + len} are
   *     validated against {@code bytes.length} with {@code checkPositionIndexes}
   */
  public final String encode(byte[] bytes, int off, int len) {
    checkNotNull(bytes);
    checkPositionIndexes(off, off + len, bytes.length);
    CharOutput result = stringBuilderOutput(maxEncodedSize(len));
    ByteOutput byteOutput = encodingStream(result);
    try {
      for (int i = 0; i < len; i++) {
        byteOutput.write(bytes[off + i]);
      }
      // Closing emits any buffered partial character and the padding.
      byteOutput.close();
    } catch (IOException impossible) {
      // Not expected for an in-memory output. Keep the cause (as decodeChecked does) so that
      // a violated assumption can still be diagnosed from the stack trace.
      throw new AssertionError(impossible);
    }
    return result.toString();
  }

  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into
   * the specified {@code Writer}. When the returned {@code OutputStream} is
   * closed, so is the backing {@code Writer}.
   */
  @GwtIncompatible("Writer,OutputStream")
  public final OutputStream encodingStream(Writer writer) {
    return asOutputStream(encodingStream(asCharOutput(writer)));
  }

  /**
   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified
   * {@code CharSink}. Each stream opened from the returned sink opens a new
   * stream on {@code encodedSink} and wraps it with
   * {@link #encodingStream(Writer)}.
   */
  @GwtIncompatible("ByteSink,CharSink")
  public final ByteSink encodingSink(final CharSink encodedSink) {
    checkNotNull(encodedSink);
    return new ByteSink() {
      @Override
      public OutputStream openStream() throws IOException {
        return encodingStream(encodedSink.openStream());
      }
    };
  }

  // TODO(user): document the extent of leniency, probably after adding
  // ignore(CharMatcher)

  /**
   * Returns the first {@code length} bytes of {@code result}. The array itself is
   * returned (not copied) when it is already exactly {@code length} long.
   */
  private static byte[] extract(byte[] result, int length) {
    if (length == result.length) {
      return result;
    } else {
      byte[] trunc = new byte[length];
      System.arraycopy(result, 0, trunc, 0, length);
      return trunc;
    }
  }

  /**
   * Decodes the specified character sequence, and returns the resulting
   * {@code byte[]}. This is the inverse operation to {@link #encode(byte[])}.
   *
   * @throws IllegalArgumentException if the input is not a valid encoded string
   *     according to this encoding; the underlying {@link DecodingException} is
   *     attached as the cause
   */
  public final byte[] decode(CharSequence chars) {
    try {
      return decodeChecked(chars);
    } catch (DecodingException badInput) {
      throw new IllegalArgumentException(badInput);
    }
  }

  /**
   * Decodes the specified character sequence, and returns the resulting
   * {@code byte[]}. This is the inverse operation to {@link #encode(byte[])}.
   *
   * @throws DecodingException if the input is not a valid encoded string
   *     according to this encoding.
   */
  final byte[] decodeChecked(CharSequence chars) throws DecodingException {
    // Trailing padding carries no data, so strip it before sizing the buffer.
    chars = padding().trimTrailingFrom(chars);
    ByteInput decodedInput = decodingStream(asCharInput(chars));
    // maxDecodedSize is an upper bound; the result is trimmed to the actual length below.
    byte[] tmp = new byte[maxDecodedSize(chars.length())];
    int index = 0;
    try {
      for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
        tmp[index++] = (byte) i;
      }
    } catch (DecodingException badInput) {
      throw badInput;
    } catch (IOException impossible) {
      throw new AssertionError(impossible);
    }
    return extract(tmp, index);
  }

  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the
   * specified {@code Reader}. The returned stream throws a
   * {@link DecodingException} upon decoding-specific errors.
   */
  @GwtIncompatible("Reader,InputStream")
  public final InputStream decodingStream(Reader reader) {
    return asInputStream(decodingStream(asCharInput(reader)));
  }

  /**
   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
   * {@code CharSource}. Each stream opened from the returned source opens a new
   * stream on {@code encodedSource} and wraps it with
   * {@link #decodingStream(Reader)}.
   */
  @GwtIncompatible("ByteSource,CharSource")
  public final ByteSource decodingSource(final CharSource encodedSource) {
    checkNotNull(encodedSource);
    return new ByteSource() {
      @Override
      public InputStream openStream() throws IOException {
        return decodingStream(encodedSource.openStream());
      }
    };
  }

  // Implementations for encoding/decoding

  /**
   * Returns the size passed to the output buffer when {@code bytes} bytes are
   * encoded by {@link #encode(byte[], int, int)}.
   */
  abstract int maxEncodedSize(int bytes);

  /** Returns a byte sink that writes the encoded form of its input to {@code charOutput}. */
  abstract ByteOutput encodingStream(CharOutput charOutput);

  /**
   * Returns the size of the buffer allocated by {@link #decodeChecked} for
   * {@code chars} input characters; it must be at least the number of bytes
   * actually decoded.
   */
  abstract int maxDecodedSize(int chars);

  /** Returns a byte source that yields the decoded form of the characters read from {@code charInput}. */
  abstract ByteInput decodingStream(CharInput charInput);

  /** Returns a matcher for this encoding's padding character(s); used to trim trailing padding. */
  abstract CharMatcher padding();

  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any
   * padding characters as specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 section
   * 3.2</a>, Padding of Encoded Data.
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an
   * alternate character for padding.
   *
   * @throws IllegalArgumentException if this padding character is already used in
   *     the alphabet or a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a
   * separator string after every {@code n} characters. Any occurrences of any
   * characters that occur in the separator are skipped over in decoding.
   *
   * @throws IllegalArgumentException if any alphabet or padding characters
   *     appear in the separator string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a
   *     separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes
   * and decodes with uppercase letters. Padding and separator characters remain
   * in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains
   *     mixed upper- and lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes
   * and decodes with lowercase letters. Padding and separator characters remain
   * in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains
   *     mixed upper- and lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();

  private static final BaseEncoding BASE64 = new StandardBaseEncoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section
   * 4</a>, Base 64 Encoding. (This is the same as the base 64 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be
   * {@linkplain #omitPadding() omitted} or {@linkplain #withPadChar(char)
   * replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section
   * 3.1</a>, Line Feeds in Encoded Data. Line feeds may be added using
   * {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }

  private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section
   * 5</a>, Base 64 Encoding with URL and Filename Safe Alphabet, also sometimes
   * referred to as the "web safe Base64." (This is the same as the base 64
   * encoding with URL and filename safe alphabet from
   * <a href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be
   * {@linkplain #omitPadding() omitted} or {@linkplain #withPadChar(char)
   * replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section
   * 3.1</a>, Line Feeds in Encoded Data. Line feeds may be added using
   * {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }

  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section
   * 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be
   * {@linkplain #omitPadding() omitted} or {@linkplain #withPadChar(char)
   * replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section
   * 3.1</a>, Line Feeds in Encoded Data. Line feeds may be added using
   * {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }

  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section
   * 7</a>, Base 32 Encoding with Extended Hex Alphabet. There is no corresponding
   * encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be
   * {@linkplain #omitPadding() omitted} or {@linkplain #withPadChar(char)
   * replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section
   * 3.1</a>, Line Feeds in Encoded Data. Line feeds may be added using
   * {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }

  private static final BaseEncoding BASE16 =
      new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);

  /**
   * The "base16" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section
   * 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is
   * commonly known as "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section
   * 3.1</a>, Line Feeds in Encoded Data. Line feeds may be added using
   * {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }

  /**
   * An encoding alphabet: the ordered characters that represent each group of
   * {@code bitsPerChar} bits, plus the derived lookup tables used for decoding
   * and for validating where padding may start. As a {@code CharMatcher} it
   * matches exactly the characters of the alphabet.
   */
  private static final class Alphabet extends CharMatcher {
    /** Display name, returned by {@link #toString()}. */
    private final String name;
    // this is meant to be immutable -- don't modify it!
    private final char[] chars;
    /** {@code chars.length - 1}; masks a value down to one character's worth of bits. */
    final int mask;
    /** log2 of the alphabet size. */
    final int bitsPerChar;
    /** Number of characters in one chunk (the smallest group that encodes whole bytes). */
    final int charsPerChunk;
    /** Number of bytes encoded by one chunk. */
    final int bytesPerChunk;
    /** Maps an ASCII character to its index in {@code chars}, or -1 if it is not in the alphabet. */
    private final byte[] decodabet;
    /** Indexed by character offset within a chunk; true where a whole number of bytes is complete. */
    private final boolean[] validPadding;

    /**
     * @param name display name of the alphabet
     * @param chars the alphabet characters, in value order; stored without copying
     * @throws IllegalArgumentException if {@code chars.length} is not a power of
     *     two, or if a character is non-ASCII or duplicated
     */
    Alphabet(String name, char[] chars) {
      this.name = checkNotNull(name);
      this.chars = checkNotNull(chars);
      try {
        // UNNECESSARY makes log2 throw unless the length is an exact power of two.
        this.bitsPerChar = log2(chars.length, UNNECESSARY);
      } catch (ArithmeticException e) {
        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
      }

      /*
       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk ==
       * 3. This makes for the smallest chunk size that still has charsPerChunk *
       * bitsPerChar be a multiple of 8.
       */
      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
      this.charsPerChunk = 8 / gcd;
      this.bytesPerChunk = bitsPerChar / gcd;

      this.mask = chars.length - 1;

      byte[] decodabet = new byte[Ascii.MAX + 1];
      Arrays.fill(decodabet, (byte) -1);
      for (int i = 0; i < chars.length; i++) {
        char c = chars[i];
        checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
        decodabet[c] = (byte) i;
      }
      this.decodabet = decodabet;

      // For each whole byte count i within a chunk, mark the number of characters needed to
      // hold i bytes as a position where the data may stop (and padding may begin).
      boolean[] validPadding = new boolean[charsPerChunk];
      for (int i = 0; i < bytesPerChunk; i++) {
        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
      }
      this.validPadding = validPadding;
    }

    /** Returns the alphabet character at index {@code bits}. */
    char encode(int bits) {
      return chars[bits];
    }

    /**
     * Returns whether the data may end after {@code index} characters; only the
     * offset within the current chunk ({@code index % charsPerChunk}) matters.
     */
    boolean isValidPaddingStartPosition(int index) {
      return validPadding[index % charsPerChunk];
    }

    /**
     * Returns the value (index in the alphabet) of {@code ch}.
     *
     * @throws DecodingException if {@code ch} is not in this alphabet
     */
    int decode(char ch) throws IOException {
      if (ch > Ascii.MAX || decodabet[ch] == -1) {
        throw new DecodingException("Unrecognized character: " + ch);
      }
      return decodabet[ch];
    }

    /** Returns whether any alphabet character is an ASCII lowercase letter. */
    private boolean hasLowerCase() {
      for (char c : chars) {
        if (Ascii.isLowerCase(c)) {
          return true;
        }
      }
      return false;
    }

    /** Returns whether any alphabet character is an ASCII uppercase letter. */
    private boolean hasUpperCase() {
      for (char c : chars) {
        if (Ascii.isUpperCase(c)) {
          return true;
        }
      }
      return false;
    }

    /**
     * Returns this alphabet with its letters upper-cased, or {@code this} if it
     * has no lowercase letters.
     *
     * @throws IllegalStateException if the alphabet has both upper- and lowercase letters
     */
    Alphabet upperCase() {
      if (!hasLowerCase()) {
        return this;
      } else {
        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
        char[] upperCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          upperCased[i] = Ascii.toUpperCase(chars[i]);
        }
        return new Alphabet(name + ".upperCase()", upperCased);
      }
    }

    /**
     * Returns this alphabet with its letters lower-cased, or {@code this} if it
     * has no uppercase letters.
     *
     * @throws IllegalStateException if the alphabet has both upper- and lowercase letters
     */
    Alphabet lowerCase() {
      if (!hasUpperCase()) {
        return this;
      } else {
        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
        char[] lowerCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          lowerCased[i] = Ascii.toLowerCase(chars[i]);
        }
        return new Alphabet(name + ".lowerCase()", lowerCased);
      }
    }

    /** Matches exactly the characters of this alphabet. */
    @Override
    public boolean matches(char c) {
      // The ASCII check comes first so that decodabet is never indexed out of range.
      return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
    }

    @Override
    public String toString() {
      return name;
    }
  }

  /**
   * The encoding implementation behind the static factories: an {@link Alphabet}
   * plus an optional padding character, with no separators.
   */
  static final class StandardBaseEncoding extends BaseEncoding {
    private final Alphabet alphabet;

    /** The padding character, or null if this encoding writes no padding. */
    @Nullable
    private final Character paddingChar;

    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }

    /**
     * @throws IllegalArgumentException if {@code paddingChar} is non-null and is
     *     one of the alphabet characters
     */
    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
      this.alphabet = checkNotNull(alphabet);
      checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
          "Padding character %s was already in alphabet", paddingChar);
      this.paddingChar = paddingChar;
    }

    @Override
    CharMatcher padding() {
      return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
    }

    /** Whole chunks needed for {@code bytes} bytes, times the characters per chunk. */
    @Override
    int maxEncodedSize(int bytes) {
      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
    }

    @Override
    ByteOutput encodingStream(final CharOutput out) {
      checkNotNull(out);
      return new ByteOutput() {
        // Bits received but not yet emitted live in the low bitBufferLength bits of bitBuffer.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        int writtenChars = 0;

        @Override
        public void write(byte b) throws IOException {
          bitBuffer <<= 8;
          bitBuffer |= b & 0xFF; // mask: bytes are signed
          bitBufferLength += 8;
          // Emit one character for every complete group of bitsPerChar bits.
          while (bitBufferLength >= alphabet.bitsPerChar) {
            int charIndex =
                (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            bitBufferLength -= alphabet.bitsPerChar;
          }
        }

        @Override
        public void flush() throws IOException {
          out.flush();
        }

        @Override
        public void close() throws IOException {
          if (bitBufferLength > 0) {
            // Shift the leftover bits up so they fill the high end of a final character;
            // the vacated low bits are zero.
            int charIndex =
                (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            if (paddingChar != null) {
              // Pad the output up to a whole chunk.
              while (writtenChars % alphabet.charsPerChunk != 0) {
                out.write(paddingChar.charValue());
                writtenChars++;
              }
            }
          }
          out.close();
        }
      };
    }

    /** {@code ceil(bitsPerChar * chars / 8)}, computed in {@code long} arithmetic. */
    @Override
    int maxDecodedSize(int chars) {
      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
    }

    @Override
    ByteInput decodingStream(final CharInput reader) {
      checkNotNull(reader);
      return new ByteInput() {
        // Bits decoded but not yet returned live in the low bitBufferLength bits of bitBuffer.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        int readChars = 0;
        boolean hitPadding = false;
        final CharMatcher paddingMatcher = padding();

        @Override
        public int read() throws IOException {
          // Consume characters until a full byte is available or the input ends.
          while (true) {
            int readChar = reader.read();
            if (readChar == -1) {
              // Unpadded input must stop at a position where a whole number of bytes is encoded.
              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
                throw new DecodingException("Invalid input length " + readChars);
              }
              return -1;
            }
            readChars++;
            char ch = (char) readChar;
            if (paddingMatcher.matches(ch)) {
              // The first padding character may not be the first character of the input, and
              // must sit where the data is allowed to end.
              if (!hitPadding
                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
                throw new DecodingException("Padding cannot start at index " + readChars);
              }
              hitPadding = true;
            } else if (hitPadding) {
              // Once padding has started, only padding may follow.
              throw new DecodingException(
                  "Expected padding character but found '" + ch + "' at index " + readChars);
            } else {
              bitBuffer <<= alphabet.bitsPerChar;
              bitBuffer |= alphabet.decode(ch);
              bitBufferLength += alphabet.bitsPerChar;

              if (bitBufferLength >= 8) {
                bitBufferLength -= 8;
                return (bitBuffer >> bitBufferLength) & 0xFF;
              }
            }
          }
        }

        @Override
        public void close() throws IOException {
          reader.close();
        }
      };
    }

    @Override
    public BaseEncoding omitPadding() {
      return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      // When bitsPerChar divides 8 (e.g. base16) the output never needs padding, so the
      // padding character is irrelevant and this encoding is returned unchanged.
      if (8 % alphabet.bitsPerChar == 0
          || (paddingChar != null && paddingChar.charValue() == padChar)) {
        return this;
      } else {
        return new StandardBaseEncoding(alphabet, padChar);
      }
    }

    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      checkNotNull(separator);
      checkArgument(padding().or(alphabet).matchesNoneOf(separator),
          "Separator cannot contain alphabet or padding characters");
      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
    }

    // Lazily computed results of upperCase() / lowerCase(); written without synchronization.
    private transient BaseEncoding upperCase;
    private transient BaseEncoding lowerCase;

    @Override
    public BaseEncoding upperCase() {
      BaseEncoding result = upperCase;
      if (result == null) {
        Alphabet upper = alphabet.upperCase();
        result = upperCase =
            (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
      }
      return result;
    }

    @Override
    public BaseEncoding lowerCase() {
      BaseEncoding result = lowerCase;
      if (result == null) {
        Alphabet lower = alphabet.lowerCase();
        result = lowerCase =
            (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
      }
      return result;
    }

    /**
     * Returns {@code "BaseEncoding."} followed by the alphabet name and, for
     * alphabets whose {@code bitsPerChar} does not divide 8, the padding
     * configuration.
     */
    @Override
    public String toString() {
      StringBuilder builder = new StringBuilder("BaseEncoding.");
      builder.append(alphabet.toString());
      if (8 % alphabet.bitsPerChar != 0) {
        if (paddingChar == null) {
          builder.append(".omitPadding()");
        } else {
          builder.append(".withPadChar(").append(paddingChar).append(')');
        }
      }
      return builder.toString();
    }
  }

  /**
   * Returns a view of {@code delegate} that silently skips every character
   * matched by {@code toIgnore}. Closing the view closes {@code delegate}.
   */
  static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
    checkNotNull(delegate);
    checkNotNull(toIgnore);
    return new CharInput() {
      @Override
      public int read() throws IOException {
        int readChar;
        do {
          readChar = delegate.read();
        } while (readChar != -1 && toIgnore.matches((char) readChar));
        return readChar;
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * Returns a view of {@code delegate} that writes {@code separator} between
   * every group of {@code afterEveryChars} characters. The separator is written
   * just before the first character of the next group, so nothing is appended
   * after the last group.
   *
   * @throws IllegalArgumentException if {@code afterEveryChars <= 0}
   */
  static CharOutput separatingOutput(
      final CharOutput delegate, final String separator, final int afterEveryChars) {
    checkNotNull(delegate);
    checkNotNull(separator);
    checkArgument(afterEveryChars > 0);
    return new CharOutput() {
      int charsUntilSeparator = afterEveryChars;

      @Override
      public void write(char c) throws IOException {
        if (charsUntilSeparator == 0) {
          for (int i = 0; i < separator.length(); i++) {
            delegate.write(separator.charAt(i));
          }
          charsUntilSeparator = afterEveryChars;
        }
        delegate.write(c);
        charsUntilSeparator--;
      }

      @Override
      public void flush() throws IOException {
        delegate.flush();
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * An encoding that wraps another encoding, inserting a separator string while
   * encoding and skipping the separator's characters while decoding.
   */
  static final class SeparatedBaseEncoding extends BaseEncoding {
    private final BaseEncoding delegate;
    private final String separator;
    private final int afterEveryChars;
    /** Matches any character of {@code separator}; these are ignored when decoding. */
    private final CharMatcher separatorChars;

    /**
     * @throws IllegalArgumentException if {@code afterEveryChars <= 0}
     */
    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
      this.delegate = checkNotNull(delegate);
      this.separator = checkNotNull(separator);
      this.afterEveryChars = afterEveryChars;
      checkArgument(
          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
    }

    @Override
    CharMatcher padding() {
      return delegate.padding();
    }

    @Override
    int maxEncodedSize(int bytes) {
      int unseparatedSize = delegate.maxEncodedSize(bytes);
      // One separator between each pair of adjacent groups; none after the last group.
      return unseparatedSize + separator.length()
          * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
    }

    @Override
    ByteOutput encodingStream(final CharOutput output) {
      return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
    }

    @Override
    int maxDecodedSize(int chars) {
      return delegate.maxDecodedSize(chars);
    }

    @Override
    ByteInput decodingStream(final CharInput input) {
      return delegate.decodingStream(ignoringInput(input, separatorChars));
    }

    @Override
    public BaseEncoding omitPadding() {
      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      throw new UnsupportedOperationException("Already have a separator");
    }

    @Override
    public BaseEncoding upperCase() {
      return delegate.upperCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding lowerCase() {
      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public String toString() {
      return delegate.toString()
          + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
    }
  }
}