/* * Copyright (C) 2009 The Guava Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.common.escape; import static com.google.common.base.Preconditions.checkNotNull; import java.util.HashMap; import java.util.Map; import javax.annotation.Nullable; import com.google.common.annotations.Beta; import com.google.common.annotations.GwtCompatible; /** * Static utility methods pertaining to {@link Escaper} instances. * * @author Sven Mawson * @author David Beaumont * @since 15.0 */ @Beta @GwtCompatible public final class Escapers { private Escapers() { } /** * Returns an {@link Escaper} that does no escaping, passing all character data * through unchanged. */ public static Escaper nullEscaper() { return NULL_ESCAPER; } // An Escaper that efficiently performs no escaping. // Extending CharEscaper (instead of Escaper) makes Escapers.compose() easier. private static final Escaper NULL_ESCAPER = new CharEscaper() { @Override public String escape(String string) { return checkNotNull(string); } @Override protected char[] escape(char c) { // TODO: Fix tests not to call this directly and make it throw an error. return null; } }; /** * Returns a builder for creating simple, fast escapers. A builder instance can * be reused and each escaper that is created will be a snapshot of the current * builder state. Builders are not thread safe. * *
* The initial state of the builder is such that: *
* For performance reasons escapers created by this builder are not Unicode * aware and will not validate the well-formedness of their input. */ public static Builder builder() { return new Builder(); } /** * A builder for simple, fast escapers. * *
* Typically an escaper needs to deal with the escaping of high valued
* characters or code points. In these cases it is necessary to extend either
* {@link ArrayBasedCharEscaper} or {@link ArrayBasedUnicodeEscaper} to provide
* the desired behavior. However, this builder is suitable for creating escapers
* that replace a relatively small set of characters.
*
* @author David Beaumont
* @since 15.0
*/
@Beta
public static final class Builder {
private final Map
* When a {@link CharEscaper} escaper is wrapped by this method it acquires
* extra behavior with respect to the well-formedness of Unicode character
* sequences and will throw {@link IllegalArgumentException} when given bad
* input.
*
* @param escaper the instance to be wrapped
* @return a UnicodeEscaper with the same behavior as the given instance
* @throws NullPointerException if escaper is null
* @throws IllegalArgumentException if escaper is not a UnicodeEscaper or a
* CharEscaper
*/
static UnicodeEscaper asUnicodeEscaper(Escaper escaper) {
  // Reject null first so the caller sees a NullPointerException rather than
  // the IllegalArgumentException thrown at the bottom of this method.
  checkNotNull(escaper);

  // Already a UnicodeEscaper: hand back the very same instance.
  if (escaper instanceof UnicodeEscaper) {
    return (UnicodeEscaper) escaper;
  }

  // A char-based escaper is adapted to the code point API by wrap().
  if (escaper instanceof CharEscaper) {
    CharEscaper charEscaper = (CharEscaper) escaper;
    return wrap(charEscaper);
  }

  // Any other Escaper implementation cannot be adapted. This is not expected
  // in practice, since non-trivial escapers normally extend one of the two
  // base classes handled above.
  String className = escaper.getClass().getName();
  throw new IllegalArgumentException("Cannot create a UnicodeEscaper from: " + className);
}
/**
 * Computes the replacement that the given escaper would substitute for a
 * single character.
 *
 * This exists so that tests (via the {@code EscaperAsserts} class) can inspect
 * per-character behavior; production code using {@link CharEscaper} should
 * stick to that class's public interface.
 *
 * @param escaper the escaper whose per-character behavior is queried
 * @param c the character to escape if necessary
 * @return the replacement string, or {@code null} if no escaping was needed
 */
public static String computeReplacement(CharEscaper escaper, char c) {
  // A null array from the escaper means "leave this character unchanged".
  char[] replacement = escaper.escape(c);
  return stringOrNull(replacement);
}
/**
 * Computes the replacement that the given escaper would substitute for a
 * single Unicode code point.
 *
 * This exists so that tests (via the {@code EscaperAsserts} class) can inspect
 * per-code-point behavior; production code using {@link UnicodeEscaper} should
 * stick to that class's public interface.
 *
 * @param escaper the escaper whose per-code-point behavior is queried
 * @param cp the Unicode code point to escape if necessary
 * @return the replacement string, or {@code null} if no escaping was needed
 */
public static String computeReplacement(UnicodeEscaper escaper, int cp) {
  // A null array from the escaper means "leave this code point unchanged".
  char[] replacement = escaper.escape(cp);
  return stringOrNull(replacement);
}
/**
 * Converts a replacement character array into a string, passing {@code null}
 * through unchanged.
 *
 * @param in the characters to convert, possibly {@code null}
 * @return a new string holding the characters of {@code in}, or {@code null}
 *     if {@code in} is {@code null}
 */
private static String stringOrNull(char[] in) {
  if (in == null) {
    return null;
  }
  return new String(in);
}
/**
 * Private helper to wrap a CharEscaper as a UnicodeEscaper.
 *
 * Code points below {@link Character#MIN_SUPPLEMENTARY_CODE_POINT} are cast to
 * {@code char} and passed straight to the wrapped escaper. Supplementary code
 * points are split into their surrogate pair, each half is escaped separately,
 * and the two results are concatenated.
 *
 * @param escaper the char-based escaper to delegate to
 * @return a UnicodeEscaper whose {@code escape(int)} delegates to
 *     {@code escaper}
 */
private static UnicodeEscaper wrap(final CharEscaper escaper) {
  return new UnicodeEscaper() {
    @Override
    protected char[] escape(int cp) {
      // If a code point maps to a single character, just escape that.
      if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
        return escaper.escape((char) cp);
      }

      // Convert the code point to a surrogate pair and escape both halves.
      // Note: this path is slow and allocates fresh arrays on every call
      // instead of reusing a buffer; in exchange the wrapper keeps no mutable
      // state of its own, which avoids any synchronization issues.
      char[] surrogateChars = new char[2];
      Character.toChars(cp, surrogateChars, 0);
      char[] hiChars = escaper.escape(surrogateChars[0]);
      char[] loChars = escaper.escape(surrogateChars[1]);

      // A non-null result for either half means the CharEscaper is escaping
      // the two characters of a surrogate pair separately. This is uncommon
      // and applies only to escapers that assume UCS-2 rather than UTF-16.
      // See: http://en.wikipedia.org/wiki/UTF-16/UCS-2
      if (hiChars == null && loChars == null) {
        // We expect this to be the common code path for most escapers.
        return null;
      }

      // Combine the characters and/or escaped sequences into a single array.
      // A half that was not escaped contributes its original surrogate char.
      int hiCount = hiChars != null ? hiChars.length : 1;
      int loCount = loChars != null ? loChars.length : 1;
      char[] output = new char[hiCount + loCount];
      if (hiChars != null) {
        System.arraycopy(hiChars, 0, output, 0, hiCount);
      } else {
        output[0] = surrogateChars[0];
      }
      if (loChars != null) {
        System.arraycopy(loChars, 0, output, hiCount, loCount);
      } else {
        output[hiCount] = surrogateChars[1];
      }
      return output;
    }
  };
}
}