/* * Copyright (C) 2006 The Guava Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.common.escape; import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.annotations.Beta; import com.google.common.annotations.GwtCompatible; /** * An object that converts literal text into a format safe for inclusion in a * particular context (such as an XML document). Typically (but not always), the * inverse process of "unescaping" the text is performed automatically by the * relevant parser. * * <p> * For example, an XML escaper would convert the literal string * {@code "Foo<Bar>"} into {@code * "Foo<Bar>"} to prevent {@code "<Bar>"} from being confused with an XML * tag. When the resulting XML document is parsed, the parser API will return * this text as the original literal string {@code "Foo<Bar>"}. * * <p> * A {@code CharEscaper} instance is required to be stateless, and safe when * used concurrently by multiple threads. * * <p> * Several popular escapers are defined as constants in classes like * {@link com.google.common.html.HtmlEscapers}, * {@link com.google.common.xml.XmlEscapers}, and {@link SourceCodeEscapers}. To * create your own escapers extend this class and implement the * {@link #escape(char)} method. * * @author Sven Mawson * @since 15.0 */ @Beta @GwtCompatible public abstract class CharEscaper extends Escaper { /** Constructor for use by subclasses. */ protected CharEscaper() { } /** * Returns the escaped form of a given literal string. * * @param string the literal string to be escaped * @return the escaped form of {@code string} * @throws NullPointerException if {@code string} is null */ @Override public String escape(String string) { checkNotNull(string); // GWT specific check (do not optimize) // Inlineable fast-path loop which hands off to escapeSlow() only if needed int length = string.length(); for (int index = 0; index < length; index++) { if (escape(string.charAt(index)) != null) { return escapeSlow(string, index); } } return string; } /** * Returns the escaped form of a given literal string, starting at the given * index. This method is called by the {@link #escape(String)} method when it * discovers that escaping is required. It is protected to allow subclasses to * override the fastpath escaping function to inline their escaping test. See * {@link CharEscaperBuilder} for an example usage. * * @param s the literal string to be escaped * @param index the index to start escaping from * @return the escaped form of {@code string} * @throws NullPointerException if {@code string} is null */ protected final String escapeSlow(String s, int index) { int slen = s.length(); // Get a destination buffer and setup some loop variables. char[] dest = Platform.charBufferFromThreadLocal(); int destSize = dest.length; int destIndex = 0; int lastEscape = 0; // Loop through the rest of the string, replacing when needed into the // destination buffer, which gets grown as needed as well. for (; index < slen; index++) { // Get a replacement for the current character. char[] r = escape(s.charAt(index)); // If no replacement is needed, just continue. if (r == null) continue; int rlen = r.length; int charsSkipped = index - lastEscape; // This is the size needed to add the replacement, not the full size // needed by the string. We only regrow when we absolutely must, and // when we do grow, grow enough to avoid excessive growing. Grow. int sizeNeeded = destIndex + charsSkipped + rlen; if (destSize < sizeNeeded) { destSize = sizeNeeded + DEST_PAD_MULTIPLIER * (slen - index); dest = growBuffer(dest, destIndex, destSize); } // If we have skipped any characters, we need to copy them now. if (charsSkipped > 0) { s.getChars(lastEscape, index, dest, destIndex); destIndex += charsSkipped; } // Copy the replacement string into the dest buffer as needed. if (rlen > 0) { System.arraycopy(r, 0, dest, destIndex, rlen); destIndex += rlen; } lastEscape = index + 1; } // Copy leftover characters if there are any. int charsLeft = slen - lastEscape; if (charsLeft > 0) { int sizeNeeded = destIndex + charsLeft; if (destSize < sizeNeeded) { // Regrow and copy, expensive! No padding as this is the final copy. dest = growBuffer(dest, destIndex, sizeNeeded); } s.getChars(lastEscape, slen, dest, destIndex); destIndex = sizeNeeded; } return new String(dest, 0, destIndex); } /** * Returns the escaped form of the given character, or {@code null} if this * character does not need to be escaped. If an empty array is returned, this * effectively strips the input character from the resulting text. * * <p> * If the character does not need to be escaped, this method should return * {@code null}, rather than a one-character array containing the character * itself. This enables the escaping algorithm to perform more efficiently. * * <p> * An escaper is expected to be able to deal with any {@code char} value, so * this method should not throw any exceptions. * * @param c the character to escape if necessary * @return the replacement characters, or {@code null} if no escaping was needed */ protected abstract char[] escape(char c); /** * Helper method to grow the character buffer as needed, this only happens once * in a while so it's ok if it's in a method call. If the index passed in is 0 * then no copying will be done. */ private static char[] growBuffer(char[] dest, int index, int size) { char[] copy = new char[size]; if (index > 0) { System.arraycopy(dest, 0, copy, 0, index); } return copy; } /** * The multiplier for padding to use when growing the escape buffer. */ private static final int DEST_PAD_MULTIPLIER = 2; }