/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.lang3; /** *

* Operations on {@link CharSequence} that are {@code null} safe. *

* * @see CharSequence * @since 3.0 */ public class CharSequenceUtils { private static final int NOT_FOUND = -1; /** *

* {@code CharSequenceUtils} instances should NOT be constructed in standard * programming. *

* *

* This constructor is public to permit tools that require a JavaBean instance * to operate. *

*/ public CharSequenceUtils() { } // ----------------------------------------------------------------------- /** *

* Returns a new {@code CharSequence} that is a subsequence of this sequence * starting with the {@code char} value at the specified index. *

* *

* This provides the {@code CharSequence} equivalent to * {@link String#substring(int)}. The length (in {@code char}) of the returned * sequence is {@code length() - start}, so if {@code start == end} then an * empty sequence is returned. *

* * @param cs the specified subsequence, null returns null * @param start the start index, inclusive, valid * @return a new subsequence, may be null * @throws IndexOutOfBoundsException if {@code start} is negative or if * {@code start} is greater than * {@code length()} */ public static CharSequence subSequence(final CharSequence cs, final int start) { return cs == null ? null : cs.subSequence(start, cs.length()); } // ----------------------------------------------------------------------- /** * Returns the index within {@code cs} of the first occurrence of the specified * character, starting the search at the specified index. *

* If a character with value {@code searchChar} occurs in the character sequence * represented by the {@code cs} object at an index no smaller than * {@code start}, then the index of the first such occurrence is returned. For * values of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), this * is the smallest value k such that:

* *

	 * (this.charAt(k) == searchChar) && (k >= start)
	 *

* *

is true. For other values of {@code searchChar}, it is the * smallest value k such that:

* *

	 * (this.codePointAt(k) == searchChar) && (k >= start)
	 *

* *

is true. In either case, if no such character occurs inm * {@code cs} at or after position {@code start}, then {@code -1} is returned. * *

* There is no restriction on the value of {@code start}. If it is negative, it * has the same effect as if it were zero: the entire {@code CharSequence} may * be searched. If it is greater than the length of {@code cs}, it has the same * effect as if it were equal to the length of {@code cs}: {@code -1} is * returned. * *

* All indices are specified in {@code char} values (Unicode code units). * * @param cs the {@code CharSequence} to be processed, not null * @param searchChar the char to be searched for * @param start the start index, negative starts at the string start * @return the index where the search char was found, -1 if not found * @since 3.6 updated to behave more like {@code String} */ static int indexOf(final CharSequence cs, final int searchChar, int start) { if (cs instanceof String) { return ((String) cs).indexOf(searchChar, start); } final int sz = cs.length(); if (start < 0) { start = 0; } if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { for (int i = start; i < sz; i++) { if (cs.charAt(i) == searchChar) { return i; } } return NOT_FOUND; } // supplementary characters (LANG1300) if (searchChar <= Character.MAX_CODE_POINT) { final char[] chars = Character.toChars(searchChar); for (int i = start; i < sz - 1; i++) { final char high = cs.charAt(i); final char low = cs.charAt(i + 1); if (high == chars[0] && low == chars[1]) { return i; } } } return NOT_FOUND; } /** * Used by the indexOf(CharSequence methods) as a green implementation of * indexOf. * * @param cs the {@code CharSequence} to be processed * @param searchChar the {@code CharSequence} to be searched for * @param start the start index * @return the index where the search sequence was found */ static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) { if (cs instanceof String) { return ((String) cs).indexOf(searchChar.toString(), start); } else if (cs instanceof StringBuilder) { return ((StringBuilder) cs).indexOf(searchChar.toString(), start); } else if (cs instanceof StringBuffer) { return ((StringBuffer) cs).indexOf(searchChar.toString(), start); } return cs.toString().indexOf(searchChar.toString(), start); // if (cs instanceof String && searchChar instanceof String) { // // TODO: Do we assume searchChar is usually relatively small; // // If so then calling toString() on it is better than reverting to // // the green implementation in the else block // return ((String) cs).indexOf((String) searchChar, start); // } else { // // TODO: Implement rather than convert to String // return cs.toString().indexOf(searchChar.toString(), start); // } } /** * Returns the index within {@code cs} of the last occurrence of the specified * character, searching backward starting at the specified index. For values of * {@code searchChar} in the range from 0 to 0xFFFF (inclusive), the index * returned is the largest value k such that:

* *

	 * (this.charAt(k) == searchChar) && (k <= start)
	 *

* *

is true. For other values of {@code searchChar}, it is the * largest value k such that:

* *

	 * (this.codePointAt(k) == searchChar) && (k <= start)
	 *

* *

is true. In either case, if no such character occurs in * {@code cs} at or before position {@code start}, then {@code -1} is returned. * *

* All indices are specified in {@code char} values (Unicode code units). * * @param cs the {@code CharSequence} to be processed * @param searchChar the char to be searched for * @param start the start index, negative returns -1, beyond length starts * at end * @return the index where the search char was found, -1 if not found * @since 3.6 updated to behave more like {@code String} */ static int lastIndexOf(final CharSequence cs, final int searchChar, int start) { if (cs instanceof String) { return ((String) cs).lastIndexOf(searchChar, start); } final int sz = cs.length(); if (start < 0) { return NOT_FOUND; } if (start >= sz) { start = sz - 1; } if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { for (int i = start; i >= 0; --i) { if (cs.charAt(i) == searchChar) { return i; } } return NOT_FOUND; } // supplementary characters (LANG1300) // NOTE - we must do a forward traversal for this to avoid duplicating code // points if (searchChar <= Character.MAX_CODE_POINT) { final char[] chars = Character.toChars(searchChar); // make sure it's not the last index if (start == sz - 1) { return NOT_FOUND; } for (int i = start; i >= 0; i--) { final char high = cs.charAt(i); final char low = cs.charAt(i + 1); if (chars[0] == high && chars[1] == low) { return i; } } } return NOT_FOUND; } static final int TO_STRING_LIMIT = 16; /** * Used by the lastIndexOf(CharSequence methods) as a green implementation of * lastIndexOf * * @param cs the {@code CharSequence} to be processed * @param searchChar the {@code CharSequence} to find * @param start the start index * @return the index where the search sequence was found */ static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) { if (searchChar == null || cs == null) { return NOT_FOUND; } if (searchChar instanceof String) { if (cs instanceof String) { return ((String) cs).lastIndexOf((String) searchChar, start); } else if (cs instanceof StringBuilder) { return ((StringBuilder) cs).lastIndexOf((String) searchChar, start); } else if (cs instanceof StringBuffer) { return ((StringBuffer) cs).lastIndexOf((String) searchChar, start); } } final int len1 = cs.length(); final int len2 = searchChar.length(); if (start > len1) { start = len1; } if (start < 0 || len2 < 0 || len2 > len1) { return NOT_FOUND; } if (len2 == 0) { return start; } if (len2 <= TO_STRING_LIMIT) { if (cs instanceof String) { return ((String) cs).lastIndexOf(searchChar.toString(), start); } else if (cs instanceof StringBuilder) { return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start); } else if (cs instanceof StringBuffer) { return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start); } } if (start + len2 > len1) { start = len1 - len2; } final char char0 = searchChar.charAt(0); int i = start; while (true) { while (cs.charAt(i) != char0) { i--; if (i < 0) { return NOT_FOUND; } } if (checkLaterThan1(cs, searchChar, len2, i)) { return i; } i--; if (i < 0) { return NOT_FOUND; } } } private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) { for (int i = 1, j = len2 - 1; i <= j; i++, j--) { if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) { return false; } } return true; } /** * Converts the given CharSequence to a char[]. * * @param source the {@code CharSequence} to be processed. * @return the resulting char array, never null. * @since 3.11 */ public static char[] toCharArray(final CharSequence source) { final int len = StringUtils.length(source); if (len == 0) { return new char[0]; } if (source instanceof String) { return ((String) source).toCharArray(); } final char[] array = new char[len]; for (int i = 0; i < len; i++) { array[i] = source.charAt(i); } return array; } /** * Green implementation of regionMatches. * * @param cs the {@code CharSequence} to be processed * @param ignoreCase whether or not to be case insensitive * @param thisStart the index to start on the {@code cs} CharSequence * @param substring the {@code CharSequence} to be looked for * @param start the index to start on the {@code substring} CharSequence * @param length character length of the region * @return whether the region matched */ static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, final CharSequence substring, final int start, final int length) { if (cs instanceof String && substring instanceof String) { return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length); } int index1 = thisStart; int index2 = start; int tmpLen = length; // Extract these first so we detect NPEs the same as the java.lang.String // version final int srcLen = cs.length() - thisStart; final int otherLen = substring.length() - start; // Check for invalid parameters if (thisStart < 0 || start < 0 || length < 0) { return false; } // Check that the regions are long enough if (srcLen < length || otherLen < length) { return false; } while (tmpLen-- > 0) { final char c1 = cs.charAt(index1++); final char c2 = substring.charAt(index2++); if (c1 == c2) { continue; } if (!ignoreCase) { return false; } // The real same check as in String.regionMatches(): final char u1 = Character.toUpperCase(c1); final char u2 = Character.toUpperCase(c2); if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) { return false; } } return true; } }