403 lines
13 KiB
Java
403 lines
13 KiB
Java
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
package org.apache.commons.lang3;
|
|
|
|
/**
 * <p>
 * Operations on {@link CharSequence} that are {@code null} safe.
 * </p>
 *
 * @see CharSequence
 * @since 3.0
 */
public class CharSequenceUtils {

    /** Result returned by the search methods when nothing matches. */
    private static final int NOT_FOUND = -1;

    /**
     * Longest search sequence (in {@code char}s) that
     * {@link #lastIndexOf(CharSequence, CharSequence, int)} converts to a
     * {@code String} in order to delegate to the JDK implementation.
     */
    static final int TO_STRING_LIMIT = 16;

    /**
     * <p>
     * {@code CharSequenceUtils} instances should NOT be constructed in standard
     * programming.
     * </p>
     *
     * <p>
     * This constructor is public to permit tools that require a JavaBean instance
     * to operate.
     * </p>
     */
    public CharSequenceUtils() {
    }

    // -----------------------------------------------------------------------
    /**
     * <p>
     * Returns a new {@code CharSequence} that is a subsequence of this sequence
     * starting with the {@code char} value at the specified index.
     * </p>
     *
     * <p>
     * This provides the {@code CharSequence} equivalent to
     * {@link String#substring(int)}. The length (in {@code char}) of the returned
     * sequence is {@code length() - start}, so if {@code start == length()} then
     * an empty sequence is returned.
     * </p>
     *
     * @param cs    the sequence to take the subsequence from, null returns null
     * @param start the start index, inclusive, valid
     * @return a new subsequence, may be null
     * @throws IndexOutOfBoundsException if {@code start} is negative or if
     *                                   {@code start} is greater than
     *                                   {@code length()}
     */
    public static CharSequence subSequence(final CharSequence cs, final int start) {
        if (cs == null) {
            return null;
        }
        return cs.subSequence(start, cs.length());
    }

    // -----------------------------------------------------------------------
    /**
     * Returns the index within {@code cs} of the first occurrence of the specified
     * character, starting the search at the specified index.
     * <p>
     * For values of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
     * the result is the smallest index <i>k</i> with {@code k >= start} and
     * {@code cs.charAt(k) == searchChar}. For supplementary code points it is the
     * smallest index <i>k</i> with {@code k >= start} at which the surrogate pair
     * encoding {@code searchChar} begins. In either case, if no such character
     * occurs in {@code cs} at or after position {@code start}, then {@code -1} is
     * returned.
     *
     * <p>
     * There is no restriction on the value of {@code start}. If it is negative, it
     * has the same effect as if it were zero: the entire {@code CharSequence} may
     * be searched. If it is greater than the length of {@code cs}, it has the same
     * effect as if it were equal to the length of {@code cs}: {@code -1} is
     * returned.
     *
     * <p>
     * All indices are specified in {@code char} values (Unicode code units).
     *
     * @param cs         the {@code CharSequence} to be processed, not null
     * @param searchChar the char to be searched for
     * @param start      the start index, negative starts at the string start
     * @return the index where the search char was found, -1 if not found
     * @since 3.6 updated to behave more like {@code String}
     */
    static int indexOf(final CharSequence cs, final int searchChar, int start) {
        if (cs instanceof String) {
            return ((String) cs).indexOf(searchChar, start);
        }
        final int sz = cs.length();
        if (start < 0) {
            start = 0;
        }
        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            // BMP value (or negative, which can never equal a char): compare unit by unit
            for (int i = start; i < sz; i++) {
                if (cs.charAt(i) == searchChar) {
                    return i;
                }
            }
            return NOT_FOUND;
        }
        // supplementary characters (LANG1300)
        if (searchChar <= Character.MAX_CODE_POINT) {
            final char[] pair = Character.toChars(searchChar);
            // a surrogate pair occupies two chars, so the last candidate index is sz - 2
            for (int i = start; i < sz - 1; i++) {
                if (cs.charAt(i) == pair[0] && cs.charAt(i + 1) == pair[1]) {
                    return i;
                }
            }
        }
        return NOT_FOUND;
    }

    /**
     * Used by the indexOf(CharSequence methods) as a green implementation of
     * indexOf.
     *
     * <p>
     * {@code String}, {@code StringBuilder} and {@code StringBuffer} receivers use
     * their own {@code indexOf}; any other receiver is converted with
     * {@code toString()} first. The search sequence is always converted with
     * {@code toString()}.
     * </p>
     *
     * @param cs         the {@code CharSequence} to be processed
     * @param searchChar the {@code CharSequence} to be searched for
     * @param start      the start index
     * @return the index where the search sequence was found
     */
    static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
        if (cs instanceof String) {
            return ((String) cs).indexOf(searchChar.toString(), start);
        }
        if (cs instanceof StringBuilder) {
            return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
        }
        if (cs instanceof StringBuffer) {
            return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
        }
        return cs.toString().indexOf(searchChar.toString(), start);
    }

    /**
     * Returns the index within {@code cs} of the last occurrence of the specified
     * character, searching backward starting at the specified index.
     * <p>
     * For values of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
     * the result is the largest index <i>k</i> with {@code k <= start} and
     * {@code cs.charAt(k) == searchChar}. For supplementary code points it is the
     * largest index <i>k</i> with {@code k <= start} at which the surrogate pair
     * encoding {@code searchChar} begins. In either case, if no such character
     * occurs in {@code cs} at or before position {@code start}, then {@code -1}
     * is returned.
     *
     * <p>
     * All indices are specified in {@code char} values (Unicode code units).
     *
     * @param cs         the {@code CharSequence} to be processed
     * @param searchChar the char to be searched for
     * @param start      the start index, negative returns -1, beyond length starts
     *                   at end
     * @return the index where the search char was found, -1 if not found
     * @since 3.6 updated to behave more like {@code String}
     */
    static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
        if (cs instanceof String) {
            return ((String) cs).lastIndexOf(searchChar, start);
        }
        final int sz = cs.length();
        if (start < 0) {
            return NOT_FOUND;
        }
        if (start >= sz) {
            start = sz - 1;
        }
        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            for (int i = start; i >= 0; --i) {
                if (cs.charAt(i) == searchChar) {
                    return i;
                }
            }
            return NOT_FOUND;
        }
        // supplementary characters (LANG1300)
        if (searchChar <= Character.MAX_CODE_POINT) {
            final char[] pair = Character.toChars(searchChar);
            // A surrogate pair needs two chars, so the last index a match can begin
            // at is sz - 2. Clamp to it instead of giving up when start is the last
            // index; earlier occurrences must still be found, as String does.
            for (int i = Math.min(start, sz - 2); i >= 0; i--) {
                if (cs.charAt(i) == pair[0] && cs.charAt(i + 1) == pair[1]) {
                    return i;
                }
            }
        }
        return NOT_FOUND;
    }

    /**
     * Used by the lastIndexOf(CharSequence methods) as a green implementation of
     * lastIndexOf.
     *
     * <p>
     * Delegates to the JDK when {@code cs} is a {@code String},
     * {@code StringBuilder} or {@code StringBuffer} and the search sequence is
     * either already a {@code String} or no longer than
     * {@link #TO_STRING_LIMIT}; otherwise scans {@code cs} backwards via
     * {@code charAt}.
     * </p>
     *
     * @param cs         the {@code CharSequence} to be processed, null returns -1
     * @param searchChar the {@code CharSequence} to find, null returns -1
     * @param start      the start index, negative returns -1, beyond length starts
     *                   at end
     * @return the index where the search sequence was found, -1 if not found
     */
    static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
        if (searchChar == null || cs == null) {
            return NOT_FOUND;
        }
        if (searchChar instanceof String) {
            final String search = (String) searchChar;
            if (cs instanceof String) {
                return ((String) cs).lastIndexOf(search, start);
            }
            if (cs instanceof StringBuilder) {
                return ((StringBuilder) cs).lastIndexOf(search, start);
            }
            if (cs instanceof StringBuffer) {
                return ((StringBuffer) cs).lastIndexOf(search, start);
            }
        }

        final int len1 = cs.length();
        final int len2 = searchChar.length();

        if (start > len1) {
            start = len1;
        }
        if (start < 0 || len2 < 0 || len2 > len1) {
            return NOT_FOUND;
        }
        // the empty sequence matches at the (clamped) start position
        if (len2 == 0) {
            return start;
        }

        if (len2 <= TO_STRING_LIMIT) {
            // short search sequences are cheap to copy, so let the JDK do the search
            if (cs instanceof String) {
                return ((String) cs).lastIndexOf(searchChar.toString(), start);
            }
            if (cs instanceof StringBuilder) {
                return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
            }
            if (cs instanceof StringBuffer) {
                return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
            }
        }

        // A match must fit inside cs; written as a subtraction so it cannot overflow.
        if (start > len1 - len2) {
            start = len1 - len2;
        }

        final char first = searchChar.charAt(0);
        for (int i = start; i >= 0; i--) {
            if (cs.charAt(i) == first && checkLaterThan1(cs, searchChar, len2, i)) {
                return i;
            }
        }
        return NOT_FOUND;
    }

    /**
     * Checks whether {@code searchChar}, from its second char onwards, matches
     * {@code cs} at offset {@code start1}. The first char is assumed to have been
     * compared by the caller. The comparison walks inwards from both ends.
     *
     * @param cs         the sequence being searched
     * @param searchChar the sequence being looked for
     * @param len2       the length of {@code searchChar}
     * @param start1     the index in {@code cs} where the candidate match begins
     * @return {@code true} if chars {@code 1..len2-1} of {@code searchChar} equal
     *         the chars of {@code cs} at {@code start1+1..start1+len2-1}
     */
    private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2,
            final int start1) {
        for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
            if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Converts the given CharSequence to a char[].
     *
     * @param source the {@code CharSequence} to be processed, {@code null} is
     *               treated as empty.
     * @return the resulting char array, never null.
     * @since 3.11
     */
    public static char[] toCharArray(final CharSequence source) {
        // null-safe length, so null and empty input both produce an empty array
        final int len = source == null ? 0 : source.length();
        if (len == 0) {
            return new char[0];
        }
        if (source instanceof String) {
            return ((String) source).toCharArray();
        }
        final char[] array = new char[len];
        for (int i = 0; i < len; i++) {
            array[i] = source.charAt(i);
        }
        return array;
    }

    /**
     * Green implementation of regionMatches.
     *
     * <p>
     * When both arguments are {@code String}s this delegates to
     * {@link String#regionMatches(boolean, int, String, int, int)}; otherwise the
     * regions are compared char by char.
     * </p>
     *
     * @param cs         the {@code CharSequence} to be processed
     * @param ignoreCase whether or not to be case insensitive
     * @param thisStart  the index to start on the {@code cs} CharSequence
     * @param substring  the {@code CharSequence} to be looked for
     * @param start      the index to start on the {@code substring} CharSequence
     * @param length     character length of the region
     * @return whether the region matched; {@code false} if any index or the length
     *         is negative or a region extends past the end of its sequence
     */
    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
            final CharSequence substring, final int start, final int length) {
        if (cs instanceof String && substring instanceof String) {
            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
        }

        // Extract these first so we detect NPEs the same as the java.lang.String
        // version
        final int srcLen = cs.length() - thisStart;
        final int otherLen = substring.length() - start;

        // Check for invalid parameters
        if (thisStart < 0 || start < 0 || length < 0) {
            return false;
        }

        // Check that the regions are long enough
        if (srcLen < length || otherLen < length) {
            return false;
        }

        for (int offset = 0; offset < length; offset++) {
            final char c1 = cs.charAt(thisStart + offset);
            final char c2 = substring.charAt(start + offset);
            if (c1 == c2) {
                continue;
            }
            if (!ignoreCase) {
                return false;
            }
            // Same two-step comparison the original attributes to String.regionMatches():
            // compare upper-cased chars, then the lower-cased forms of those.
            final char u1 = Character.toUpperCase(c1);
            final char u2 = Character.toUpperCase(c2);
            if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
                return false;
            }
        }
        return true;
    }
}
|