/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.lang3; import java.io.Serializable; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; /** *

* A set of characters. *

* *

* Instances are immutable, but instances of subclasses may not be. *

* *

* #ThreadSafe# *

* * @since 1.0 */ public class CharSet implements Serializable { /** * Required for serialization support. Lang version 2.0. * * @see java.io.Serializable */ private static final long serialVersionUID = 5947847346149275958L; /** * A CharSet defining no characters. * * @since 2.0 */ public static final CharSet EMPTY = new CharSet((String) null); /** * A CharSet defining ASCII alphabetic characters "a-zA-Z". * * @since 2.0 */ public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z"); /** * A CharSet defining ASCII alphabetic characters "a-z". * * @since 2.0 */ public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z"); /** * A CharSet defining ASCII alphabetic characters "A-Z". * * @since 2.0 */ public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z"); /** * A CharSet defining ASCII alphabetic characters "0-9". * * @since 2.0 */ public static final CharSet ASCII_NUMERIC = new CharSet("0-9"); /** * A Map of the common cases used in the factory. Subclasses can add more common * patterns if desired * * @since 2.0 */ protected static final Map COMMON = Collections.synchronizedMap(new HashMap<>()); static { COMMON.put(null, EMPTY); COMMON.put(StringUtils.EMPTY, EMPTY); COMMON.put("a-zA-Z", ASCII_ALPHA); COMMON.put("A-Za-z", ASCII_ALPHA); COMMON.put("a-z", ASCII_ALPHA_LOWER); COMMON.put("A-Z", ASCII_ALPHA_UPPER); COMMON.put("0-9", ASCII_NUMERIC); } /** The set of CharRange objects. */ private final Set set = Collections.synchronizedSet(new HashSet<>()); // ----------------------------------------------------------------------- /** *

* Factory method to create a new CharSet using a special syntax. *

* * * *

* The matching order is: *

*
    *
  1. Negated multi character range, such as "^a-e" *
  2. Ordinary multi character range, such as "a-e" *
  3. Negated single character, such as "^a" *
  4. Ordinary single character, such as "a" *
* *

* Matching works left to right. Once a match is found the search starts again * from the next character. *

* *

* If the same range is defined twice using the same syntax, only one range will * be kept. Thus, "a-ca-c" creates only one range of "a-c". *

* *

* If the start and end of a range are in the wrong order, they are reversed. * Thus "a-e" is the same as "e-a". As a result, "a-ee-a" would create only one * range, as the "a-e" and "e-a" are the same. *

* *

* The set of characters represented is the union of the specified ranges. *

* *

* There are two ways to add a literal negation character ({@code ^}): *

* * *

* Examples using the negation character: *

* *
	 *     CharSet.getInstance("^a-c").contains('a') = false
	 *     CharSet.getInstance("^a-c").contains('d') = true
	 *     CharSet.getInstance("^^a-c").contains('a') = true // (only '^' is negated)
	 *     CharSet.getInstance("^^a-c").contains('^') = false
	 *     CharSet.getInstance("^a-cd-f").contains('d') = true
	 *     CharSet.getInstance("a-c^").contains('^') = true
	 *     CharSet.getInstance("^", "a-c").contains('^') = true
	 * 
* *

* All CharSet objects returned by this method will be immutable. *

* * @param setStrs Strings to merge into the set, may be null * @return a CharSet instance * @since 2.4 */ public static CharSet getInstance(final String... setStrs) { if (setStrs == null) { return null; } if (setStrs.length == 1) { final CharSet common = COMMON.get(setStrs[0]); if (common != null) { return common; } } return new CharSet(setStrs); } // ----------------------------------------------------------------------- /** *

* Constructs a new CharSet using the set syntax. Each string is merged in with * the set. *

* * @param set Strings to merge into the initial set * @throws NullPointerException if set is {@code null} */ protected CharSet(final String... set) { for (final String s : set) { add(s); } } // ----------------------------------------------------------------------- /** *

* Add a set definition string to the {@code CharSet}. *

* * @param str set definition string */ protected void add(final String str) { if (str == null) { return; } final int len = str.length(); int pos = 0; while (pos < len) { final int remainder = len - pos; if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') { // negated range set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3))); pos += 4; } else if (remainder >= 3 && str.charAt(pos + 1) == '-') { // range set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2))); pos += 3; } else if (remainder >= 2 && str.charAt(pos) == '^') { // negated char set.add(CharRange.isNot(str.charAt(pos + 1))); pos += 2; } else { // char set.add(CharRange.is(str.charAt(pos))); pos += 1; } } } // ----------------------------------------------------------------------- /** *

* Gets the internal set as an array of CharRange objects. *

* * @return an array of immutable CharRange objects * @since 2.0 */ // NOTE: This is no longer public as CharRange is no longer a public class. // It may be replaced when CharSet moves to Range. /* public */ CharRange[] getCharRanges() { return set.toArray(CharRange.EMPTY_ARRAY); } // ----------------------------------------------------------------------- /** *

* Does the {@code CharSet} contain the specified character {@code ch}. *

* * @param ch the character to check for * @return {@code true} if the set contains the characters */ public boolean contains(final char ch) { synchronized (set) { for (final CharRange range : set) { if (range.contains(ch)) { return true; } } } return false; } // Basics // ----------------------------------------------------------------------- /** *

* Compares two {@code CharSet} objects, returning true if they represent * exactly the same set of characters defined in the same way. *

* *

* The two sets {@code abc} and {@code a-c} are not equal according to * this method. *

* * @param obj the object to compare to * @return true if equal * @since 2.0 */ @Override public boolean equals(final Object obj) { if (obj == this) { return true; } if (!(obj instanceof CharSet)) { return false; } final CharSet other = (CharSet) obj; return set.equals(other.set); } /** *

* Gets a hash code compatible with the equals method. *

* * @return a suitable hash code * @since 2.0 */ @Override public int hashCode() { return 89 + set.hashCode(); } /** *

* Gets a string representation of the set. *

* * @return string representation of the set */ @Override public String toString() { return set.toString(); } }