From 4e4f2b332affec7edf2ff227644ccb825799824a Mon Sep 17 00:00:00 2001 From: konsoletyper Date: Thu, 8 May 2014 16:57:14 +0400 Subject: [PATCH 1/4] Adds initial classes for java.util.regex from Apache Harmony --- .../java/util/TMissingResourceException.java | 43 + .../java/util/regex/AbstractCharClass.java | 855 +++++++ .../util/regex/AbstractLineTerminator.java | 82 + .../classlib/java/util/regex/AbstractSet.java | 273 +++ .../classlib/java/util/regex/AheadFSet.java | 57 + .../util/regex/AltGroupQuantifierSet.java | 69 + .../java/util/regex/AltQuantifierSet.java | 64 + .../classlib/java/util/regex/AtomicFSet.java | 70 + .../java/util/regex/AtomicJointSet.java | 87 + .../java/util/regex/BackReferenceSet.java | 123 + .../util/regex/BackReferencedSingleSet.java | 123 + .../classlib/java/util/regex/BehindFSet.java | 62 + .../java/util/regex/CIBackReferenceSet.java | 106 + .../classlib/java/util/regex/CICharSet.java | 67 + .../java/util/regex/CIDecomposedCharSet.java | 50 + .../java/util/regex/CISequenceSet.java | 71 + .../classlib/java/util/regex/CanClasses.java | 439 ++++ .../classlib/java/util/regex/CharClass.java | 587 +++++ .../classlib/java/util/regex/CharSet.java | 126 + .../regex/CompositeGroupQuantifierSet.java | 103 + .../util/regex/CompositeQuantifierSet.java | 104 + .../java/util/regex/CompositeRangeSet.java | 181 ++ .../java/util/regex/DecomposedCharSet.java | 259 +++ .../java/util/regex/DotAllQuantifierSet.java | 59 + .../classlib/java/util/regex/DotAllSet.java | 72 + .../java/util/regex/DotQuantifierSet.java | 122 + .../classlib/java/util/regex/DotSet.java | 80 + .../classlib/java/util/regex/EOISet.java | 53 + .../classlib/java/util/regex/EOLSet.java | 74 + .../classlib/java/util/regex/EmptySet.java | 117 + .../teavm/classlib/java/util/regex/FSet.java | 85 + .../classlib/java/util/regex/FinalSet.java | 48 + .../java/util/regex/GroupQuantifierSet.java | 55 + .../util/regex/HangulDecomposedCharSet.java | 203 ++ .../java/util/regex/HashDecompositions.java | 
2072 +++++++++++++++++ .../java/util/regex/HighSurrogateCharSet.java | 251 ++ .../teavm/classlib/java/util/regex/I18n.java | 46 + .../classlib/java/util/regex/IntArrHash.java | 82 + .../classlib/java/util/regex/IntHash.java | 84 + .../classlib/java/util/regex/JointSet.java | 154 ++ .../java/util/regex/LeafQuantifierSet.java | 73 + .../classlib/java/util/regex/LeafSet.java | 78 + .../teavm/classlib/java/util/regex/Lexer.java | 1273 ++++++++++ .../util/regex/LowHighSurrogateRangeSet.java | 188 ++ .../java/util/regex/LowSurrogateCharSet.java | 250 ++ .../java/util/regex/MatchResultImpl.java | 262 +++ .../java/util/regex/MultiLineEOLSet.java | 87 + .../java/util/regex/MultiLineSOLSet.java | 56 + .../java/util/regex/NegativeLookAhead.java | 59 + .../java/util/regex/NegativeLookBehind.java | 70 + .../classlib/java/util/regex/NonCapFSet.java | 50 + .../java/util/regex/NonCapJointSet.java | 66 + .../util/regex/PosAltGroupQuantifierSet.java | 50 + .../regex/PosCompositeGroupQuantifierSet.java | 55 + .../util/regex/PosPlusGroupQuantifierSet.java | 53 + .../java/util/regex/PositiveLookAhead.java | 63 + .../java/util/regex/PositiveLookBehind.java | 73 + .../regex/PossessiveAltQuantifierSet.java | 46 + .../PossessiveCompositeQuantifierSet.java | 63 + .../regex/PossessiveGroupQuantifierSet.java | 48 + .../util/regex/PossessiveQuantifierSet.java | 44 + .../java/util/regex/PreviousMatch.java | 49 + .../classlib/java/util/regex/Quantifier.java | 82 + .../java/util/regex/QuantifierSet.java | 132 ++ .../classlib/java/util/regex/RangeSet.java | 73 + .../util/regex/RelAltGroupQuantifierSet.java | 48 + .../regex/RelCompositeGroupQuantifierSet.java | 68 + .../util/regex/ReluctantAltQuantifierSet.java | 45 + .../ReluctantCompositeQuantifierSet.java | 71 + .../regex/ReluctantGroupQuantifierSet.java | 48 + .../util/regex/ReluctantQuantifierSet.java | 53 + .../classlib/java/util/regex/SOLSet.java | 47 + .../classlib/java/util/regex/SequenceSet.java | 223 ++ 
.../java/util/regex/SingleDecompositions.java | 971 ++++++++ .../classlib/java/util/regex/SingleSet.java | 126 + .../java/util/regex/SpecialToken.java | 43 + .../java/util/regex/SupplCharSet.java | 198 ++ .../java/util/regex/SupplRangeSet.java | 174 ++ .../java/util/regex/TMatchResult.java | 102 + .../classlib/java/util/regex/TMatcher.java | 698 ++++++ .../classlib/java/util/regex/TPattern.java | 1429 ++++++++++++ .../util/regex/TPatternSyntaxException.java | 125 + .../java/util/regex/UCIBackReferenceSet.java | 59 + .../classlib/java/util/regex/UCICharSet.java | 45 + .../java/util/regex/UCIDecomposedCharSet.java | 35 + .../classlib/java/util/regex/UCIRangeSet.java | 55 + .../java/util/regex/UCISequenceSet.java | 57 + .../java/util/regex/UCISupplCharSet.java | 110 + .../java/util/regex/UCISupplRangeSet.java | 108 + .../classlib/java/util/regex/UEOLSet.java | 65 + .../java/util/regex/UMultiLineEOLSet.java | 61 + .../java/util/regex/UnicodeCategory.java | 39 + .../java/util/regex/UnicodeCategoryScope.java | 36 + .../java/util/regex/UnifiedQuantifierSet.java | 64 + .../java/util/regex/WordBoundary.java | 83 + .../java/util/regex/Matcher2Test.java | 234 ++ .../classlib/java/util/regex/MatcherTest.java | 772 ++++++ .../classlib/java/util/regex/ModeTest.java | 111 + .../java/util/regex/Pattern2Test.java | 1412 +++++++++++ .../java/util/regex/PatternErrorTest.java | 67 + .../regex/PatternSyntaxExceptionTest.java | 61 + .../classlib/java/util/regex/PatternTest.java | 1635 +++++++++++++ .../classlib/java/util/regex/ReplaceTest.java | 90 + .../classlib/java/util/regex/SplitTest.java | 172 ++ 104 files changed, 20841 insertions(+) create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/TMissingResourceException.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractCharClass.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractLineTerminator.java create mode 100644 
teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AheadFSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltGroupQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicFSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicJointSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferenceSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferencedSingleSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BehindFSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIBackReferenceSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CICharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIDecomposedCharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CISequenceSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CanClasses.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharClass.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeGroupQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeRangeSet.java create mode 100644 
teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DecomposedCharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOISet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOLSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EmptySet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FinalSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/GroupQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HangulDecomposedCharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HashDecompositions.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HighSurrogateCharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/I18n.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/IntArrHash.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/IntHash.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/JointSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafSet.java create 
mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Lexer.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowHighSurrogateRangeSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowSurrogateCharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MatchResultImpl.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineEOLSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineSOLSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookAhead.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookBehind.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapFSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapJointSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosAltGroupQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosCompositeGroupQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosPlusGroupQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookAhead.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookBehind.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveAltQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveCompositeQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveGroupQuantifierSet.java create mode 100644 
teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PreviousMatch.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Quantifier.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/QuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RangeSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelAltGroupQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelCompositeGroupQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantAltQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantCompositeQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantGroupQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SOLSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SequenceSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleDecompositions.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SpecialToken.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplCharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplRangeSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TMatchResult.java create 
mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TMatcher.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPattern.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPatternSyntaxException.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIBackReferenceSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCICharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIDecomposedCharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIRangeSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISequenceSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplCharSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplRangeSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UEOLSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UMultiLineEOLSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategory.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategoryScope.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnifiedQuantifierSet.java create mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/WordBoundary.java create mode 100644 teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/Matcher2Test.java create mode 100644 teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/MatcherTest.java create mode 100644 teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ModeTest.java create mode 100644 
teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/Pattern2Test.java create mode 100644 teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternErrorTest.java create mode 100644 teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternSyntaxExceptionTest.java create mode 100644 teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternTest.java create mode 100644 teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ReplaceTest.java create mode 100644 teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/SplitTest.java diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/TMissingResourceException.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/TMissingResourceException.java new file mode 100644 index 000000000..6f8a8d828 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/TMissingResourceException.java @@ -0,0 +1,43 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.teavm.classlib.java.util; + +import org.teavm.classlib.java.lang.TRuntimeException; +import org.teavm.classlib.java.lang.TString; + +/** + * Exception extending TRuntimeException that stores a class name and a key alongside its message. + * @author Alexey Andreev + */ +public class TMissingResourceException extends TRuntimeException { + private static final long serialVersionUID = 6730397307327337970L; + private String className; + private String key; + + public TMissingResourceException(String s, String className, String key) { + super(TString.wrap(s)); /* the message is wrapped into a TString before being passed to the superclass */ + this.className = className; + this.key = key; + } + + public String getClassName() { + return className; + } + + public String getKey() { + return key; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractCharClass.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractCharClass.java new file mode 100644 index 000000000..14da9ff70 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractCharClass.java @@ -0,0 +1,855 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.BitSet; +import java.util.ListResourceBundle; + +/** + * This class represents character classes, i.e. + * sets of characters, either predefined or user-defined. + * + * Note that this class represents a token, not a node, so it is + * constructed by the lexer. + * + * @author Nikolay A. Kuznetsov + */ +abstract class AbstractCharClass extends SpecialToken { + protected boolean alt; + + protected boolean altSurrogates; + + //Character.MAX_SURROGATE - Character.MIN_SURROGATE + 1 + static int SURROGATE_CARDINALITY = 2048; + + BitSet lowHighSurrogates = new BitSet(SURROGATE_CARDINALITY); + + AbstractCharClass charClassWithoutSurrogates = null; + + AbstractCharClass charClassWithSurrogates = null; + + static PredefinedCharacterClasses charClasses = new PredefinedCharacterClasses(); + + /* + * Indicates if this class may contain supplementary Unicode codepoints. + * If this flag is set, it does not mean that this class contains + * supplementary characters, only that it may contain them. 
+ */ + protected boolean mayContainSupplCodepoints = false; + + /** + * Returns true if this char class contains the specified character. + * + * @param ch + * the character to check + */ + abstract public boolean contains(int ch); + + /** + * Returns the BitSet representing this character class, or null + * if it has no such representation; this base implementation returns null. + * + * @return the bit set, or null + */ + protected BitSet getBits() { + return null; + } + + protected BitSet getLowHighSurrogates() { + return lowHighSurrogates; + } + + public boolean hasLowHighSurrogates() { + return altSurrogates + ? lowHighSurrogates.nextClearBit(0) < SURROGATE_CARDINALITY + : lowHighSurrogates.nextSetBit(0) < SURROGATE_CARDINALITY; /* NOTE(review): BitSet.nextSetBit returns -1 when no bit is set, so this branch is also true for an empty set - confirm whether that is intended */ + } + + public boolean mayContainSupplCodepoints() { + return mayContainSupplCodepoints; + } + + public int getType() { + return SpecialToken.TOK_CHARCLASS; + } + + public AbstractCharClass getInstance() { + return this; + } + + public AbstractCharClass getSurrogates() { + /* Created on first call and cached in charClassWithSurrogates. */ + if (charClassWithSurrogates == null) { + final BitSet lHS = getLowHighSurrogates(); + + charClassWithSurrogates = new AbstractCharClass() { + public boolean contains(int ch) { + int index = ch - Character.MIN_SURROGATE; + + return ((index >= 0) + && (index < AbstractCharClass.SURROGATE_CARDINALITY)) + ? this.altSurrogates ^ lHS.get(index) + : false; + } + }; + charClassWithSurrogates.setNegative(this.altSurrogates); + } + + return charClassWithSurrogates; + } + + public AbstractCharClass getWithoutSurrogates() { /* created on first call and cached in charClassWithoutSurrogates */ + if (charClassWithoutSurrogates == null) { + final BitSet lHS = getLowHighSurrogates(); + final AbstractCharClass thisClass = this; + + charClassWithoutSurrogates = new AbstractCharClass() { + public boolean contains(int ch) { + int index = ch - Character.MIN_SURROGATE; + + boolean containslHS = ((index >= 0) + && (index < AbstractCharClass.SURROGATE_CARDINALITY)) + ? 
this.altSurrogates ^ lHS.get(index) + : false; + + + return thisClass.contains(ch) + && !containslHS; + } + }; + charClassWithoutSurrogates.setNegative(isNegative()); + charClassWithoutSurrogates.mayContainSupplCodepoints + = mayContainSupplCodepoints; + } + + return charClassWithoutSurrogates; + } + + public boolean hasUCI() { + return false; + } + + /** + * Sets this CharClass to negative form, i.e. if one adds some + * characters and after that sets this class to negative, it will accept all + * the characters except the previously set ones. + * + * Note that this method does not alter the already set characters, + * just the overall meaning of the class. + * + * @see #contains(int) + * @see #intersect(CharClass) + * @see #union(CharClass) + */ + public AbstractCharClass setNegative(boolean value) { + if (alt ^ value) { /* flip only when the requested state differs from the current one */ + alt = !alt; + altSurrogates = !altSurrogates; + } + if (!mayContainSupplCodepoints) { /* NOTE(review): executed for value == false as well - confirm intended */ + mayContainSupplCodepoints = true; + } + return this; + } + + public boolean isNegative() { + return alt; + } + + // ----------------------------------------------------------------- + // Static methods and predefined classes + // ----------------------------------------------------------------- + + public static boolean intersects(int ch1, int ch2) { + return ch1 == ch2; + } + + public static boolean intersects(AbstractCharClass cc, int ch) { + return cc.contains(ch); + } + + public static boolean intersects(AbstractCharClass cc1, + AbstractCharClass cc2) { + if (cc1.getBits() == null || cc2.getBits() == null) /* a side without a bit set: true is reported */ + return true; + return cc1.getBits().intersects(cc2.getBits()); + } + + public static AbstractCharClass getPredefinedClass(String name, + boolean negative) { + return ((LazyCharClass) charClasses.getObject(name)).getValue(negative); + } + + abstract static class LazyCharClass { /* caches one class for negative == false (posValue) and one for negative == true (negValue) */ + AbstractCharClass posValue = null; + + AbstractCharClass negValue = null; + + public AbstractCharClass getValue(boolean negative) { + if (!negative && posValue == null) { + 
posValue = computeValue(); + } else if (negative && negValue == null) { + negValue = computeValue().setNegative(true); + } + if (!negative) + return posValue; + return negValue; + } + + protected abstract AbstractCharClass computeValue(); + } + + static class LazyDigit extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new CharClass().add('0', '9'); + } + } + + static class LazyNonDigit extends LazyDigit { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = super.computeValue().setNegative(true); + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazySpace extends LazyCharClass { + protected AbstractCharClass computeValue() { + /* 9-13 - \t\n\x0B\f\r; 32 - ' ' */ + return new CharClass().add(9, 13).add(32); + } + } + + static class LazyNonSpace extends LazySpace { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = super.computeValue().setNegative(true); + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyWord extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new CharClass().add('a', 'z').add('A', 'Z').add('0', '9') + .add('_'); + } + } + + static class LazyNonWord extends LazyWord { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = super.computeValue().setNegative(true); + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyLower extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new CharClass().add('a', 'z'); + } + } + + static class LazyUpper extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new CharClass().add('A', 'Z'); + } + } + + static class LazyASCII extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new CharClass().add(0x00, 0x7F); + } + } + + static class LazyAlpha extends LazyCharClass { + protected AbstractCharClass computeValue() { + return 
new CharClass().add('a', 'z').add('A', 'Z'); + } + } + + static class LazyAlnum extends LazyAlpha { + protected AbstractCharClass computeValue() { + return ((CharClass) super.computeValue()).add('0', '9'); + } + } + + static class LazyPunct extends LazyCharClass { + protected AbstractCharClass computeValue() { + /* Punctuation !"#$%&'()*+,-./:;<=>?@ [\]^_` {|}~ */ + return new CharClass().add(0x21, 0x40).add(0x5B, 0x60).add(0x7B, + 0x7E); + } + } + + static class LazyGraph extends LazyAlnum { + protected AbstractCharClass computeValue() { + /* plus punctuation */ + return ((CharClass) super.computeValue()).add(0x21, 0x40).add(0x5B, + 0x60).add(0x7B, 0x7E); + } + } + + static class LazyPrint extends LazyGraph { + protected AbstractCharClass computeValue() { + return ((CharClass) super.computeValue()).add(0x20); + } + } + + static class LazyBlank extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new CharClass().add(' ').add('\t'); + } + } + + static class LazyCntrl extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new CharClass().add(0x00, 0x1F).add(0x7F); + } + } + + static class LazyXDigit extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new CharClass().add('0', '9').add('a', 'f').add('A', 'F'); + } + } + + static class LazyRange extends LazyCharClass { + int start, end; + + public LazyRange(int start, int end) { + this.start = start; + this.end = end; + } + + public AbstractCharClass computeValue() { + AbstractCharClass chCl = new CharClass().add(start, end); + return chCl; + } + } + + static class LazySpecialsBlock extends LazyCharClass { + public AbstractCharClass computeValue() { + return new CharClass().add(0xFEFF, 0xFEFF).add(0xFFF0, 0xFFFD); + } + } + + static class LazyCategoryScope extends LazyCharClass { + int category; + + boolean mayContainSupplCodepoints; + + boolean containsAllSurrogates; + + public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints) { 
+ this.mayContainSupplCodepoints = mayContainSupplCodepoints; + this.category = cat; + } + + public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints, + boolean containsAllSurrogates) { + this.containsAllSurrogates = containsAllSurrogates; + this.mayContainSupplCodepoints = mayContainSupplCodepoints; + this.category = cat; + } + + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new UnicodeCategoryScope(category); + if (containsAllSurrogates) { /* mark every position of the surrogate bit set */ + chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY); + } + + chCl.mayContainSupplCodepoints = mayContainSupplCodepoints; + return chCl; + } + } + + static class LazyCategory extends LazyCharClass { /* builds a UnicodeCategory for the stored category value on demand */ + int category; + + boolean mayContainSupplCodepoints; + + boolean containsAllSurrogates; + + public LazyCategory(int cat, boolean mayContainSupplCodepoints) { + this.mayContainSupplCodepoints = mayContainSupplCodepoints; + this.category = cat; + } + public LazyCategory(int cat, boolean mayContainSupplCodepoints, + boolean containsAllSurrogates) { + this.containsAllSurrogates = containsAllSurrogates; + this.mayContainSupplCodepoints = mayContainSupplCodepoints; + this.category = cat; + } + + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new UnicodeCategory(category); + if (containsAllSurrogates) { /* mark every position of the surrogate bit set */ + chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY); + } + chCl.mayContainSupplCodepoints = mayContainSupplCodepoints; + return chCl; + } + } + + static class LazyJavaLowerCase extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isLowerCase(ch); + } + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaUpperCase extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isUpperCase(ch); + 
} + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaWhitespace extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isWhitespace(ch); + } + }; + } + } + + static class LazyJavaMirrored extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new AbstractCharClass() { + public boolean contains(int ch) { + // TODO implement this method and uncomment + //return Character.isMirrored(ch); + return false; + } + }; + } + } + + static class LazyJavaDefined extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isDefined(ch); + } + }; + chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY); + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaDigit extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isDigit(ch); + } + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaIdentifierIgnorable extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isIdentifierIgnorable(ch); + } + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaISOControl extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isISOControl(ch); + } + }; + } + } + + static class LazyJavaJavaIdentifierPart extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean 
contains(int ch) { + return Character.isJavaIdentifierPart(ch); + } + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaJavaIdentifierStart extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isJavaIdentifierStart(ch); + } + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaLetter extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isLetter(ch); + } + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaLetterOrDigit extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isLetterOrDigit(ch); + } + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaSpaceChar extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isSpaceChar(ch); + } + }; + } + } + + static class LazyJavaTitleCase extends LazyCharClass { + protected AbstractCharClass computeValue() { + return new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isTitleCase(ch); + } + }; + } + } + + static class LazyJavaUnicodeIdentifierPart extends LazyCharClass { + protected AbstractCharClass computeValue() { + AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isUnicodeIdentifierPart(ch); + } + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + static class LazyJavaUnicodeIdentifierStart extends LazyCharClass { + protected AbstractCharClass computeValue() { + 
AbstractCharClass chCl = new AbstractCharClass() { + public boolean contains(int ch) { + return Character.isUnicodeIdentifierStart(ch); + } + }; + + chCl.mayContainSupplCodepoints = true; + return chCl; + } + } + + /** + * character classes generated from + * http://www.unicode.org/reports/tr18/ + * http://www.unicode.org/Public/4.1.0/ucd/Blocks.txt + */ + static final class PredefinedCharacterClasses { + static LazyCharClass space = new LazySpace(); + + static LazyCharClass digit = new LazyDigit(); + + static final Object[][] contents = { + { "Lower", new LazyLower() }, //$NON-NLS-1$ + { "Upper", new LazyUpper() }, //$NON-NLS-1$ + { "ASCII", new LazyASCII() }, //$NON-NLS-1$ + { "Alpha", new LazyAlpha() }, //$NON-NLS-1$ + { "Digit", digit }, //$NON-NLS-1$ + { "Alnum", new LazyAlnum() }, //$NON-NLS-1$ + { "Punct", new LazyPunct() }, //$NON-NLS-1$ + { "Graph", new LazyGraph() }, //$NON-NLS-1$ + { "Print", new LazyPrint() }, //$NON-NLS-1$ + { "Blank", new LazyBlank() }, //$NON-NLS-1$ + { "Cntrl", new LazyCntrl() }, //$NON-NLS-1$ + { "XDigit", new LazyXDigit() }, //$NON-NLS-1$ + { "javaLowerCase", new LazyJavaLowerCase() }, //$NON-NLS-1$ + { "javaUpperCase", new LazyJavaUpperCase() }, //$NON-NLS-1$ + { "javaWhitespace", new LazyJavaWhitespace() }, //$NON-NLS-1$ + { "javaMirrored", new LazyJavaMirrored() }, //$NON-NLS-1$ + { "javaDefined", new LazyJavaDefined() }, //$NON-NLS-1$ + { "javaDigit", new LazyJavaDigit() }, //$NON-NLS-1$ + { "javaIdentifierIgnorable", new LazyJavaIdentifierIgnorable() }, //$NON-NLS-1$ + { "javaISOControl", new LazyJavaISOControl() }, //$NON-NLS-1$ + { "javaJavaIdentifierPart", new LazyJavaJavaIdentifierPart() }, //$NON-NLS-1$ + { "javaJavaIdentifierStart", new LazyJavaJavaIdentifierStart() }, //$NON-NLS-1$ + { "javaLetter", new LazyJavaLetter() }, //$NON-NLS-1$ + { "javaLetterOrDigit", new LazyJavaLetterOrDigit() }, //$NON-NLS-1$ + { "javaSpaceChar", new LazyJavaSpaceChar() }, //$NON-NLS-1$ + { "javaTitleCase", new LazyJavaTitleCase() }, 
//$NON-NLS-1$ + { "javaUnicodeIdentifierPart", new LazyJavaUnicodeIdentifierPart() }, //$NON-NLS-1$ + { "javaUnicodeIdentifierStart", new LazyJavaUnicodeIdentifierStart() }, //$NON-NLS-1$ + { "Space", space }, //$NON-NLS-1$ + { "w", new LazyWord() }, //$NON-NLS-1$ + { "W", new LazyNonWord() }, //$NON-NLS-1$ + { "s", space }, //$NON-NLS-1$ + { "S", new LazyNonSpace() }, //$NON-NLS-1$ + { "d", digit }, //$NON-NLS-1$ + { "D", new LazyNonDigit() }, //$NON-NLS-1$ + { "BasicLatin", new LazyRange(0x0000, 0x007F) }, //$NON-NLS-1$ + { "Latin-1Supplement", new LazyRange(0x0080, 0x00FF) }, //$NON-NLS-1$ + { "LatinExtended-A", new LazyRange(0x0100, 0x017F) }, //$NON-NLS-1$ + { "LatinExtended-B", new LazyRange(0x0180, 0x024F) }, //$NON-NLS-1$ + { "IPAExtensions", new LazyRange(0x0250, 0x02AF) }, //$NON-NLS-1$ + { "SpacingModifierLetters", new LazyRange(0x02B0, 0x02FF) }, //$NON-NLS-1$ + { "CombiningDiacriticalMarks", new LazyRange(0x0300, 0x036F) }, //$NON-NLS-1$ + { "Greek", new LazyRange(0x0370, 0x03FF) }, //$NON-NLS-1$ + { "Cyrillic", new LazyRange(0x0400, 0x04FF) }, //$NON-NLS-1$ + { "CyrillicSupplement", new LazyRange(0x0500, 0x052F) }, //$NON-NLS-1$ + { "Armenian", new LazyRange(0x0530, 0x058F) }, //$NON-NLS-1$ + { "Hebrew", new LazyRange(0x0590, 0x05FF) }, //$NON-NLS-1$ + { "Arabic", new LazyRange(0x0600, 0x06FF) }, //$NON-NLS-1$ + { "Syriac", new LazyRange(0x0700, 0x074F) }, //$NON-NLS-1$ + { "ArabicSupplement", new LazyRange(0x0750, 0x077F) }, //$NON-NLS-1$ + { "Thaana", new LazyRange(0x0780, 0x07BF) }, //$NON-NLS-1$ + { "Devanagari", new LazyRange(0x0900, 0x097F) }, //$NON-NLS-1$ + { "Bengali", new LazyRange(0x0980, 0x09FF) }, //$NON-NLS-1$ + { "Gurmukhi", new LazyRange(0x0A00, 0x0A7F) }, //$NON-NLS-1$ + { "Gujarati", new LazyRange(0x0A80, 0x0AFF) }, //$NON-NLS-1$ + { "Oriya", new LazyRange(0x0B00, 0x0B7F) }, //$NON-NLS-1$ + { "Tamil", new LazyRange(0x0B80, 0x0BFF) }, //$NON-NLS-1$ + { "Telugu", new LazyRange(0x0C00, 0x0C7F) }, //$NON-NLS-1$ + { "Kannada", new 
LazyRange(0x0C80, 0x0CFF) }, //$NON-NLS-1$ + { "Malayalam", new LazyRange(0x0D00, 0x0D7F) }, //$NON-NLS-1$ + { "Sinhala", new LazyRange(0x0D80, 0x0DFF) }, //$NON-NLS-1$ + { "Thai", new LazyRange(0x0E00, 0x0E7F) }, //$NON-NLS-1$ + { "Lao", new LazyRange(0x0E80, 0x0EFF) }, //$NON-NLS-1$ + { "Tibetan", new LazyRange(0x0F00, 0x0FFF) }, //$NON-NLS-1$ + { "Myanmar", new LazyRange(0x1000, 0x109F) }, //$NON-NLS-1$ + { "Georgian", new LazyRange(0x10A0, 0x10FF) }, //$NON-NLS-1$ + { "HangulJamo", new LazyRange(0x1100, 0x11FF) }, //$NON-NLS-1$ + { "Ethiopic", new LazyRange(0x1200, 0x137F) }, //$NON-NLS-1$ + { "EthiopicSupplement", new LazyRange(0x1380, 0x139F) }, //$NON-NLS-1$ + { "Cherokee", new LazyRange(0x13A0, 0x13FF) }, //$NON-NLS-1$ + { "UnifiedCanadianAboriginalSyllabics", //$NON-NLS-1$ + new LazyRange(0x1400, 0x167F) }, + { "Ogham", new LazyRange(0x1680, 0x169F) }, //$NON-NLS-1$ + { "Runic", new LazyRange(0x16A0, 0x16FF) }, //$NON-NLS-1$ + { "Tagalog", new LazyRange(0x1700, 0x171F) }, //$NON-NLS-1$ + { "Hanunoo", new LazyRange(0x1720, 0x173F) }, //$NON-NLS-1$ + { "Buhid", new LazyRange(0x1740, 0x175F) }, //$NON-NLS-1$ + { "Tagbanwa", new LazyRange(0x1760, 0x177F) }, //$NON-NLS-1$ + { "Khmer", new LazyRange(0x1780, 0x17FF) }, //$NON-NLS-1$ + { "Mongolian", new LazyRange(0x1800, 0x18AF) }, //$NON-NLS-1$ + { "Limbu", new LazyRange(0x1900, 0x194F) }, //$NON-NLS-1$ + { "TaiLe", new LazyRange(0x1950, 0x197F) }, //$NON-NLS-1$ + { "NewTaiLue", new LazyRange(0x1980, 0x19DF) }, //$NON-NLS-1$ + { "KhmerSymbols", new LazyRange(0x19E0, 0x19FF) }, //$NON-NLS-1$ + { "Buginese", new LazyRange(0x1A00, 0x1A1F) }, //$NON-NLS-1$ + { "PhoneticExtensions", new LazyRange(0x1D00, 0x1D7F) }, //$NON-NLS-1$ + { "PhoneticExtensionsSupplement", new LazyRange(0x1D80, 0x1DBF) }, //$NON-NLS-1$ + { "CombiningDiacriticalMarksSupplement", //$NON-NLS-1$ + new LazyRange(0x1DC0, 0x1DFF) }, + { "LatinExtendedAdditional", new LazyRange(0x1E00, 0x1EFF) }, //$NON-NLS-1$ + { "GreekExtended", new 
LazyRange(0x1F00, 0x1FFF) }, //$NON-NLS-1$ + { "GeneralPunctuation", new LazyRange(0x2000, 0x206F) }, //$NON-NLS-1$ + { "SuperscriptsandSubscripts", new LazyRange(0x2070, 0x209F) }, //$NON-NLS-1$ + { "CurrencySymbols", new LazyRange(0x20A0, 0x20CF) }, //$NON-NLS-1$ + { "CombiningMarksforSymbols", new LazyRange(0x20D0, 0x20FF) }, //$NON-NLS-1$ + { "LetterlikeSymbols", new LazyRange(0x2100, 0x214F) }, //$NON-NLS-1$ + { "NumberForms", new LazyRange(0x2150, 0x218F) }, //$NON-NLS-1$ + { "Arrows", new LazyRange(0x2190, 0x21FF) }, //$NON-NLS-1$ + { "MathematicalOperators", new LazyRange(0x2200, 0x22FF) }, //$NON-NLS-1$ + { "MiscellaneousTechnical", new LazyRange(0x2300, 0x23FF) }, //$NON-NLS-1$ + { "ControlPictures", new LazyRange(0x2400, 0x243F) }, //$NON-NLS-1$ + { "OpticalCharacterRecognition", new LazyRange(0x2440, 0x245F) }, //$NON-NLS-1$ + { "EnclosedAlphanumerics", new LazyRange(0x2460, 0x24FF) }, //$NON-NLS-1$ + { "BoxDrawing", new LazyRange(0x2500, 0x257F) }, //$NON-NLS-1$ + { "BlockElements", new LazyRange(0x2580, 0x259F) }, //$NON-NLS-1$ + { "GeometricShapes", new LazyRange(0x25A0, 0x25FF) }, //$NON-NLS-1$ + { "MiscellaneousSymbols", new LazyRange(0x2600, 0x26FF) }, //$NON-NLS-1$ + { "Dingbats", new LazyRange(0x2700, 0x27BF) }, //$NON-NLS-1$ + { "MiscellaneousMathematicalSymbols-A", //$NON-NLS-1$ + new LazyRange(0x27C0, 0x27EF) }, + { "SupplementalArrows-A", new LazyRange(0x27F0, 0x27FF) }, //$NON-NLS-1$ + { "BraillePatterns", new LazyRange(0x2800, 0x28FF) }, //$NON-NLS-1$ + { "SupplementalArrows-B", new LazyRange(0x2900, 0x297F) }, //$NON-NLS-1$ + { "MiscellaneousMathematicalSymbols-B", //$NON-NLS-1$ + new LazyRange(0x2980, 0x29FF) }, + { "SupplementalMathematicalOperators", //$NON-NLS-1$ + new LazyRange(0x2A00, 0x2AFF) }, + { "MiscellaneousSymbolsandArrows", //$NON-NLS-1$ + new LazyRange(0x2B00, 0x2BFF) }, + { "Glagolitic", new LazyRange(0x2C00, 0x2C5F) }, //$NON-NLS-1$ + { "Coptic", new LazyRange(0x2C80, 0x2CFF) }, //$NON-NLS-1$ + { "GeorgianSupplement", new 
LazyRange(0x2D00, 0x2D2F) }, //$NON-NLS-1$ + { "Tifinagh", new LazyRange(0x2D30, 0x2D7F) }, //$NON-NLS-1$ + { "EthiopicExtended", new LazyRange(0x2D80, 0x2DDF) }, //$NON-NLS-1$ + { "SupplementalPunctuation", new LazyRange(0x2E00, 0x2E7F) }, //$NON-NLS-1$ + { "CJKRadicalsSupplement", new LazyRange(0x2E80, 0x2EFF) }, //$NON-NLS-1$ + { "KangxiRadicals", new LazyRange(0x2F00, 0x2FDF) }, //$NON-NLS-1$ + { "IdeographicDescriptionCharacters", //$NON-NLS-1$ + new LazyRange(0x2FF0, 0x2FFF) }, + { "CJKSymbolsandPunctuation", new LazyRange(0x3000, 0x303F) }, //$NON-NLS-1$ + { "Hiragana", new LazyRange(0x3040, 0x309F) }, //$NON-NLS-1$ + { "Katakana", new LazyRange(0x30A0, 0x30FF) }, //$NON-NLS-1$ + { "Bopomofo", new LazyRange(0x3100, 0x312F) }, //$NON-NLS-1$ + { "HangulCompatibilityJamo", new LazyRange(0x3130, 0x318F) }, //$NON-NLS-1$ + { "Kanbun", new LazyRange(0x3190, 0x319F) }, //$NON-NLS-1$ + { "BopomofoExtended", new LazyRange(0x31A0, 0x31BF) }, //$NON-NLS-1$ + { "CJKStrokes", new LazyRange(0x31C0, 0x31EF) }, //$NON-NLS-1$ + { "KatakanaPhoneticExtensions", new LazyRange(0x31F0, 0x31FF) }, //$NON-NLS-1$ + { "EnclosedCJKLettersandMonths", new LazyRange(0x3200, 0x32FF) }, //$NON-NLS-1$ + { "CJKCompatibility", new LazyRange(0x3300, 0x33FF) }, //$NON-NLS-1$ + { "CJKUnifiedIdeographsExtensionA", //$NON-NLS-1$ + new LazyRange(0x3400, 0x4DB5) }, + { "YijingHexagramSymbols", new LazyRange(0x4DC0, 0x4DFF) }, //$NON-NLS-1$ + { "CJKUnifiedIdeographs", new LazyRange(0x4E00, 0x9FFF) }, //$NON-NLS-1$ + { "YiSyllables", new LazyRange(0xA000, 0xA48F) }, //$NON-NLS-1$ + { "YiRadicals", new LazyRange(0xA490, 0xA4CF) }, //$NON-NLS-1$ + { "ModifierToneLetters", new LazyRange(0xA700, 0xA71F) }, //$NON-NLS-1$ + { "SylotiNagri", new LazyRange(0xA800, 0xA82F) }, //$NON-NLS-1$ + { "HangulSyllables", new LazyRange(0xAC00, 0xD7A3) }, //$NON-NLS-1$ + { "HighSurrogates", new LazyRange(0xD800, 0xDB7F) }, //$NON-NLS-1$ + { "HighPrivateUseSurrogates", new LazyRange(0xDB80, 0xDBFF) }, //$NON-NLS-1$ + { 
"LowSurrogates", new LazyRange(0xDC00, 0xDFFF) }, //$NON-NLS-1$ + { "PrivateUseArea", new LazyRange(0xE000, 0xF8FF) }, //$NON-NLS-1$ + { "CJKCompatibilityIdeographs", new LazyRange(0xF900, 0xFAFF) }, //$NON-NLS-1$ + { "AlphabeticPresentationForms", new LazyRange(0xFB00, 0xFB4F) }, //$NON-NLS-1$ + { "ArabicPresentationForms-A", new LazyRange(0xFB50, 0xFDFF) }, //$NON-NLS-1$ + { "VariationSelectors", new LazyRange(0xFE00, 0xFE0F) }, //$NON-NLS-1$ + { "VerticalForms", new LazyRange(0xFE10, 0xFE1F) }, //$NON-NLS-1$ + { "CombiningHalfMarks", new LazyRange(0xFE20, 0xFE2F) }, //$NON-NLS-1$ + { "CJKCompatibilityForms", new LazyRange(0xFE30, 0xFE4F) }, //$NON-NLS-1$ + { "SmallFormVariants", new LazyRange(0xFE50, 0xFE6F) }, //$NON-NLS-1$ + { "ArabicPresentationForms-B", new LazyRange(0xFE70, 0xFEFF) }, //$NON-NLS-1$ + { "HalfwidthandFullwidthForms", new LazyRange(0xFF00, 0xFFEF) }, //$NON-NLS-1$ + { "all", new LazyRange(0x00, 0x10FFFF) }, //$NON-NLS-1$ + { "Specials", new LazySpecialsBlock() }, //$NON-NLS-1$ + { "Cn", new LazyCategory(Character.UNASSIGNED, true) }, + { "IsL", new LazyCategoryScope(0x3E, true) }, + { "Lu", new LazyCategory(Character.UPPERCASE_LETTER, true) }, + { "Ll", new LazyCategory(Character.LOWERCASE_LETTER, true) }, + { "Lt", new LazyCategory(Character.TITLECASE_LETTER, false) }, + { "Lm", new LazyCategory(Character.MODIFIER_LETTER, false) }, + { "Lo", new LazyCategory(Character.OTHER_LETTER, true) }, + { "IsM", new LazyCategoryScope(0x1C0, true) }, + { "Mn", new LazyCategory(Character.NON_SPACING_MARK, true) }, + { "Me", new LazyCategory(Character.ENCLOSING_MARK, false) }, + { "Mc", new LazyCategory(Character.COMBINING_SPACING_MARK, true) }, + { "N", new LazyCategoryScope(0xE00, true) }, + { "Nd", new LazyCategory(Character.DECIMAL_DIGIT_NUMBER, true) }, + { "Nl", new LazyCategory(Character.LETTER_NUMBER, true) }, + { "No", new LazyCategory(Character.OTHER_NUMBER, true) }, + { "IsZ", new LazyCategoryScope(0x7000, false) }, + { "Zs", new 
LazyCategory(Character.SPACE_SEPARATOR, false) }, + { "Zl", new LazyCategory(Character.LINE_SEPARATOR, false) }, + { "Zp", new LazyCategory(Character.PARAGRAPH_SEPARATOR, false) }, + { "IsC", new LazyCategoryScope(0xF0000, true, true) }, + { "Cc", new LazyCategory(Character.CONTROL, false) }, + { "Cf", new LazyCategory(Character.FORMAT, true) }, + { "Co", new LazyCategory(Character.PRIVATE_USE, true) }, + { "Cs", new LazyCategory(Character.SURROGATE, false, true) }, + {"IsP", new LazyCategoryScope((1 << Character.DASH_PUNCTUATION) | + (1 << Character.START_PUNCTUATION) | + (1 << Character.END_PUNCTUATION) | + (1 << Character.CONNECTOR_PUNCTUATION) | + (1 << Character.OTHER_PUNCTUATION) | + (1 << Character.INITIAL_QUOTE_PUNCTUATION) | + (1 << Character.FINAL_QUOTE_PUNCTUATION), true)}, + { "Pd", new LazyCategory(Character.DASH_PUNCTUATION, false) }, + { "Ps", new LazyCategory(Character.START_PUNCTUATION, false) }, + { "Pe", new LazyCategory(Character.END_PUNCTUATION, false) }, + { "Pc", new LazyCategory(Character.CONNECTOR_PUNCTUATION, false) }, + { "Po", new LazyCategory(Character.OTHER_PUNCTUATION, true) }, + { "IsS", new LazyCategoryScope(0x7E000000, true) }, + { "Sm", new LazyCategory(Character.MATH_SYMBOL, true) }, + { "Sc", new LazyCategory(Character.CURRENCY_SYMBOL, false) }, + { "Sk", new LazyCategory(Character.MODIFIER_SYMBOL, false) }, + { "So", new LazyCategory(Character.OTHER_SYMBOL, true) }, + { "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION, false) }, + { "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION, false) } }; + public Object getObject(String name) { + for (int i = 0; i < contents.length; ++i) { + Object[] row = contents[i]; + if (name.equals(row[0])) { + return row[1]; + } + } + return null; + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractLineTerminator.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractLineTerminator.java new file mode 100644 index 
000000000..99d33e14c --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractLineTerminator.java @@ -0,0 +1,82 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Line terminator factory + * + * @author Nikolay A. 
Kuznetsov + */ +abstract class AbstractLineTerminator { + static AbstractLineTerminator unixLT = null; + + static AbstractLineTerminator unicodeLT = null; + + public abstract boolean isLineTerminator(int ch); + + public abstract boolean isAfterLineTerminator(int ch1, int ch2); + + public static AbstractLineTerminator getInstance(int flag) { + if ((flag & TPattern.UNIX_LINES) != 0) { + if (unixLT != null) + return unixLT; + unixLT = new AbstractLineTerminator() { + public boolean isLineTerminator(int ch) { + return ch == '\n'; + } + + public boolean isAfterLineTerminator(int ch, int ch2) { + return ch == '\n'; + } + }; + return unixLT; + } else { + if (unicodeLT != null) + return unicodeLT; + unicodeLT = new AbstractLineTerminator() { + public boolean isLineTerminator(int ch) { + return (ch == '\n' || ch == '\r' || ch == '\u0085' || (ch | 1) == '\u2029'); + } + + public boolean isAfterLineTerminator(int ch, int ch2) { + return (ch == '\n' || ch == '\u0085' || (ch | 1) == '\u2029') + || (ch == '\r' && ch2 != '\n'); + } + }; + return unicodeLT; + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractSet.java new file mode 100644 index 000000000..19788c1cf --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractSet.java @@ -0,0 +1,273 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Basic class for nodes, representing given regular expression. + * Note: All the classes representing nodes has set prefix; + * + * @author Nikolay A. Kuznetsov + */ +abstract class AbstractSet { + + public static final int TYPE_LEAF = 1 << 0; + + public static final int TYPE_FSET = 1 << 1; + + public static final int TYPE_QUANT = 1 << 3; + + public static final int TYPE_DOTSET = 0x80000000 | '.'; + + /** + * Next node to visit + */ + protected AbstractSet next; + + /** + * Counter for debugging purposes, represent unique node index; + */ + static int counter = 1; + + protected boolean isSecondPassVisited = false; + + protected String index = new Integer(AbstractSet.counter++).toString(); + + private int type = 0; + + public AbstractSet() { + } + + public AbstractSet(AbstractSet n) { + next = n; + } + + /** + * Checks if this node matches in given position and recursively call + * next node matches on positive self match. 
Returns positive integer if + * entire match succeed, negative otherwise + * @param stringIndex - string index to start from; + * @param testString - input string + * @param matchResult - MatchResult to sore result into + * @return -1 if match fails or n > 0; + */ + public abstract int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult); + + /** + * Attempts to apply pattern starting from this set/stringIndex; returns + * index this search was started from, if value is negative, this means that + * this search didn't succeed, additional information could be obtained via + * matchResult; + * + * Note: this is default implementation for find method, it's based on + * matches, subclasses do not have to override find method unless + * more effective find method exists for a particular node type + * (sequence, i.e. substring, for example). Same applies for find back + * method. + * + * @param stringIndex + * starting index + * @param testString + * string to search in + * @param matchResult + * result of the match + * @return last searched index + */ + public int find(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int length = matchResult.getRightBound(); + while (stringIndex <= length) { + if (matches(stringIndex, testString, matchResult) >= 0) { + return stringIndex; + } else { + stringIndex++; + } + } + return -1; + } + + /** + * @param stringIndex - + * an index, to finish search back (left limit) + * @param startSearch - + * an index to start search from (right limit) + * @param testString - + * test string; + * @param matchResult + * match result + * @return an index to start back search next time if this search fails(new + * left bound); if this search fails the value is negative; + */ + public int findBack(int stringIndex, int startSearch, + CharSequence testString, MatchResultImpl matchResult) { + while (startSearch >= stringIndex) { + if (matches(startSearch, testString, matchResult) >= 0) { + return 
startSearch; + } else { + startSearch--; + } + } + return -1; + } + + /** + * Returns true, if this node has consumed any characters during + * positive match attempt, for example node representing character always + * consumes one character if it matches. If particular node matches + * empty sting this method will return false; + * + * @param matchResult + * @return + */ + public abstract boolean hasConsumed(MatchResultImpl matchResult); + + /** + * Returns name for the particular node type. + * Used for debugging purposes. + */ + protected abstract String getName(); + + protected void setType(int type) { + this.type = type; + } + + public int getType() { + return this.type; + } + + protected String getQualifiedName() { + return "<" + index + ":" + getName() + ">"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + } + + public String toString() { + return getQualifiedName(); + } + + /** + * Returns the next. + */ + public AbstractSet getNext() { + return next; + } + + /** + * Sets next abstract set + * @param next + * The next to set. + */ + public void setNext(AbstractSet next) { + this.next = next; + } + + /** + * Returns true if the given node intersects with this one, + * false otherwise. + * This method is being used for quantifiers construction, + * lets consider the following regular expression (a|b)*ccc. + * + * (a|b) does not intersects with "ccc" and thus can be quantified + * greedily (w/o kickbacks), like *+ instead of *. + * + * @param set - usually previous node + * + * @return true if the given node intersects with this one + */ + public boolean first(AbstractSet set) { + return true; + } + + /** + * This method is used for replacement backreferenced + * sets. 
+ * + * @param prev - node who references to this node + * @return null if current node need not to be replaced + * JointSet which is replacement of + * current node otherwise + */ + public JointSet processBackRefReplacement() { + return null; + } + + /** + * This method is used for traversing nodes after the + * first stage of compilation. + */ + public void processSecondPass() { + this.isSecondPassVisited = true; + + if (next != null) { + + if (!next.isSecondPassVisited) { + + /* + * Add here code to do during the pass + */ + JointSet set = next.processBackRefReplacement(); + + if (set != null) { + next.isSecondPassVisited = true; + next =(AbstractSet) set; + } + + /* + * End code to do during the pass + */ + next.processSecondPass(); + } else { + + /* + * We reach node through next but it is already traversed. + * You can see this situation for AltGroupQuantifierSet.next + * when we reach this node through + * AltGroupQuantifierSet.innerset. ... .next + */ + + /* + * Add here code to do during the pass + */ + if (next instanceof SingleSet + && ((FSet) ((JointSet) next).fSet).isBackReferenced) { + next = next.next; + } + + /* + * End code to do during the pass + */ + } + } + } +} \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AheadFSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AheadFSet.java new file mode 100644 index 000000000..2ef5d69f3 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AheadFSet.java @@ -0,0 +1,57 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * LookAhead FSet, always returns true; + * + * @author Nikolay A. 
Kuznetsov + */ +class AheadFSet extends FSet { + + public AheadFSet() { + super(-1); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + return stringIndex; + } + + protected String getName() { + return "AheadFSet"; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltGroupQuantifierSet.java new file mode 100644 index 000000000..83ea21ad4 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltGroupQuantifierSet.java @@ -0,0 +1,69 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents "?" quantifier over composite sets. + * @author Nikolay A. Kuznetsov + */ +class AltGroupQuantifierSet extends GroupQuantifierSet { + + public AltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, + int type) { + super(innerSet, next, type); + + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + if (!innerSet.hasConsumed(matchResult)) + return next.matches(stringIndex, testString, matchResult); + + int nextIndex = innerSet.matches(stringIndex, testString, matchResult); + + if (nextIndex < 0) { + return next.matches(stringIndex, testString, matchResult); + } else { + return nextIndex; + } + } + + public void setNext(AbstractSet next) { + super.setNext(next); + innerSet.setNext(next); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltQuantifierSet.java new file mode 100644 index 000000000..571ecde3b --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltQuantifierSet.java @@ -0,0 +1,64 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents "?" quantifier over leaf sets. + * + * @author Nikolay A. Kuznetsov + */ +class AltQuantifierSet extends LeafQuantifierSet { + + public AltQuantifierSet(LeafSet innerSet, AbstractSet next, int type) { + super(innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int shift = 0; + + if ((shift = innerSet.matches(stringIndex, testString, matchResult)) >= 0) { + return shift; + } else { + return next.matches(stringIndex, testString, matchResult); + } + } + + public void setNext(AbstractSet next) { + super.setNext(next); + innerSet.setNext(next); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicFSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicFSet.java new file mode 100644 index 000000000..9c2b92d3d --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicFSet.java @@ -0,0 +1,70 @@ +/* + * Copyright 2014 Alexey Andreev. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * @author Nikolay A. 
Kuznetsov + */ +class AtomicFSet extends FSet { + + int index; + + public AtomicFSet(int groupIndex) { + super(groupIndex); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int gr = getGroupIndex(); + matchResult.setConsumed(gr, stringIndex - matchResult.getConsumed(gr)); + index = stringIndex; + + return stringIndex; + } + + public int getIndex() { + return index; + } + + protected String getName() { + return "AtomicFSet"; //$NON-NLS-1$ + } + + public boolean hasConsumed(MatchResultImpl mr) { + return false; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicJointSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicJointSet.java new file mode 100644 index 000000000..56e203beb --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicJointSet.java @@ -0,0 +1,87 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.ArrayList; + +/** + * This class represent atomic group (?>X), once X matches, + * this match become unchangeable till the end of the match. + * + * @author Nikolay A. Kuznetsov + */ +class AtomicJointSet extends NonCapJointSet { + + public AtomicJointSet(ArrayList children, FSet fSet) { + super(children, fSet); + } + + /** + * Returns stringIndex+shift, the next position to match + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int start = matchResult.getConsumed(groupIndex); + matchResult.setConsumed(groupIndex, stringIndex); + + int size = children.size(); + for (int i = 0; i < size; i++) { + AbstractSet e = (AbstractSet) children.get(i); + int shift = e.matches(stringIndex, testString, matchResult); + if (shift >= 0) { + // AtomicFset always returns true, but saves the index to run + // this next.match() from; + return next.matches(((AtomicFSet) fSet).getIndex(), testString, + matchResult); + } + } + + matchResult.setConsumed(groupIndex, start); + return -1; + } + + public void setNext(AbstractSet next) { + this.next = next; + } + + public AbstractSet getNext() { + return next; + } + + protected String getName() { + return "NonCapJointSet"; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferenceSet.java new file mode 100644 index 
000000000..35a56ebbc --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferenceSet.java @@ -0,0 +1,123 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Back reference node, i.e. \1-9; + * + * @author Nikolay A. 
Kuznetsov + */ +class BackReferenceSet extends CIBackReferenceSet { + + public BackReferenceSet(int groupIndex, int consCounter) { + super(groupIndex, consCounter); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + String group = getString(matchResult); + if (group == null + || (stringIndex + group.length()) > matchResult.getRightBound()) + return -1; + int shift = testString.toString().startsWith(group, stringIndex) ? group + .length() + : -1; + + if (shift < 0) { + return -1; + } + matchResult.setConsumed(consCounter, shift); + return next.matches(stringIndex + shift, testString, matchResult); + } + + public int find(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + String group = getString(matchResult); + int strLength = matchResult.getLeftBound(); + + if (group == null || (strIndex + group.length()) > strLength) + return -1; + + String testStr = testString.toString(); + + while (strIndex <= strLength) { + strIndex = testStr.indexOf(group, strIndex); + + if (strIndex < 0) + return -1; + if (next + .matches(strIndex + group.length(), testString, matchResult) >= 0) { + return strIndex; + } + + strIndex++; + } + + return -1; + } + + public int findBack(int strIndex, int lastIndex, CharSequence testString, + MatchResultImpl matchResult) { + String group = getString(matchResult); + + if (group == null) + return -1; + + String testStr = testString.toString(); + + while (lastIndex >= strIndex) { + lastIndex = testStr.lastIndexOf(group, lastIndex); + + if (lastIndex < 0 || lastIndex < strIndex) + return -1; + if (next.matches(lastIndex + group.length(), testString, + matchResult) >= 0) { + return lastIndex; + } + + lastIndex--; + } + return -1; + } + + public boolean first(AbstractSet set) { + return true; + } + + public String getName() { + return "back reference: " + this.groupIndex; //$NON-NLS-1$ + } +} diff --git 
a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferencedSingleSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferencedSingleSet.java new file mode 100644 index 000000000..8e588d2e9 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferencedSingleSet.java @@ -0,0 +1,123 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay Kuznetsov + */ + +package org.teavm.classlib.java.util.regex; + +/** + * Group node over subexpression w/o alternations. 
+ * This node is used if current group is referenced + * via backreference. + */ + +class BackReferencedSingleSet extends SingleSet { + + /* + * This class is needed only for overwriting find() + * and findBack() methods of SingleSet class, which is being + * back referenced. The following example explains the need + * for such substitution: + * Let's consider the pattern ".*(.)\\1". + * Leading .* works as follows: finds line terminator and runs findBack + * from that point. findBack method in its turn (in contrast to matches) + * sets group boundaries on the back trace. Thus at the point we + * try to match back reference(\\1) groups are not yet set. + * + * To fix this problem we replace backreferenced groups with instances of + * this class, which will use matches instead of find; this will affect + * performance, but ensure correctness of the match. + */ + + public BackReferencedSingleSet(AbstractSet child, FSet fSet) { + super(child, fSet); + } + + public BackReferencedSingleSet(SingleSet node) { + super(node.kid, ((FSet) node.fSet)); + } + + public int find(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int res = 0; + int lastIndex = matchResult.getRightBound(); + int startSearch = stringIndex; + + for (; startSearch <= lastIndex; startSearch++) { + int saveStart = matchResult.getStart(groupIndex); + + matchResult.setStart(groupIndex, startSearch); + res = kid.matches(startSearch, testString, matchResult); + if (res >= 0) { + res = startSearch; + break; + } else { + matchResult.setStart(groupIndex, saveStart); + } + } + + return res; + } + + public int findBack(int stringIndex, int lastIndex, + CharSequence testString, MatchResultImpl matchResult) { + int res = 0; + int startSearch = lastIndex; + + for (; startSearch >= stringIndex; startSearch--) { + int saveStart = matchResult.getStart(groupIndex); + + matchResult.setStart(groupIndex, startSearch); + res = kid.matches(startSearch, testString, matchResult); + if (res >= 0) { + 
res = startSearch; + break; + } else { + matchResult.setStart(groupIndex, saveStart); + } + } + + return res; + } + + /** + * This method is used for replacement backreferenced + * sets. + * + * @param prev - node who references to this node + */ + public JointSet processBackRefReplacement() { + return null; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BehindFSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BehindFSet.java new file mode 100644 index 000000000..37bd75872 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BehindFSet.java @@ -0,0 +1,62 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * FSet for lookbehind constructs. Checks if string index saved by corresponding + * jointSet in "consumers" equals to current index and return current string + * index, return -1 otherwise. + * + * @author Nikolay A. Kuznetsov + */ +class BehindFSet extends FSet { + + public BehindFSet(int groupIndex) { + super(groupIndex); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int gr = getGroupIndex(); + int rightBound = matchResult.getConsumed(gr); + return (rightBound == stringIndex) ? stringIndex : -1; + } + + protected String getName() { + return "BehindFSet"; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIBackReferenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIBackReferenceSet.java new file mode 100644 index 000000000..45c3176ca --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIBackReferenceSet.java @@ -0,0 +1,106 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Case Insensitive back reference node; + * + * @author Nikolay A. Kuznetsov + */ +class CIBackReferenceSet extends JointSet { + + protected int referencedGroup; + + protected int consCounter; + + /** + * @param substring + */ + public CIBackReferenceSet(int groupIndex, int consCounter) { + this.referencedGroup = groupIndex; + this.consCounter = consCounter; + } + + public int accepts(int strIndex, CharSequence testString) { + throw new TPatternSyntaxException("", "", 0); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + String group = getString(matchResult); + + if (group == null + || (stringIndex + group.length()) > matchResult.getRightBound()) + return -1; + + for (int i = 0; i < group.length(); i++) { + if (group.charAt(i) != testString.charAt(stringIndex + i) + && TPattern.getSupplement(group.charAt(i)) != testString + .charAt(stringIndex + i)) { + return -1; + } + } + matchResult.setConsumed(consCounter, group.length()); + return next.matches(stringIndex + group.length(), testString, + matchResult); + } + + public AbstractSet getNext() { + return this.next; + } + + public void setNext(AbstractSet next) { + this.next = 
next; + } + + protected String getString(MatchResultImpl matchResult) { + String res = matchResult.getGroupNoCheck(referencedGroup); + return res; + // return (res != null) ? res : ""; + } + + public String getName() { + return "CI back reference: " + this.groupIndex; //$NON-NLS-1$ + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + int cons; + boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); + matchResult.setConsumed(consCounter, -1); + return res; + } + +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CICharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CICharSet.java new file mode 100644 index 000000000..2d4537d62 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CICharSet.java @@ -0,0 +1,67 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents node accepting single character in + * case insensitive manner. + * + * @author Nikolay A. Kuznetsov + */ +class CICharSet extends LeafSet { + + private char ch; + + private char supplement; + + public CICharSet(char ch) { + this.ch = ch; + this.supplement = TPattern.getSupplement(ch); + } + + public int accepts(int strIndex, CharSequence testString) { + return (this.ch == testString.charAt(strIndex) + || this.supplement == testString.charAt(strIndex)) ? 1 : -1; + } + + protected String getName() { + return "CI " + ch; //$NON-NLS-1$ + } + + protected char getChar() { + return ch; + } +} \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIDecomposedCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIDecomposedCharSet.java new file mode 100644 index 000000000..24204e94c --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIDecomposedCharSet.java @@ -0,0 +1,50 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * Represents case insensitive + * canonical decomposition of + * Unicode character. Is used when + * CANON_EQ flag of Pattern class + * is specified. 
+ */ +class CIDecomposedCharSet extends DecomposedCharSet{ + + /* + * Just only a stub + */ + public CIDecomposedCharSet(int [] decomp, int decomposedCharLength) { + super(decomp, decomposedCharLength); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CISequenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CISequenceSet.java new file mode 100644 index 000000000..797023461 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CISequenceSet.java @@ -0,0 +1,71 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * This class represents ASCII case insensitive character sequences. + * + * @author Nikolay A. Kuznetsov + */ +class CISequenceSet extends LeafSet { + + private String string = null; + + /** + * Constructs this sequence set + */ + CISequenceSet(StringBuffer substring) { + this.string = substring.toString(); + this.charCount = substring.length(); + } + + public int accepts(int strIndex, CharSequence testString) { + for (int i = 0; i < string.length(); i++) { + if (string.charAt(i) != testString.charAt(strIndex + i) + && TPattern.getSupplement(string.charAt(i)) != testString + .charAt(strIndex + i)) { + return -1; + } + } + + return string.length(); + + } + + public String getName() { + return "CI sequence: " + string; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CanClasses.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CanClasses.java new file mode 100644 index 000000000..a6e301e77 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CanClasses.java @@ -0,0 +1,439 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * This class gives us a hashtable that contains canonical + * classes that are generated from + * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt. + */ +class CanClasses{ + + private static IntHash table = null; + + private CanClasses() { + } + + public static IntHash getHashCanClasses() { + if (table != null) { + return table; + } else { + table = new IntHash(384); + table.put(0x20D0, 230); + table.put(0x0A4D, 9); + table.put(0x0E49, 107); + table.put(0x0954, 230); + table.put(0x0E48, 107); + table.put(0x0953, 230); + table.put(0x0952, 220); + table.put(0x0951, 230); + table.put(0x094D, 9); + table.put(0x0E3A, 9); + table.put(0x17DD, 230); + table.put(0x0A3C, 7); + table.put(0x0E39, 103); + table.put(0x0E38, 103); + table.put(0x17D2, 9); + table.put(0x093C, 7); + table.put(0x06ED, 220); + table.put(0x06EC, 230); + table.put(0x06EB, 230); + table.put(0x06EA, 220); + table.put(0x06E8, 230); + table.put(0x06E7, 230); + table.put(0x06E4, 230); + table.put(0x06E3, 220); + table.put(0x06E2, 230); + table.put(0x06E1, 230); + table.put(0x06E0, 230); + table.put(0x06DF, 230); + table.put(0x06DC, 230); + table.put(0x06DB, 230); + table.put(0x06DA, 230); + table.put(0x0BCD, 9); + table.put(0x0486, 230); + 
table.put(0x0485, 230); + table.put(0x0484, 230); + table.put(0x0FC6, 220); + table.put(0x0483, 230); + table.put(0x06D9, 230); + table.put(0x06D8, 230); + table.put(0x06D7, 230); + table.put(0x06D6, 230); + table.put(0xA806, 9); + table.put(0x193B, 220); + table.put(0x193A, 230); + table.put(0x1939, 222); + table.put(0x0D4D, 9); + table.put(0x1A18, 220); + table.put(0x1A17, 230); + table.put(0x1D1AD, 230); + table.put(0x1D1AC, 230); + table.put(0x1D1AB, 230); + table.put(0x1D1AA, 230); + table.put(0xFB1E, 26); + table.put(0x0ECB, 122); + table.put(0x0ECA, 122); + table.put(0x0ACD, 9); + table.put(0x0EC9, 122); + table.put(0x0EC8, 122); + table.put(0x09CD, 9); + table.put(0x0ABC, 7); + table.put(0x0EB9, 118); + table.put(0x0EB8, 118); + table.put(0x05C7, 18); + table.put(0x05C5, 220); + table.put(0x05C4, 230); + table.put(0x05C2, 25); + table.put(0x05C1, 24); + table.put(0x036F, 230); + table.put(0x036E, 230); + table.put(0x09BC, 7); + table.put(0x036D, 230); + table.put(0x036C, 230); + table.put(0x036B, 230); + table.put(0x036A, 230); + table.put(0x05BF, 23); + table.put(0x05BD, 22); + table.put(0x05BC, 21); + table.put(0x05BB, 20); + table.put(0x0C56, 91); + table.put(0x0C55, 84); + table.put(0x0369, 230); + table.put(0x0368, 230); + table.put(0x0367, 230); + table.put(0x0366, 230); + table.put(0x0365, 230); + table.put(0x0364, 230); + table.put(0x0363, 230); + table.put(0x0362, 233); + table.put(0x05B9, 19); + table.put(0x0361, 234); + table.put(0x05B8, 18); + table.put(0x0360, 234); + table.put(0x05B7, 17); + table.put(0x05B6, 16); + table.put(0x05B5, 15); + table.put(0x05B4, 14); + table.put(0x05B3, 13); + table.put(0x05B2, 12); + table.put(0x05B1, 11); + table.put(0x0C4D, 9); + table.put(0x05B0, 10); + table.put(0x035F, 233); + table.put(0x035E, 234); + table.put(0x035D, 234); + table.put(0x035C, 233); + table.put(0x035B, 230); + table.put(0x035A, 220); + table.put(0x05AF, 230); + table.put(0x05AE, 228); + table.put(0x05AD, 222); + table.put(0x05AC, 230); + 
table.put(0x05AB, 230); + table.put(0x05AA, 220); + table.put(0x1039, 9); + table.put(0x0359, 220); + table.put(0x0358, 232); + table.put(0x1037, 7); + table.put(0x0357, 230); + table.put(0x0356, 220); + table.put(0x0355, 220); + table.put(0x0354, 220); + table.put(0x0353, 220); + table.put(0x0352, 230); + table.put(0x05A9, 230); + table.put(0x0351, 230); + table.put(0x05A8, 230); + table.put(0x0350, 230); + table.put(0x05A7, 220); + table.put(0x05A6, 220); + table.put(0x05A5, 220); + table.put(0x05A4, 220); + table.put(0x05A3, 220); + table.put(0x05A2, 220); + table.put(0x074A, 230); + table.put(0x05A1, 230); + table.put(0x05A0, 230); + table.put(0x034E, 220); + table.put(0x034D, 220); + table.put(0x034C, 230); + table.put(0x034B, 230); + table.put(0x0749, 230); + table.put(0x034A, 230); + table.put(0x0748, 220); + table.put(0x0747, 230); + table.put(0x0746, 220); + table.put(0x0745, 230); + table.put(0x0744, 220); + table.put(0x0743, 230); + table.put(0x0742, 220); + table.put(0x0741, 230); + table.put(0x0349, 220); + table.put(0x0740, 230); + table.put(0x0348, 220); + table.put(0x0347, 220); + table.put(0x0346, 230); + table.put(0x0345, 240); + table.put(0x0344, 230); + table.put(0x0343, 230); + table.put(0x0342, 230); + table.put(0x0341, 230); + table.put(0x0340, 230); + table.put(0x073F, 230); + table.put(0x073E, 220); + table.put(0x073D, 230); + table.put(0x073C, 220); + table.put(0x073B, 220); + table.put(0x073A, 230); + table.put(0x309A, 8); + table.put(0x033F, 230); + table.put(0x033E, 230); + table.put(0x033D, 230); + table.put(0x033C, 220); + table.put(0x033B, 220); + table.put(0x0739, 220); + table.put(0x033A, 220); + table.put(0x0738, 220); + table.put(0x0737, 220); + table.put(0x0736, 230); + table.put(0x3099, 8); + table.put(0x0735, 230); + table.put(0xFE23, 230); + table.put(0x0734, 220); + table.put(0x0F87, 230); + table.put(0xFE22, 230); + table.put(0x0733, 230); + table.put(0x0F86, 230); + table.put(0xFE21, 230); + table.put(0x0732, 230); + 
table.put(0xFE20, 230); + table.put(0x0731, 220); + table.put(0x0F84, 9); + table.put(0x0339, 220); + table.put(0x0730, 230); + table.put(0x0F83, 230); + table.put(0x0338, 1); + table.put(0x0F82, 230); + table.put(0x0337, 1); + table.put(0x0336, 1); + table.put(0x0F80, 130); + table.put(0x0335, 1); + table.put(0x0334, 1); + table.put(0x0333, 220); + table.put(0x0332, 220); + table.put(0x0331, 220); + table.put(0x0330, 220); + table.put(0x1D244, 230); + table.put(0x1D243, 230); + table.put(0x1D242, 230); + table.put(0x0F7D, 130); + table.put(0x0F7C, 130); + table.put(0x0F7B, 130); + table.put(0x0F7A, 130); + table.put(0x032F, 220); + table.put(0x032E, 220); + table.put(0x032D, 220); + table.put(0x032C, 220); + table.put(0x032B, 220); + table.put(0x032A, 220); + table.put(0x0F74, 132); + table.put(0x0329, 220); + table.put(0x0328, 202); + table.put(0x0F72, 130); + table.put(0x0327, 202); + table.put(0x0DCA, 9); + table.put(0x0F71, 129); + table.put(0x0326, 220); + table.put(0x0325, 220); + table.put(0x0324, 220); + table.put(0x0323, 220); + table.put(0x0322, 202); + table.put(0x0321, 202); + table.put(0x0320, 220); + table.put(0x10A3F, 9); + table.put(0x135F, 230); + table.put(0x10A3A, 220); + table.put(0x031F, 220); + table.put(0x031E, 220); + table.put(0x031D, 220); + table.put(0x031C, 220); + table.put(0x031B, 216); + table.put(0x031A, 232); + table.put(0x10A39, 1); + table.put(0x10A38, 230); + table.put(0x0711, 36); + table.put(0x0319, 220); + table.put(0x0318, 220); + table.put(0x0317, 220); + table.put(0x0316, 220); + table.put(0x0315, 232); + table.put(0x0314, 230); + table.put(0x1D18B, 220); + table.put(0x0313, 230); + table.put(0x1D18A, 220); + table.put(0x0312, 230); + table.put(0x0311, 230); + table.put(0x0670, 35); + table.put(0x0310, 230); + table.put(0x1D189, 230); + table.put(0x1D188, 230); + table.put(0x1D187, 230); + table.put(0x1D186, 230); + table.put(0x030F, 230); + table.put(0x1D185, 230); + table.put(0x030E, 230); + table.put(0x030D, 230); + 
table.put(0x030C, 230); + table.put(0x1D182, 220); + table.put(0x030B, 230); + table.put(0x1D181, 220); + table.put(0x030A, 230); + table.put(0x1D180, 220); + table.put(0x0309, 230); + table.put(0x0308, 230); + table.put(0x1D17F, 220); + table.put(0x0307, 230); + table.put(0x1D17E, 220); + table.put(0x0306, 230); + table.put(0x1D17D, 220); + table.put(0x0305, 230); + table.put(0x1D17C, 220); + table.put(0x0304, 230); + table.put(0x1D17B, 220); + table.put(0x0303, 230); + table.put(0x0302, 230); + table.put(0x0301, 230); + table.put(0x0300, 230); + table.put(0x065E, 230); + table.put(0x065D, 230); + table.put(0x065C, 220); + table.put(0x065B, 230); + table.put(0x1D172, 216); + table.put(0x065A, 230); + table.put(0x1D171, 216); + table.put(0x0B4D, 9); + table.put(0x1D170, 216); + table.put(0x1734, 9); + table.put(0x0659, 230); + table.put(0x0658, 230); + table.put(0x0657, 230); + table.put(0x1D16F, 216); + table.put(0x0656, 220); + table.put(0x1D16E, 216); + table.put(0x0655, 220); + table.put(0x1D16D, 226); + table.put(0x0654, 230); + table.put(0x0653, 230); + table.put(0x0652, 34); + table.put(0x0651, 33); + table.put(0x0650, 32); + table.put(0x10A0F, 230); + table.put(0x10A0D, 220); + table.put(0x1D169, 1); + table.put(0x1D168, 1); + table.put(0x1D167, 1); + table.put(0x064F, 31); + table.put(0x1D166, 216); + table.put(0x064E, 30); + table.put(0x1D165, 216); + table.put(0x064D, 29); + table.put(0x064C, 28); + table.put(0x064B, 27); + table.put(0x0B3C, 7); + table.put(0x0F39, 216); + table.put(0x0F37, 220); + table.put(0x0F35, 220); + table.put(0x1DC3, 230); + table.put(0x1DC2, 220); + table.put(0x1DC1, 230); + table.put(0x1DC0, 230); + table.put(0x059F, 230); + table.put(0x1714, 9); + table.put(0x059E, 230); + table.put(0x059D, 230); + table.put(0x059C, 230); + table.put(0x059B, 220); + table.put(0x059A, 222); + table.put(0x0599, 230); + table.put(0x0598, 230); + table.put(0x0597, 230); + table.put(0x0596, 220); + table.put(0x0595, 230); + table.put(0x0594, 230); 
+ table.put(0x0593, 230); + table.put(0x302F, 224); + table.put(0x0592, 230); + table.put(0x302E, 224); + table.put(0x0591, 220); + table.put(0x302D, 222); + table.put(0x302C, 232); + table.put(0x302B, 228); + table.put(0x302A, 218); + table.put(0x0F19, 220); + table.put(0x0F18, 220); + table.put(0x0CCD, 9); + table.put(0x0615, 230); + table.put(0x0614, 230); + table.put(0x18A9, 228); + table.put(0x0613, 230); + table.put(0x0612, 230); + table.put(0x0611, 230); + table.put(0x0CBC, 7); + table.put(0x0610, 230); + table.put(0x20EB, 1); + table.put(0x20EA, 1); + table.put(0x20E9, 230); + table.put(0x20E8, 220); + table.put(0x20E7, 230); + table.put(0x20E6, 1); + table.put(0x20E5, 1); + table.put(0x20E1, 230); + table.put(0x20DC, 230); + table.put(0x20DB, 230); + table.put(0x20DA, 1); + table.put(0x20D9, 1); + table.put(0x20D8, 1); + table.put(0x20D7, 230); + table.put(0x20D6, 230); + table.put(0x0E4B, 107); + table.put(0x20D5, 230); + table.put(0x0E4A, 107); + table.put(0x20D4, 230); + table.put(0x20D3, 1); + table.put(0x20D2, 1); + table.put(0x20D1, 230); + return table; + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharClass.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharClass.java new file mode 100644 index 000000000..f2e78f33c --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharClass.java @@ -0,0 +1,587 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.BitSet; + +/** + * User defined character classes ([abef]). See AbstractCharClass + * documentation for more details. + * + * @author Nikolay A. Kuznetsov + */ +class CharClass extends AbstractCharClass { + + // Flag indicates if we add supplement upper/lower case + boolean ci = false; + + boolean uci = false; + + // Flag indicates if there are unicode supplements + boolean hasUCI = false; + + boolean invertedSurrogates = false; + + boolean inverted = false; + + boolean hideBits = false; + + BitSet bits = new BitSet(); + + AbstractCharClass nonBitSet = null; + + public CharClass() { + } + + public CharClass(boolean ci, boolean uci) { + this.ci = ci; + this.uci = uci; + } + + public CharClass(boolean negative, boolean ci, boolean uci) { + this(ci, uci); + setNegative(negative); + } + + /* + * We can use this method safely even if nonBitSet != null + * due to specific of range constructions in regular expressions. 
+ */ + public CharClass add(int ch) { + if (ci) { + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { + if (!inverted) { + bits.set(TPattern.getSupplement((char) ch)); + } else { + bits.clear(TPattern.getSupplement((char) ch)); + } + } else if (uci && ch > 128) { + hasUCI = true; + ch = Character.toLowerCase(Character.toUpperCase(ch)); + // return this; + } + } + + if (Lexer.isHighSurrogate(ch) || Lexer.isLowSurrogate(ch)) { + if (!invertedSurrogates) { + lowHighSurrogates.set(ch - Character.MIN_SURROGATE); + } else { + lowHighSurrogates.clear(ch - Character.MIN_SURROGATE); + } + } + + if (!inverted) { + bits.set(ch); + } else + bits.clear(ch); + + if (!mayContainSupplCodepoints && Character.isSupplementaryCodePoint(ch)) { + mayContainSupplCodepoints = true; + } + + return this; + } + + /* + * The difference between add(AbstractCharClass) and union(AbstractCharClass) + * is that add() is used for constructions like "[^abc\\d]" + * (this pattern doesn't match "1") + * while union is used for constructions like "[^abc[\\d]]" + * (this pattern matches "1"). + */ + public CharClass add(final AbstractCharClass cc) { + + if (!mayContainSupplCodepoints && cc.mayContainSupplCodepoints) { + mayContainSupplCodepoints = true; + } + + if (!invertedSurrogates) { + + //A | !B = ! ((A ^ B) & B) + if (cc.altSurrogates) { + lowHighSurrogates.xor(cc.getLowHighSurrogates()); + lowHighSurrogates.and(cc.getLowHighSurrogates()); + altSurrogates = !altSurrogates; + invertedSurrogates = true; + + //A | B + } else { + lowHighSurrogates.or(cc.getLowHighSurrogates()); + } + } else { + + //!A | !B = !(A & B) + if (cc.altSurrogates) { + lowHighSurrogates.and(cc.getLowHighSurrogates()); + + //!A | B = !(A & !B) + } else { + lowHighSurrogates.andNot(cc.getLowHighSurrogates()); + } + } + + if (!hideBits && cc.getBits() != null) { + if (!inverted) { + + //A | !B = ! 
((A ^ B) & B) + if (cc.isNegative()) { + bits.xor(cc.getBits()); + bits.and(cc.getBits()); + alt = !alt; + inverted = true; + + //A | B + } else { + bits.or(cc.getBits()); + } + } else { + + //!A | !B = !(A & B) + if (cc.isNegative()) { + bits.and(cc.getBits()); + + //!A | B = !(A & !B) + } else { + bits.andNot(cc.getBits()); + } + } + } else { + final boolean curAlt = alt; + + if (nonBitSet == null) { + + if (curAlt && !inverted && bits.isEmpty()) { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return cc.contains(ch); + } + }; + //alt = true; + } else { + + /* + * We keep the value of alt unchanged for + * constructions like [^[abc]fgb] by using + * the formula a ^ b == !a ^ !b. + */ + if (curAlt) { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return !((curAlt ^ bits.get(ch)) + || ((curAlt ^ inverted) ^ cc.contains(ch))); + } + }; + //alt = true + } else { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return (curAlt ^ bits.get(ch)) + || ((curAlt ^ inverted) ^ cc.contains(ch)); + } + }; + //alt = false + } + } + + hideBits = true; + } else { + final AbstractCharClass nb = nonBitSet; + + if (curAlt) { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return !(curAlt ^ (nb.contains(ch) || cc.contains(ch))); + } + }; + //alt = true + } else { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return curAlt ^ (nb.contains(ch) || cc.contains(ch)); + } + }; + //alt = false + } + } + } + + return this; + } + + public CharClass add(int st, int end) { + if (st > end) + throw new IllegalArgumentException(); + if (!ci + + //no intersection with surrogate characters + && (end < Character.MIN_SURROGATE + || st > Character.MAX_SURROGATE)) { + if (!inverted) { + bits.set(st, end + 1); + } else { + bits.clear(st, end + 1); + } + } else { + for (int i = st; i < end + 1; i++) { + add(i); + } + } + return this; + } + + // OR operation 
+ public void union(final AbstractCharClass clazz) { + if (!mayContainSupplCodepoints + && clazz.mayContainSupplCodepoints) { + mayContainSupplCodepoints = true; + } + + if (clazz.hasUCI()) + this.hasUCI = true; + + + if (altSurrogates ^ clazz.altSurrogates) { + + //!A | B = !(A & !B) + if (altSurrogates) { + lowHighSurrogates.andNot(clazz.getLowHighSurrogates()); + + //A | !B = !((A ^ B) & B) + } else { + lowHighSurrogates.xor(clazz.getLowHighSurrogates()); + lowHighSurrogates.and(clazz.getLowHighSurrogates()); + altSurrogates = true; + } + + } else { + + //!A | !B = !(A & B) + if (altSurrogates) { + lowHighSurrogates.and(clazz.getLowHighSurrogates()); + + //A | B + } else { + lowHighSurrogates.or(clazz.getLowHighSurrogates()); + } + } + + if (!hideBits && clazz.getBits() != null) { + if (alt ^ clazz.isNegative()) { + + //!A | B = !(A & !B) + if (alt) { + bits.andNot(clazz.getBits()); + + //A | !B = !((A ^ B) & B) + } else { + bits.xor(clazz.getBits()); + bits.and(clazz.getBits()); + alt = true; + } + + } else { + + //!A | !B = !(A & B) + if (alt) { + bits.and(clazz.getBits()); + + //A | B + } else { + bits.or(clazz.getBits()); + } + } + } else { + final boolean curAlt = alt; + + if (nonBitSet == null) { + + if (!inverted && bits.isEmpty()) { + if (curAlt) { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return !clazz.contains(ch); + } + }; + //alt = true + } else { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return clazz.contains(ch); + } + }; + //alt = false + } + } else { + + if (curAlt) { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return !(clazz.contains(ch) || (curAlt ^ bits.get(ch))); + } + }; + //alt = true + } else { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return clazz.contains(ch) || (curAlt ^ bits.get(ch)); + } + }; + //alt = false + } + } + hideBits = true; + } else { + final AbstractCharClass nb = nonBitSet; + + if 
(curAlt) { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return !((curAlt ^ nb.contains(ch)) || clazz.contains(ch)); + } + }; + //alt = true + } else { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return (curAlt ^ nb.contains(ch)) || clazz.contains(ch); + } + }; + //alt = false + } + } + } + } + + // AND operation + public void intersection(final AbstractCharClass clazz) { + if (!mayContainSupplCodepoints + && clazz.mayContainSupplCodepoints) { + mayContainSupplCodepoints = true; + } + + if (clazz.hasUCI()) + this.hasUCI = true; + + if (altSurrogates ^ clazz.altSurrogates) { + + //!A & B = ((A ^ B) & B) + if (altSurrogates) { + lowHighSurrogates.xor(clazz.getLowHighSurrogates()); + lowHighSurrogates.and(clazz.getLowHighSurrogates()); + altSurrogates = false; + + //A & !B + } else { + lowHighSurrogates.andNot(clazz.getLowHighSurrogates()); + } + } else { + + //!A & !B = !(A | B) + if (altSurrogates) { + lowHighSurrogates.or(clazz.getLowHighSurrogates()); + + //A & B + } else { + lowHighSurrogates.and(clazz.getLowHighSurrogates()); + } + } + + if (!hideBits && clazz.getBits() != null) { + + if (alt ^ clazz.isNegative()) { + + //!A & B = ((A ^ B) & B) + if (alt) { + bits.xor(clazz.getBits()); + bits.and(clazz.getBits()); + alt = false; + + //A & !B + } else { + bits.andNot(clazz.getBits()); + } + } else { + + //!A & !B = !(A | B) + if (alt) { + bits.or(clazz.getBits()); + + //A & B + } else { + bits.and(clazz.getBits()); + } + } + } else { + final boolean curAlt = alt; + + if (nonBitSet == null) { + + if (!inverted && bits.isEmpty()) { + if (curAlt) { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return !clazz.contains(ch); + } + }; + //alt = true + } else { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return clazz.contains(ch); + } + }; + //alt = false + } + } else { + + if (curAlt) { + nonBitSet = new AbstractCharClass() { + public 
boolean contains(int ch) { + return !(clazz.contains(ch) && (curAlt ^ bits.get(ch))); + } + }; + //alt = true + } else { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return clazz.contains(ch) && (curAlt ^ bits.get(ch)); + } + }; + //alt = false + } + } + hideBits = true; + } else { + final AbstractCharClass nb = nonBitSet; + + if (curAlt) { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return !((curAlt ^ nb.contains(ch)) && clazz.contains(ch)); + } + }; + //alt = true + } else { + nonBitSet = new AbstractCharClass() { + public boolean contains(int ch) { + return (curAlt ^ nb.contains(ch)) && clazz.contains(ch); + } + }; + //alt = false + } + } + } + } + + /** + * Returns true if character class contains symbol specified, + * false otherwise. Note: #setNegative() method changes the + * meaning of contains method; + * + * @param ch + * @return true if character class contains symbol specified; + * + * TODO: currently character class implementation based on + * BitSet, but this implementation possibly will be turned to combined + * BitSet(for first 256 symbols) and Black/Red tree for the rest of UTF. 
+ */ + public boolean contains(int ch) { + if (nonBitSet == null) { + return this.alt ^ bits.get(ch); + } else { + return alt ^ nonBitSet.contains(ch); + } + } + + protected BitSet getBits() { + if (hideBits) + return null; + return bits; + } + + protected BitSet getLowHighSurrogates() { + return lowHighSurrogates; + } + + public AbstractCharClass getInstance() { + + if (nonBitSet == null) { + final BitSet bs = getBits(); + + AbstractCharClass res = new AbstractCharClass() { + public boolean contains(int ch) { + return this.alt ^ bs.get(ch); + } + + public String toString() { + StringBuilder temp = new StringBuilder(); + for (int i = bs.nextSetBit(0); i >= 0; i = bs + .nextSetBit(i + 1)) { + temp.append(Character.toChars(i)); + temp.append('|'); + } + + if (temp.length() > 0) + temp.deleteCharAt(temp.length() - 1); + + return temp.toString(); + } + + }; + return res.setNegative(isNegative()); + } else { + return this; + } + } + + //for debugging purposes only + public String toString() { + StringBuilder temp = new StringBuilder(); + for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) { + temp.append(Character.toChars(i)); + temp.append('|'); + } + + if (temp.length() > 0) + temp.deleteCharAt(temp.length() - 1); + + return temp.toString(); + } + + public boolean hasUCI() { + return hasUCI; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharSet.java new file mode 100644 index 000000000..1ee65d84d --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharSet.java @@ -0,0 +1,126 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents node accepting single character. + * + * @author Nikolay A. Kuznetsov + */ +class CharSet extends LeafSet { + + private char ch = 0; + + public CharSet(char ch) { + this.ch = ch; + } + + public int charCount() { + return 1; + } + + public int accepts(int strIndex, CharSequence testString) { + return (this.ch == testString.charAt(strIndex)) ? 
1 : -1; + } + + public int find(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (testString instanceof String) { + String testStr = (String) testString; + int strLength = matchResult.getRightBound(); + + while (strIndex < strLength) { + strIndex = testStr.indexOf(ch, strIndex); + if (strIndex < 0) + return -1; + if (next.matches(strIndex + 1, testString, matchResult) >= 0) { + return strIndex; + } + strIndex++; + } + + return -1; + } + + return super.find(strIndex, testString, matchResult); + } + + public int findBack(int strIndex, int lastIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (testString instanceof String) { + String testStr = (String) testString; + + while (lastIndex >= strIndex) { + lastIndex = testStr.lastIndexOf(ch, lastIndex); + if (lastIndex < 0 || lastIndex < strIndex) { + return -1; + } + + if (next.matches(lastIndex + 1, testString, matchResult) >= 0) { + return lastIndex; + } + + lastIndex--; + } + + return -1; + } + + return super.findBack(strIndex, lastIndex, testString, matchResult); + } + + protected String getName() { + return "" + ch; //$NON-NLS-1$ + } + + protected char getChar() { + return ch; + } + + public boolean first(AbstractSet set) { + if (set instanceof CharSet) { + return ((CharSet) set).getChar() == ch; + } else if (set instanceof RangeSet) { + return ((RangeSet) set).accepts(0, Character.toString(ch)) > 0; + } else if (set instanceof SupplRangeSet) { + return ((SupplRangeSet) set).contains(ch); + } else if (set instanceof SupplCharSet) { + return false; + } + + return true; + } +} \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeGroupQuantifierSet.java new file mode 100644 index 000000000..4cfd19cdf --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeGroupQuantifierSet.java 
@@ -0,0 +1,103 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +
/**
 * Group node carrying a composite, i.e. {n,m}, quantifier: "(X){n,m}".
 *
 * @author Nikolay A. Kuznetsov
 */
class CompositeGroupQuantifierSet extends GroupQuantifierSet {

    protected Quantifier quantifier = null;

    // Slot of this node's enter counter inside the match result.
    int setCounter;

    /**
     * Constructs CompositeGroupQuantifierSet
     * @param quant - given composite quantifier
     * @param innerSet - given group
     * @param next - next set after the quantifier
     */
    public CompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet,
            AbstractSet next, int type, int setCounter) {
        super(innerSet, next, type);
        this.quantifier = quant;
        this.setCounter = setCounter;
    }

    /**
     * Enters the group once more unless it has not consumed input or the
     * upper bound is already reached; in both of those cases, and when the
     * extra pass fails with the lower bound already satisfied, matching
     * continues with the next node.
     *
     * @return result of the continued match, or -1 on failure
     */
    public int matches(int stringIndex, CharSequence testString,
            MatchResultImpl matchResult) {
        final int passesSoFar = matchResult.getEnterCounter(setCounter);

        // nothing consumed by the group, or no further pass allowed
        if (!innerSet.hasConsumed(matchResult)
                || passesSoFar >= quantifier.max()) {
            return next.matches(stringIndex, testString, matchResult);
        }

        // take one more pass through the group
        matchResult.setEnterCounter(setCounter, passesSoFar + 1);
        final int innerResult = innerSet.matches(stringIndex, testString,
                matchResult);

        if (innerResult >= 0) {
            matchResult.setEnterCounter(setCounter, 0);
            return innerResult;
        }

        // the extra pass failed: undo the increment
        matchResult.setEnterCounter(setCounter, passesSoFar);
        if (passesSoFar < quantifier.min()) {
            matchResult.setEnterCounter(setCounter, 0);
            return -1;
        }
        return next.matches(stringIndex, testString, matchResult);
    }

    public void reset() {
        quantifier.resetCounter();
    }

    protected String getName() {
        return quantifier.toString();
    }

    void setQuantifier(Quantifier quant) {
        this.quantifier = quant;
    }
}
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeQuantifierSet.java new file mode 100644 index 000000000..652759d35 --- /dev/null +++
b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeQuantifierSet.java @@ -0,0 +1,104 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +
/**
 * Leaf node carrying a composite, i.e. {n,m}, quantifier: "a{n,m}".
 *
 * @author Nikolay A. Kuznetsov
 */
class CompositeQuantifierSet extends LeafQuantifierSet {

    protected Quantifier quantifier = null;

    public CompositeQuantifierSet(Quantifier quant, LeafSet innerSet,
            AbstractSet next, int type) {
        super(innerSet, next, type);
        this.quantifier = quant;
    }

    /**
     * Consumes the mandatory repetitions, then as many optional ones as the
     * leaf accepts up to the upper bound, and finally tries the next node,
     * giving back one repetition at a time until the lower bound is hit.
     *
     * @return result of the next node's match, or -1 on failure
     */
    public int matches(int stringIndex, CharSequence testString,
            MatchResultImpl matchResult) {
        final int lowerBound = quantifier.min();
        final int upperBound = quantifier.max();
        int taken = 0;

        // mandatory repetitions: any miss fails the whole node
        while (taken < lowerBound) {
            if (stringIndex + leaf.charCount() > matchResult.getRightBound()) {
                matchResult.hitEnd = true;
                return -1;
            }

            final int consumed = leaf.accepts(stringIndex, testString);
            if (consumed < 1) {
                return -1;
            }
            stringIndex += consumed;
            taken++;
        }

        // optional repetitions: stop quietly at the first miss
        while (taken < upperBound) {
            if (stringIndex + leaf.charCount() > matchResult.getRightBound()) {
                break;
            }
            final int consumed = leaf.accepts(stringIndex, testString);
            if (consumed < 1) {
                break;
            }
            stringIndex += consumed;
            taken++;
        }

        // hand over to the next node, backing off one repetition per retry
        while (taken >= lowerBound) {
            final int result = next.matches(stringIndex, testString, matchResult);
            if (result >= 0) {
                return result;
            }
            stringIndex -= leaf.charCount();
            taken--;
        }
        return -1;
    }

    public void reset() {
        quantifier.resetCounter();
    }

    protected String getName() {
        return quantifier.toString();
    }

    void setQuantifier(Quantifier quant) {
        this.quantifier = quant;
    }
}
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeRangeSet.java new file mode 100644 index 000000000..d6858d663 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeRangeSet.java @@ -0,0 +1,181 @@ +/* + * Copyright 2014 Alexey Andreev. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * + * Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. + * + * COPYRIGHT AND PERMISSION NOTICE + * + * Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under + * the Terms of Use in http://www.unicode.org/copyright.html. 
Permission is + * hereby granted, free of charge, to any person obtaining a copy of the + * Unicode data files and any associated documentation (the "Data Files") + * or Unicode software and any associated documentation (the "Software") + * to deal in the Data Files or Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, distribute, + * and/or sell copies of the Data Files or Software, and to permit persons + * to whom the Data Files or Software are furnished to do so, provided that + * (a) the above copyright notice(s) and this permission notice appear with + * all copies of the Data Files or Software, (b) both the above copyright + * notice(s) and this permission notice appear in associated documentation, + * and (c) there is clear notice in each modified Data File or in the Software + * as well as in the documentation associated with the Data File(s) or Software + * that the data or software has been modified. + + * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS + * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THE DATA FILES OR SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in these Data Files or Software without prior written + * authorization of the copyright holder. + * + * 2. 
Additional terms from the Database: + * + * Copyright © 1995-1999 Unicode, Inc. All Rights reserved. + * + * Disclaimer + * + * The Unicode Character Database is provided as is by Unicode, Inc. + * No claims are made as to fitness for any particular purpose. No warranties + * of any kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been purchased + * on magnetic or optical media from Unicode, Inc., the sole remedy for any claim + * will be exchange of defective media within 90 days of receipt. This disclaimer + * is applicable for all other data files accompanying the Unicode Character Database, + * some of which have been compiled by the Unicode Consortium, and some of which + * have been supplied by other sources. + * + * Limitations on Rights to Redistribute This Data + * + * Recipient is granted the right to make copies in any form for internal + * distribution and to freely use the information supplied in the creation of + * products supporting the UnicodeTM Standard. The files in + * the Unicode Character Database can be redistributed to third parties or other + * organizations (whether for profit or not) as long as this notice and the disclaimer + * notice are retained. Information can be extracted from these files and used + * in documentation or programs, as long as there is an accompanying notice + * indicating the source. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * This class is used to split the range that contains surrogate + * characters into two ranges: the first consisting of these surrogate + * characters and the second consisting of all other characters + * from the parent range. + * This class represents the parent range split in such a manner.
+ */ +class CompositeRangeSet extends JointSet { + + //range without surrogates + AbstractSet withoutSurrogates; + + //range containing surrogates only + AbstractSet withSurrogates; + + public CompositeRangeSet(AbstractSet withoutSurrogates, + AbstractSet withSurrogates, AbstractSet next) { + this.withoutSurrogates = withoutSurrogates; + this.withSurrogates = withSurrogates; + setNext(next); + } + + public CompositeRangeSet(AbstractSet withoutSurrogates, + AbstractSet withSurrogates) { + this.withoutSurrogates = withoutSurrogates; + this.withSurrogates = withSurrogates; + } + + /** + * Returns the next abstract set. + */ + public AbstractSet getNext() { + return this.next; + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { /* the surrogate-free range is tried first; the surrogate range is the fallback */ + int shift = withoutSurrogates.matches(stringIndex, testString, matchResult); + + if (shift < 0) { + shift = withSurrogates.matches(stringIndex, testString, matchResult); + } + + if (shift >= 0) { + return shift; + } + return -1; + } + + /** + * Sets next abstract set. + * @param next + * The next to set.
+ */ + public void setNext(AbstractSet next) { + this.next = next; /* both sub-ranges below are chained to the same next node */ + withSurrogates.setNext(next); + withoutSurrogates.setNext(next); + } + + public AbstractSet getSurrogates() { + return withSurrogates; + } + + public AbstractSet getWithoutSurrogates() { + return withoutSurrogates; + } + + protected String getName() { + return "CompositeRangeSet: " + " " + + withoutSurrogates + " " + + withSurrogates; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return true; + } + + public boolean first(AbstractSet set) { + return true; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DecomposedCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DecomposedCharSet.java new file mode 100644 index 000000000..151c93c10 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DecomposedCharSet.java @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * Represents canonical decomposition of + * Unicode character. Is used when + * CANON_EQ flag of Pattern class + * is specified.
+ */ +class DecomposedCharSet extends JointSet { + + /** + * Contains information about number of chars + * that were read for a codepoint last time (1, or 2 for a surrogate pair) + */ + private int readCharsForCodePoint = 1; + + /** + * UTF-16 encoding of decomposedChar + */ + private String decomposedCharUTF16 = null; + + /** + * Decomposition of the Unicode codepoint + */ + private int [] decomposedChar; + + /** + * Length of useful part of decomposedChar + * decomposedCharLength <= decomposedChar.length + */ + private int decomposedCharLength; + + public DecomposedCharSet(int [] decomposedChar, int decomposedCharLength) { + this.decomposedChar = decomposedChar; + this.decomposedCharLength = decomposedCharLength; + } + + /** + * Returns the next. + */ + public AbstractSet getNext() { + return this.next; + } + + /** + * Sets next abstract set. + * @param next + * The next to set. + */ + public void setNext(AbstractSet next) { + this.next = next; + } + + public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) { + + /* + * All decompositions have length that + * is less than or equal to Lexer.MAX_DECOMPOSITION_LENGTH + */ + int [] decCurCodePoint; + int [] decCodePoint = new int [Lexer.MAX_DECOMPOSITION_LENGTH]; + int readCodePoints = 0; + int rightBound = matchResult.getRightBound(); + int curChar; + int i = 0; + + if (strIndex >= rightBound) { + return -1; + } + + /* + * We read testString and decompose it gradually to compare with + * this decomposedChar at position strIndex + */ + curChar = codePointAt(strIndex, testString, rightBound); + strIndex += readCharsForCodePoint; + decCurCodePoint = Lexer.getDecomposition(curChar); + if (decCurCodePoint == null) { + decCodePoint[readCodePoints++] = curChar; + } else { + i = decCurCodePoint.length; + System.arraycopy(decCurCodePoint, 0, decCodePoint, 0, i); + readCodePoints += i; + } + + if (strIndex < rightBound) { + curChar = codePointAt(strIndex, testString, rightBound); + + /* + * Read testString until we meet a decomposed
char boundary + * and decompose obtained portion of testString + */ + while ((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH) + && !Lexer.isDecomposedCharBoundary(curChar)) { + + if (Lexer.hasDecompositionNonNullCanClass(curChar)) { + + /* + * A few codepoints have decompositions and non null + * canonical classes, we have to take them into + * consideration, but general rule is: + * if canonical class != 0 then no decomposition + */ + decCurCodePoint = Lexer.getDecomposition(curChar); + + /* + * Length of such decomposition is 1 or 2. See + * UnicodeData file + * http://www.unicode.org/Public/4.0-Update + * /UnicodeData-4.0.0.txt + */ + if (decCurCodePoint.length == 2) { /* NOTE(review): two slots are written while the loop guard only guarantees one free slot - confirm MAX_DECOMPOSITION_LENGTH leaves room */ + decCodePoint[readCodePoints++] = decCurCodePoint[0]; + decCodePoint[readCodePoints++] = decCurCodePoint[1]; + } else { + decCodePoint[readCodePoints++] = decCurCodePoint[0]; + } + } else { + decCodePoint[readCodePoints++] = curChar; + } + + strIndex += readCharsForCodePoint; + + if (strIndex < rightBound) { + curChar = codePointAt(strIndex, testString, rightBound); + } else { + break; + } + } + } + + /* + * Some optimization since length of decomposed char is <= 3 usually + */ + switch (readCodePoints) { + case 0: + case 1: + case 2: + break; + + case 3: + int i1 = Lexer.getCanonicalClass(decCodePoint[1]); + int i2 = Lexer.getCanonicalClass(decCodePoint[2]); + + if ((i2 != 0) && (i1 > i2)) { /* swap so that the code point with the lower canonical class comes first */ + i1 = decCodePoint[1]; + decCodePoint[1] = decCodePoint[2]; + decCodePoint[2] = i1; + } + break; + + default: + decCodePoint = Lexer.getCanonicalOrder(decCodePoint, + readCodePoints); + } + + /* + * Compare decomposedChar with decomposed char + * that was just read from testString + */ + if (readCodePoints != decomposedCharLength) { + return -1; + } + + for (i = 0; i < readCodePoints; i++) { + if (decCodePoint[i] != decomposedChar[i]) { + return -1; + } + } + + return next.matches(strIndex, testString, matchResult); + } + + /** + * Returns UTF-16 encoding of this decomposed char, built once and then cached.
+ * + * @return UTF-16 encoding + */ + private String getDecomposedChar() { + if (decomposedCharUTF16 == null) { + StringBuilder strBuff = new StringBuilder(); + + for (int i = 0; i < decomposedCharLength; i++) { + strBuff.append(Character.toChars(decomposedChar[i])); + } + decomposedCharUTF16 = strBuff.toString(); + } + return decomposedCharUTF16; + } + + protected String getName() { + return "decomposed char:" + getDecomposedChar(); //$NON-NLS-1$ + } + + /** + * Reads Unicode codepoint from input. + * + * @param strIndex - index to read codepoint at + * @param testString - input + * @param rightBound - right bound of the input; a surrogate pair is only looked for below it + * @return codepoint at given strIndex at testString + */ + public int codePointAt(int strIndex, CharSequence testString, + int rightBound) { + + /* + * We store information about number of chars + * we read at variable readCharsForCodePoint. + */ + int curChar; + + readCharsForCodePoint = 1; + if (strIndex < rightBound - 1) { + char high = testString.charAt(strIndex++); + char low = testString.charAt(strIndex); + + if (Character.isSurrogatePair(high, low)) { + char [] curCodePointUTF16 = new char [] {high, low}; + curChar = Character.codePointAt(curCodePointUTF16, 0); + readCharsForCodePoint = 2; + } else { + curChar = high; + } + } else { + curChar = testString.charAt(strIndex); + } + + return curChar; + } + + public boolean first(AbstractSet set) { + return (set instanceof DecomposedCharSet) + ?
((DecomposedCharSet) set).getDecomposedChar() + .equals(getDecomposedChar()) + : true; /* any other kind of set is not inspected and is reported as possible */ + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return true; + } +} + diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllQuantifierSet.java new file mode 100644 index 000000000..a3f0f56af --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllQuantifierSet.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Special node for ".*" construction for any character + * including line terminators. + * + * @author Nikolay A.
Kuznetsov + */ +class DotAllQuantifierSet extends QuantifierSet { + + public DotAllQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) { + super(innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int strLength = matchResult.getRightBound(); + + if (strLength <= stringIndex) { /* already at the right bound: nothing left for the dot to consume */ + return next.matches(stringIndex, testString, matchResult); + } + return next.findBack(stringIndex, strLength, testString, matchResult); /* delegate to next.findBack over [stringIndex, right bound] */ + } + + public int find(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int strLength = matchResult.getRightBound(); + if (next.findBack(stringIndex, strLength, testString, matchResult) >= 0) { /* the reported start is always stringIndex; only the success of next matters */ + return stringIndex; + } else { + return -1; + } + } + + protected String getName() { + return ""; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllSet.java new file mode 100644 index 000000000..d1b7f9e48 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllSet.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Node accepting any character including line terminators. + * + * @author Nikolay A. Kuznetsov + */ +class DotAllSet extends JointSet { + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int strLength = matchResult.getRightBound(); + + if (stringIndex + 1 > strLength) { /* no character left before the right bound */ + matchResult.hitEnd = true; + return -1; + } + + char high = testString.charAt(stringIndex); + + if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) { + char low = testString.charAt(stringIndex + 1); + + if (Character.isSurrogatePair(high, low)) { /* a surrogate pair is consumed as one character (two chars) */ + return next.matches(stringIndex + 2, testString, matchResult); + } + } + return next.matches(stringIndex + 1, testString, matchResult); + } + + protected String getName() { + return "DotAll"; //$NON-NLS-1$ + } + + + public AbstractSet getNext() { + return this.next; + } + + public void setNext(AbstractSet next) { + this.next = next; + } + + public int getType() { + return AbstractSet.TYPE_DOTSET; + } + + + public boolean hasConsumed(MatchResultImpl matchResult) { + return true; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotQuantifierSet.java new file mode 100644 index 000000000..201e7984e --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotQuantifierSet.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Special node for ".*" construction. + * The main idea here is to find line terminator and try to find the rest of + * the construction from this point. + * + * @author Nikolay A. Kuznetsov + */ +class DotQuantifierSet extends QuantifierSet { + + AbstractLineTerminator lt; + + public DotQuantifierSet(AbstractSet innerSet, AbstractSet next, int type, + AbstractLineTerminator lt) { + super(innerSet, next, type); + this.lt = lt; + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int strLength = matchResult.getRightBound(); + + int startSearch = /* testString.toString().indexOf('\n', stringIndex); */ + findLineTerminator(stringIndex, strLength, testString); + + if (startSearch < 0) { /* no line terminator: the dot may run up to the right bound */ + startSearch = strLength; + } + + if (startSearch <= stringIndex) { + return next.matches(stringIndex, testString, matchResult); + } + return next.findBack(stringIndex, startSearch, testString, matchResult); + } + + public int find(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + // String testStr = testString.toString(); + int strLength = matchResult.getRightBound(); + // 1. skip line terminators ??? + // // + // we don't skip line terminators here, but return zero match instead + // // + + // 2. find first occurrence of the searched pattern + // // + int res = next.find(stringIndex, testString, matchResult); + + // 3.
Check if we have other occurrences till the end of line + // (because .* is greedy and we need last one) + // // + if (res >= 0) { + int nextSearch = findLineTerminator(res, strLength, testString); + // testStr.indexOf('\n', res); + if (nextSearch < 0) { + nextSearch = strLength; + } + nextSearch = next + .findBack(res, nextSearch, testString, matchResult); + res = (res < nextSearch) ? nextSearch : res; + } else { + return -1; + } + + // 4. find left boundary of this search + // // + int leftBound = (res > 0) ? findBackLineTerminator(stringIndex, + res - 1, testString)/* testStr.lastIndexOf('\n', res - 1) */ + : (res == 0) ? 0 : -1; + res = (leftBound >= stringIndex) ? ((leftBound < res) ? leftBound + 1 + : leftBound) : stringIndex; + + return res; + } + + /* + * All line terminators are from Basic Multilingual Plane, so the input is scanned char by char + */ + private int findLineTerminator(int from, int to, CharSequence testString) { + for (int i = from; i < to; i++) { + if (lt.isLineTerminator(testString.charAt(i))) { + return i; + } + } + return -1; + } + + private int findBackLineTerminator(int from, int to, CharSequence testString) { /* scans [from, to] from right to left; -1 when no terminator is found */ + for (int i = to; i >= from; i--) { + if (lt.isLineTerminator(testString.charAt(i))) { + return i; + } + } + return -1; + } + + protected String getName() { + return ""; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotSet.java new file mode 100644 index 000000000..b42304a62 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotSet.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Node accepting any character except line terminators. + * + * @author Nikolay A. Kuznetsov + */ +final class DotSet extends JointSet { + + AbstractLineTerminator lt; + + public DotSet(AbstractLineTerminator lt) { + super(); + this.lt = lt; + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int strLength = matchResult.getRightBound(); + + if (stringIndex + 1 > strLength) { /* no character left before the right bound */ + matchResult.hitEnd = true; + return -1; + } + char high = testString.charAt(stringIndex); + + if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) { + char low = testString.charAt(stringIndex + 1); + + if (Character.isSurrogatePair(high, low)) { /* a surrogate pair is tested and consumed as one code point */ + return lt.isLineTerminator(Character.toCodePoint(high, low))? -1 + : next.matches(stringIndex + 2, testString, matchResult); + } + } + + return lt.isLineTerminator(high)?
-1 + : next.matches(stringIndex + 1, testString, matchResult); + } + + protected String getName() { + return "."; //$NON-NLS-1$ + } + + + public AbstractSet getNext() { + return this.next; + } + + public void setNext(AbstractSet next) { + this.next = next; + } + + public int getType() { + return AbstractSet.TYPE_DOTSET; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return true; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOISet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOISet.java new file mode 100644 index 000000000..ee8ffdd75 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOISet.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents end of input '\z', i.e. matches only at the position after the last character. + * + * @author Nikolay A. Kuznetsov + */ +class EOISet extends AbstractSet { + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int rightBound = matchResult.hasTransparentBounds() ?
testString + .length() : matchResult.getRightBound(); /* with transparent bounds the full input length is used instead of the right bound */ + if (stringIndex < rightBound) + return -1; + + matchResult.hitEnd = true; + matchResult.requireEnd = true; + + return next.matches(stringIndex, testString, matchResult); + } + + /** + * Returns false, enough for quantifiers + */ + public boolean hasConsumed(MatchResultImpl matchResult) { + return false; + } + + protected String getName() { + return "EOI"; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOLSet.java new file mode 100644 index 000000000..7464f3b72 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOLSet.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents end of line: matches at the right bound or just before a final line terminator. + * + * @author Nikolay A.
Kuznetsov + */ +final class EOLSet extends AbstractSet { + + private int consCounter; /* slot index passed to matchResult.setConsumed/getConsumed */ + + public EOLSet(int counter) { + this.consCounter = counter; + } + + public int matches(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + int rightBound = matchResult.hasAnchoringBounds() ? matchResult + .getRightBound() : testString.length(); + + if (strIndex >= rightBound) { + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } + + // check final line terminator; + if ((rightBound - strIndex) == 2 && testString.charAt(strIndex) == '\r' + && testString.charAt(strIndex + 1) == '\n') { + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } + char ch; + + if ((rightBound - strIndex) == 1 + && (((ch = testString.charAt(strIndex)) == '\n' || ch == '\r' + || ch == '\u0085' || (ch | 1) == '\u2029'))) { /* (ch | 1) folds U+2028 onto U+2029, so both separators are accepted */ + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } + + return -1; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + int cons; + boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); + matchResult.setConsumed(consCounter, -1); /* reading the slot also resets it to -1 */ + return res; + } + + protected String getName() { + return ""; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EmptySet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EmptySet.java new file mode 100644 index 000000000..4f0529db8 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EmptySet.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Valid constant zero character match. + * + * @author Nikolay A. Kuznetsov + */ +class EmptySet extends LeafSet { + + public EmptySet(AbstractSet next) { + super(next); + charCount = 0; + } + + /* + * @see java.util.regex.LeafSet#accepts(int, java.lang.CharSequence) + */ + public int accepts(int stringIndex, CharSequence testString) { + return 0; + } + + public int find(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int strLength = matchResult.getRightBound(); + int startStr = matchResult.getLeftBound(); + + while (stringIndex <= strLength) { + + //check for supplementary codepoints + if (stringIndex < strLength) { + char low = testString.charAt(stringIndex); + + if (Character.isLowSurrogate(low)) { + + if (stringIndex > startStr) { + char high = testString.charAt(stringIndex - 1); + if (Character.isHighSurrogate(high)) { /* between the halves of a surrogate pair: not a match position, skip forward */ + stringIndex++; + continue; + } + } + } + } + + if (next.matches(stringIndex, testString, matchResult) >= 0) { + return stringIndex; + } + stringIndex++; + } + + return -1; + } + + public int findBack(int stringIndex, int startSearch, + CharSequence testString, MatchResultImpl matchResult) { + int strLength = matchResult.getRightBound(); + int startStr = matchResult.getLeftBound(); + + while (startSearch >= stringIndex) { + + //check for
supplementary codepoints + if (startSearch < strLength) { + char low = testString.charAt(startSearch); + + if (Character.isLowSurrogate(low)) { + + if (startSearch > startStr) { + char high = testString.charAt(startSearch - 1); + if (Character.isHighSurrogate(high)) { /* between the halves of a surrogate pair: not a match position, step back */ + startSearch--; + continue; + } + } + } + } + + if (next.matches(startSearch, testString, matchResult) >= 0) { + return startSearch; + } + startSearch--; + } + + return -1; + } + + /* + * @see java.util.regex.AbstractSet#getName() + */ + protected String getName() { + return ""; //$NON-NLS-1$ + } + + public boolean hasConsumed(MatchResultImpl mr) { + return false; + } + +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FSet.java new file mode 100644 index 000000000..5d7007806 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FSet.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * The node which marks end of the particular group. + * @author Nikolay A.
Kuznetsov
+ */
+class FSet extends AbstractSet {
+
+    /** Single shared {@link PossessiveFSet} instance. */
+    static PossessiveFSet posFSet = new PossessiveFSet();
+
+    /** Not read in this class; starts out {@code false}. */
+    boolean isBackReferenced;
+
+    /** Index of the group whose end this node records. */
+    private int groupIndex;
+
+    /**
+     * @param groupIndex index of the group this node closes
+     */
+    public FSet(int groupIndex) {
+        this.groupIndex = groupIndex;
+    }
+
+    /**
+     * Records {@code stringIndex} as the end of the group and delegates to
+     * {@code next}. When the continuation fails, the previously stored end
+     * is put back so the match result is left as it was found.
+     *
+     * @return whatever {@code next.matches} returned
+     */
+    public int matches(int stringIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        final int savedEnd = matchResult.getEnd(groupIndex);
+        matchResult.setEnd(groupIndex, stringIndex);
+
+        final int result = next.matches(stringIndex, testString, matchResult);
+        if (result >= 0) {
+            return result;
+        }
+
+        // The continuation failed: undo the tentative group end.
+        matchResult.setEnd(groupIndex, savedEnd);
+        return result;
+    }
+
+    /**
+     * @return index of the group this node closes
+     */
+    public int getGroupIndex() {
+        return groupIndex;
+    }
+
+    protected String getName() {
+        return "fSet"; //$NON-NLS-1$
+    }
+
+    /**
+     * This node never consumes input.
+     */
+    public boolean hasConsumed(MatchResultImpl mr) {
+        return false;
+    }
+
+    /**
+     * Marks the end of the particular group without taking possible
+     * kickbacks into account (required for atomic groups, for instance).
+     */
+    static class PossessiveFSet extends AbstractSet {
+
+        /**
+         * Succeeds unconditionally at {@code stringIndex}; neither
+         * {@code next} nor the match result is touched.
+         */
+        public int matches(int stringIndex, CharSequence testString,
+                MatchResultImpl matchResult) {
+            return stringIndex;
+        }
+
+        protected String getName() {
+            return "posFSet"; //$NON-NLS-1$
+        }
+
+        /**
+         * This node never consumes input.
+         */
+        public boolean hasConsumed(MatchResultImpl mr) {
+            return false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FinalSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FinalSet.java
new file mode 100644
index 000000000..d9c12a177
--- /dev/null
+++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FinalSet.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @author Nikolay A. Kuznetsov
+ */
+package org.teavm.classlib.java.util.regex;
+
+/**
+ * Terminal node of a pattern; it is the {@link FSet} for group 0.
+ *
+ * @author Nikolay A. Kuznetsov
+ */
+class FinalSet extends FSet {
+
+    public FinalSet() {
+        super(0);
+    }
+
+    /**
+     * Accepts at {@code stringIndex} when the matcher is in find mode, or
+     * when {@code stringIndex} equals the right bound. On acceptance the
+     * result is marked valid and the end of group 0 is stored.
+     *
+     * @return {@code stringIndex} on acceptance, -1 otherwise
+     */
+    public int matches(int stringIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        // Outside find mode the whole region up to the right bound must
+        // have been consumed.
+        if (matchResult.mode() != TMatcher.MODE_FIND
+                && stringIndex != matchResult.getRightBound()) {
+            return -1;
+        }
+
+        matchResult.setValid();
+        matchResult.setEnd(0, stringIndex);
+        return stringIndex;
+    }
+
+    protected String getName() {
+        return "FinalSet"; //$NON-NLS-1$
+    }
+}
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/GroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/GroupQuantifierSet.java
new file mode 100644
index 000000000..5f86f59a3
--- /dev/null
+++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/GroupQuantifierSet.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @author Nikolay A. Kuznetsov
+ */
+package org.teavm.classlib.java.util.regex;
+
+/**
+ * Default quantifier over groups. This type of quantifier is generally used
+ * for constructions for which we can't identify the number of characters
+ * they consume.
+ *
+ * @author Nikolay A. Kuznetsov
+ */
+class GroupQuantifierSet extends QuantifierSet {
+
+    public GroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
+        super(innerSet, next, type);
+    }
+
+    /**
+     * Tries the quantified inner set first, but only when it reports having
+     * consumed input; if that is skipped or fails, falls through to
+     * {@code next}.
+     *
+     * @return the inner set's result when it is non-negative, otherwise the
+     *         result of {@code next.matches}
+     */
+    public int matches(int stringIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        if (innerSet.hasConsumed(matchResult)) {
+            final int innerResult =
+                    innerSet.matches(stringIndex, testString, matchResult);
+            if (innerResult >= 0) {
+                return innerResult;
+            }
+        }
+        return next.matches(stringIndex, testString, matchResult);
+    }
+
+    protected String getName() {
+        return ""; //$NON-NLS-1$
+    }
+}
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HangulDecomposedCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HangulDecomposedCharSet.java
new file mode 100644
index 000000000..357ab3608
--- /dev/null
+++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HangulDecomposedCharSet.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.teavm.classlib.java.util.regex;
+
+/**
+ * Represents the canonical decomposition of a Hangul syllable.
+ * Is used when the CANON_EQ flag of the Pattern class is specified.
+ */
+class HangulDecomposedCharSet extends JointSet {
+
+    /**
+     * Decomposed Hangul syllable. Only the first
+     * {@link #decomposedCharLength} elements are meaningful.
+     */
+    private char [] decomposedChar;
+
+    /**
+     * Lazily built string form of the useful part of
+     * {@link #decomposedChar}; {@code null} until first requested.
+     */
+    private String decomposedCharUTF16 = null;
+
+    /**
+     * Length of useful part of decomposedChar
+     * decomposedCharLength <= decomposedChar.length
+     */
+    private int decomposedCharLength;
+
+    /**
+     * @param decomposedChar jamo sequence; the array is stored as is, not
+     *        copied
+     * @param decomposedCharLength number of leading elements of
+     *        {@code decomposedChar} that belong to the syllable
+     */
+    public HangulDecomposedCharSet(char [] decomposedChar, int decomposedCharLength) {
+        this.decomposedChar = decomposedChar;
+        this.decomposedCharLength = decomposedCharLength;
+    }
+
+    /**
+     * Returns the next.
+     */
+    public AbstractSet getNext() {
+        return this.next;
+    }
+
+    /**
+     * Sets next abstract set.
+     * @param next
+     *            The next to set.
+     */
+    public void setNext(AbstractSet next) {
+        this.next = next;
+    }
+
+    /**
+     * Gives the string representation of the decomposed syllable, built on
+     * first use and cached afterwards.
+     *
+     * Only the useful prefix of the array is used, so that unused tail
+     * elements leak neither into {@link #getName()} nor into the equality
+     * test of {@link #first(AbstractSet)}; {@code matches} compares the same
+     * prefix.
+     *
+     * @return string made of the first {@code decomposedCharLength} chars.
+     */
+    private String getDecomposedChar() {
+        return (decomposedCharUTF16 == null)
+               ? (decomposedCharUTF16 = new String(decomposedChar, 0, decomposedCharLength))
+               : decomposedCharUTF16;
+    }
+
+    protected String getName() {
+        return "decomposed Hangul syllable:" + getDecomposedChar(); //$NON-NLS-1$
+    }
+
+    /**
+     * Matches the text at {@code strIndex} against this decomposed syllable.
+     * The text may hold either a character for which
+     * {@code Lexer.getHangulDecomposition} returns a decomposition, or an
+     * explicit L V [T] jamo sequence. On success the match continues with
+     * {@code next} right after the consumed characters.
+     *
+     * @return the result of {@code next.matches}, or -1 when the text does
+     *         not correspond to this syllable
+     */
+    public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
+
+        /*
+         * The buffer for the jamos read from testString is sized by
+         * Lexer.MAX_HANGUL_DECOMPOSITION_LENGTH
+         */
+        int rightBound = matchResult.getRightBound();
+        int SyllIndex = 0;
+        int [] decompSyllable = new int [Lexer
+                                         .MAX_HANGUL_DECOMPOSITION_LENGTH];
+        int [] decompCurSymb;
+        char curSymb;
+
+        /*
+         * For details about Hangul composition and decomposition see
+         * http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf
+         * "3.12 Conjoining Jamo Behavior"
+         */
+        int LIndex = -1;
+        int VIndex = -1;
+        int TIndex = -1;
+
+        if (strIndex >= rightBound) {
+            return -1;
+        }
+        curSymb = testString.charAt(strIndex++);
+        decompCurSymb = Lexer.getHangulDecomposition(curSymb);
+
+        if (decompCurSymb == null) {
+
+            /*
+             * We deal with ordinary letter or sequence of jamos
+             * at strIndex at testString.
+             */
+            decompSyllable[SyllIndex++] = curSymb;
+            LIndex = curSymb - Lexer.LBase;
+
+            if ((LIndex < 0) || (LIndex >= Lexer.LCount)) {
+
+                /*
+                 * Ordinary letter, that doesn't match this
+                 */
+                return -1;
+            }
+
+            // VIndex stays -1 when there is no further character to read.
+            if (strIndex < rightBound) {
+                curSymb = testString.charAt(strIndex);
+                VIndex = curSymb - Lexer.VBase;
+            }
+
+            if ((VIndex < 0) || (VIndex >= Lexer.VCount)) {
+
+                /*
+                 * Single L jamo doesn't compose Hangul syllable,
+                 * so doesn't match
+                 */
+                return -1;
+            }
+            strIndex++;
+            decompSyllable[SyllIndex++] = curSymb;
+
+            // TIndex stays -1 when there is no further character to read.
+            if (strIndex < rightBound) {
+                curSymb = testString.charAt(strIndex);
+                TIndex = curSymb - Lexer.TBase;
+            }
+
+            // NOTE(review): TIndex == 0 is accepted as a trailing consonant
+            // here. If Lexer.TBase follows the Unicode algorithm (where
+            // index 0 means "no trailing consonant"), the lower bound
+            // should probably be exclusive - confirm against Lexer.
+            if ((TIndex < 0) || (TIndex >= Lexer.TCount)) {
+
+                /*
+                 * We deal with LV syllable at testString, so
+                 * compare it to this
+                 */
+                return ((decomposedCharLength == 2)
+                        && (decompSyllable[0] == decomposedChar[0])
+                        && (decompSyllable[1] == decomposedChar[1]))
+                       ? next.matches(strIndex, testString, matchResult)
+                       : -1;
+            }
+            strIndex++;
+            decompSyllable[SyllIndex++] = curSymb;
+
+            /*
+             * We deal with LVT syllable at testString, so
+             * compare it to this
+             */
+            return ((decomposedCharLength == 3)
+                    && (decompSyllable[0] == decomposedChar[0])
+                    && (decompSyllable[1] == decomposedChar[1])
+                    && (decompSyllable[2] == decomposedChar[2]))
+                   ? next.matches(strIndex, testString, matchResult)
+                   : -1;
+        } else {
+
+            /*
+             * We deal with Hangul syllable at strIndex at testString.
+             * So we decomposed it to compare with this.
+             */
+            int i = 0;
+
+            if (decompCurSymb.length != decomposedCharLength) {
+                return -1;
+            }
+
+            for (; i < decomposedCharLength; i++) {
+                if (decompCurSymb[i] != decomposedChar[i]) {
+                    return -1;
+                }
+            }
+            return next.matches(strIndex, testString, matchResult);
+        }
+    }
+
+    /**
+     * For another {@code HangulDecomposedCharSet}, returns whether both
+     * represent the same decomposed syllable; for any other kind of set,
+     * returns {@code true}.
+     */
+    public boolean first(AbstractSet set) {
+        return (set instanceof HangulDecomposedCharSet)
+               ? ((HangulDecomposedCharSet) set).getDecomposedChar()
+                       .equals(getDecomposedChar())
+               : true;
+    }
+
+    /**
+     * This node always consumes input when it matches.
+     */
+    public boolean hasConsumed(MatchResultImpl matchResult) {
+        return true;
+    }
+}
+
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HashDecompositions.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HashDecompositions.java
new file mode 100644
index 000000000..a47417602
--- /dev/null
+++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HashDecompositions.java
@@ -0,0 +1,2072 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.teavm.classlib.java.util.regex;
+
+/**
+ * This class gives us a hashtable that contains canonical
+ * decomposition mappings that are generated from
+ * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt.
+ */ +class HashDecompositions{ + + private static IntArrHash table = null; + + private HashDecompositions() { + } + + public static IntArrHash getHashDecompositions() { + if (table != null) { + return table; + } else { + table = new IntArrHash(2032); + table.put(0x0929, new int [] {0x0928, 0x093C}); + table.put(0xFA9F, new int [] {0x72AF}); + table.put(0xFA9E, new int [] {0x7235}); + table.put(0xFA9D, new int [] {0x77A7}); + table.put(0xFA9C, new int [] {0x716E}); + table.put(0xFA9B, new int [] {0x701E}); + table.put(0xFA9A, new int [] {0x6F22}); + table.put(0x0BCC, new int [] {0x0BC6, 0x0BD7}); + table.put(0x0BCB, new int [] {0x0BC7, 0x0BBE}); + table.put(0x0BCA, new int [] {0x0BC6, 0x0BBE}); + table.put(0xFA99, new int [] {0x6ECB}); + table.put(0xFA98, new int [] {0x6EDB}); + table.put(0xFA97, new int [] {0x6D41}); + table.put(0xFA96, new int [] {0x6BBA}); + table.put(0xFA95, new int [] {0x6B79}); + table.put(0xFA94, new int [] {0x6756}); + table.put(0xFA93, new int [] {0x671B}); + table.put(0xFA92, new int [] {0x6717}); + table.put(0x1D164, new int [] {0x1D158, 0x1D165, 0x1D172}); + table.put(0xFA91, new int [] {0x6674}); + table.put(0x1D163, new int [] {0x1D158, 0x1D165, 0x1D171}); + table.put(0xFA90, new int [] {0x6556}); + table.put(0x1D162, new int [] {0x1D158, 0x1D165, 0x1D170}); + table.put(0x1D161, new int [] {0x1D158, 0x1D165, 0x1D16F}); + table.put(0x1D160, new int [] {0x1D158, 0x1D165, 0x1D16E}); + table.put(0xFA8F, new int [] {0x6452}); + table.put(0xFA8E, new int [] {0x641C}); + table.put(0xFA8D, new int [] {0x63C4}); + table.put(0xFA8C, new int [] {0x6234}); + table.put(0x1D15F, new int [] {0x1D158, 0x1D165}); + table.put(0xFA8B, new int [] {0x61F2}); + table.put(0x1D15E, new int [] {0x1D157, 0x1D165}); + table.put(0xFA8A, new int [] {0x6160}); + table.put(0xFA89, new int [] {0x618E}); + table.put(0xFA88, new int [] {0x6108}); + table.put(0xFA87, new int [] {0x614E}); + table.put(0xFA86, new int [] {0x60D8}); + table.put(0xFA85, new int [] 
{0x5FAD}); + table.put(0xFA84, new int [] {0x5F69}); + table.put(0xFA83, new int [] {0x5ED9}); + table.put(0xFA82, new int [] {0x5ED2}); + table.put(0xFA81, new int [] {0x5B28}); + table.put(0xFA80, new int [] {0x5A62}); + table.put(0xFA7F, new int [] {0x5954}); + table.put(0x1FFD, new int [] {0x00B4}); + table.put(0xFA7E, new int [] {0x5944}); + table.put(0x1FFC, new int [] {0x03A9, 0x0345}); + table.put(0xFA7D, new int [] {0x58B3}); + table.put(0x1FFB, new int [] {0x03A9, 0x0301}); + table.put(0xFA7C, new int [] {0x585A}); + table.put(0x1FFA, new int [] {0x03A9, 0x0300}); + table.put(0xFA7B, new int [] {0x55E2}); + table.put(0xFA7A, new int [] {0x5599}); + table.put(0x219B, new int [] {0x2192, 0x0338}); + table.put(0x219A, new int [] {0x2190, 0x0338}); + table.put(0x1FF9, new int [] {0x039F, 0x0301}); + table.put(0x1FF8, new int [] {0x039F, 0x0300}); + table.put(0xFA79, new int [] {0x5555}); + table.put(0x1FF7, new int [] {0x03C9, 0x0342, 0x0345}); + table.put(0xFA78, new int [] {0x559D}); + table.put(0x1FF6, new int [] {0x03C9, 0x0342}); + table.put(0xFA77, new int [] {0x52FA}); + table.put(0xFA76, new int [] {0x52C7}); + table.put(0x1FF4, new int [] {0x03C9, 0x0301, 0x0345}); + table.put(0xFA75, new int [] {0x5180}); + table.put(0x1FF3, new int [] {0x03C9, 0x0345}); + table.put(0xFA74, new int [] {0x5145}); + table.put(0x1FF2, new int [] {0x03C9, 0x0300, 0x0345}); + table.put(0xFA73, new int [] {0x4F80}); + table.put(0xFA72, new int [] {0x5168}); + table.put(0xFA71, new int [] {0x51B5}); + table.put(0xFA70, new int [] {0x4E26}); + table.put(0x1FEF, new int [] {0x0060}); + table.put(0x1FEE, new int [] {0x00A8, 0x0301}); + table.put(0x1FED, new int [] {0x00A8, 0x0300}); + table.put(0x1FEC, new int [] {0x03A1, 0x0314}); + table.put(0x1FEB, new int [] {0x03A5, 0x0301}); + table.put(0x1FEA, new int [] {0x03A5, 0x0300}); + table.put(0xFA6A, new int [] {0x983B}); + table.put(0x1FE9, new int [] {0x03A5, 0x0304}); + table.put(0x1FE8, new int [] {0x03A5, 0x0306}); + 
table.put(0xFA69, new int [] {0x97FF}); + table.put(0x1FE7, new int [] {0x03C5, 0x0308, 0x0342}); + table.put(0xFA68, new int [] {0x96E3}); + table.put(0x1FE6, new int [] {0x03C5, 0x0342}); + table.put(0xFA67, new int [] {0x9038}); + table.put(0x1FE5, new int [] {0x03C1, 0x0314}); + table.put(0xFA66, new int [] {0x8FB6}); + table.put(0x1FE4, new int [] {0x03C1, 0x0313}); + table.put(0xFA65, new int [] {0x8D08}); + table.put(0x1FE3, new int [] {0x03C5, 0x0308, 0x0301}); + table.put(0xFA64, new int [] {0x8CD3}); + table.put(0x1FE2, new int [] {0x03C5, 0x0308, 0x0300}); + table.put(0xFA63, new int [] {0x8B39}); + table.put(0x1FE1, new int [] {0x03C5, 0x0304}); + table.put(0xFA62, new int [] {0x8B01}); + table.put(0x1FE0, new int [] {0x03C5, 0x0306}); + table.put(0xFA61, new int [] {0x8996}); + table.put(0xFA60, new int [] {0x8910}); + table.put(0x1FDF, new int [] {0x1FFE, 0x0342}); + table.put(0x1FDE, new int [] {0x1FFE, 0x0301}); + table.put(0xFA5F, new int [] {0x8457}); + table.put(0x1FDD, new int [] {0x1FFE, 0x0300}); + table.put(0xFA5E, new int [] {0x8279}); + table.put(0xFA5D, new int [] {0x8279}); + table.put(0x1FDB, new int [] {0x0399, 0x0301}); + table.put(0xFA5C, new int [] {0x81ED}); + table.put(0x1FDA, new int [] {0x0399, 0x0300}); + table.put(0xFA5B, new int [] {0x8005}); + table.put(0xFA5A, new int [] {0x7F72}); + table.put(0x1FD9, new int [] {0x0399, 0x0304}); + table.put(0x1FD8, new int [] {0x0399, 0x0306}); + table.put(0xFA59, new int [] {0x7E41}); + table.put(0x1FD7, new int [] {0x03B9, 0x0308, 0x0342}); + table.put(0xFA58, new int [] {0x7E09}); + table.put(0x1FD6, new int [] {0x03B9, 0x0342}); + table.put(0xFA57, new int [] {0x7DF4}); + table.put(0xFA56, new int [] {0x7BC0}); + table.put(0xFA55, new int [] {0x7A81}); + table.put(0x1FD3, new int [] {0x03B9, 0x0308, 0x0301}); + table.put(0xFA54, new int [] {0x7A40}); + table.put(0x1FD2, new int [] {0x03B9, 0x0308, 0x0300}); + table.put(0xFA53, new int [] {0x798E}); + table.put(0x1FD1, new int [] 
{0x03B9, 0x0304}); + table.put(0xFA52, new int [] {0x798D}); + table.put(0x1FD0, new int [] {0x03B9, 0x0306}); + table.put(0xFA51, new int [] {0x795D}); + table.put(0xFA50, new int [] {0x7956}); + table.put(0x1FCF, new int [] {0x1FBF, 0x0342}); + table.put(0x1FCE, new int [] {0x1FBF, 0x0301}); + table.put(0xFA4F, new int [] {0x7950}); + table.put(0x1FCD, new int [] {0x1FBF, 0x0300}); + table.put(0xFA4E, new int [] {0x7948}); + table.put(0x1FCC, new int [] {0x0397, 0x0345}); + table.put(0xFA4D, new int [] {0x7949}); + table.put(0x1FCB, new int [] {0x0397, 0x0301}); + table.put(0xFA4C, new int [] {0x793E}); + table.put(0x1FCA, new int [] {0x0397, 0x0300}); + table.put(0xFA4B, new int [] {0x7891}); + table.put(0xFA4A, new int [] {0x7422}); + table.put(0x1FC9, new int [] {0x0395, 0x0301}); + table.put(0x1FC8, new int [] {0x0395, 0x0300}); + table.put(0xFA49, new int [] {0x722B}); + table.put(0x1FC7, new int [] {0x03B7, 0x0342, 0x0345}); + table.put(0xFA48, new int [] {0x716E}); + table.put(0x1FC6, new int [] {0x03B7, 0x0342}); + table.put(0xFA47, new int [] {0x6F22}); + table.put(0xFA46, new int [] {0x6E1A}); + table.put(0x1FC4, new int [] {0x03B7, 0x0301, 0x0345}); + table.put(0xFA45, new int [] {0x6D77}); + table.put(0x1FC3, new int [] {0x03B7, 0x0345}); + table.put(0xFA44, new int [] {0x6885}); + table.put(0x1FC2, new int [] {0x03B7, 0x0300, 0x0345}); + table.put(0xFA43, new int [] {0x6691}); + table.put(0x1FC1, new int [] {0x00A8, 0x0342}); + table.put(0xFA42, new int [] {0x65E2}); + table.put(0xFA41, new int [] {0x654F}); + table.put(0xFA40, new int [] {0x61F2}); + table.put(0x1FBE, new int [] {0x03B9}); + table.put(0xFA3F, new int [] {0x618E}); + table.put(0xFA3E, new int [] {0x6168}); + table.put(0x1FBC, new int [] {0x0391, 0x0345}); + table.put(0xFA3D, new int [] {0x6094}); + table.put(0x1FBB, new int [] {0x0391, 0x0301}); + table.put(0xFA3C, new int [] {0x5C6E}); + table.put(0x1FBA, new int [] {0x0391, 0x0300}); + table.put(0xFA3B, new int [] {0x5C64}); + 
table.put(0xFA3A, new int [] {0x58A8}); + table.put(0x1FB9, new int [] {0x0391, 0x0304}); + table.put(0x1FB8, new int [] {0x0391, 0x0306}); + table.put(0xFA39, new int [] {0x5840}); + table.put(0x1FB7, new int [] {0x03B1, 0x0342, 0x0345}); + table.put(0xFA38, new int [] {0x5668}); + table.put(0x1FB6, new int [] {0x03B1, 0x0342}); + table.put(0xFA37, new int [] {0x5606}); + table.put(0xFA36, new int [] {0x559D}); + table.put(0x1FB4, new int [] {0x03B1, 0x0301, 0x0345}); + table.put(0xFA35, new int [] {0x5351}); + table.put(0x1FB3, new int [] {0x03B1, 0x0345}); + table.put(0xFA34, new int [] {0x52E4}); + table.put(0x1FB2, new int [] {0x03B1, 0x0300, 0x0345}); + table.put(0xFA33, new int [] {0x52C9}); + table.put(0x1FB1, new int [] {0x03B1, 0x0304}); + table.put(0xFA32, new int [] {0x514D}); + table.put(0x1FB0, new int [] {0x03B1, 0x0306}); + table.put(0xFA31, new int [] {0x50E7}); + table.put(0xFA30, new int [] {0x4FAE}); + table.put(0x1FAF, new int [] {0x03A9, 0x0314, 0x0342, 0x0345}); + table.put(0x1FAE, new int [] {0x03A9, 0x0313, 0x0342, 0x0345}); + table.put(0x1FAD, new int [] {0x03A9, 0x0314, 0x0301, 0x0345}); + table.put(0x1FAC, new int [] {0x03A9, 0x0313, 0x0301, 0x0345}); + table.put(0xFA2D, new int [] {0x9DB4}); + table.put(0x1FAB, new int [] {0x03A9, 0x0314, 0x0300, 0x0345}); + table.put(0xFA2C, new int [] {0x9928}); + table.put(0x1FAA, new int [] {0x03A9, 0x0313, 0x0300, 0x0345}); + table.put(0xFA2B, new int [] {0x98FC}); + table.put(0xFA2A, new int [] {0x98EF}); + table.put(0x1FA9, new int [] {0x03A9, 0x0314, 0x0345}); + table.put(0x1FA8, new int [] {0x03A9, 0x0313, 0x0345}); + table.put(0x1FA7, new int [] {0x03C9, 0x0314, 0x0342, 0x0345}); + table.put(0x1FA6, new int [] {0x03C9, 0x0313, 0x0342, 0x0345}); + table.put(0x1FA5, new int [] {0x03C9, 0x0314, 0x0301, 0x0345}); + table.put(0xFA26, new int [] {0x90FD}); + table.put(0x1FA4, new int [] {0x03C9, 0x0313, 0x0301, 0x0345}); + table.put(0xFA25, new int [] {0x9038}); + table.put(0x1FA3, new int [] 
{0x03C9, 0x0314, 0x0300, 0x0345}); + table.put(0x1FA2, new int [] {0x03C9, 0x0313, 0x0300, 0x0345}); + table.put(0x1FA1, new int [] {0x03C9, 0x0314, 0x0345}); + table.put(0xFA22, new int [] {0x8AF8}); + table.put(0x1FA0, new int [] {0x03C9, 0x0313, 0x0345}); + table.put(0xFA20, new int [] {0x8612}); + table.put(0xFA1E, new int [] {0x7FBD}); + table.put(0xFA1D, new int [] {0x7CBE}); + table.put(0xFA1C, new int [] {0x9756}); + table.put(0xFA1B, new int [] {0x798F}); + table.put(0xFA1A, new int [] {0x7965}); + table.put(0xFA19, new int [] {0x795E}); + table.put(0xFA18, new int [] {0x793C}); + table.put(0xFA17, new int [] {0x76CA}); + table.put(0xFA16, new int [] {0x732A}); + table.put(0xFA15, new int [] {0x51DE}); + table.put(0xFA12, new int [] {0x6674}); + table.put(0xFA10, new int [] {0x585A}); + table.put(0xFA0D, new int [] {0x55C0}); + table.put(0xFA0C, new int [] {0x5140}); + table.put(0xFA0B, new int [] {0x5ED3}); + table.put(0xFA0A, new int [] {0x898B}); + table.put(0x212B, new int [] {0x0041, 0x030A}); + table.put(0x212A, new int [] {0x004B}); + table.put(0xFA09, new int [] {0x964D}); + table.put(0xFA08, new int [] {0x884C}); + table.put(0xFA07, new int [] {0x8F3B}); + table.put(0xFA06, new int [] {0x66B4}); + table.put(0xFA05, new int [] {0x6D1E}); + table.put(0xFA04, new int [] {0x5B85}); + table.put(0xFA03, new int [] {0x7CD6}); + table.put(0xFA02, new int [] {0x62D3}); + table.put(0x2126, new int [] {0x03A9}); + table.put(0xFA01, new int [] {0x5EA6}); + table.put(0xFA00, new int [] {0x5207}); + table.put(0x0B94, new int [] {0x0B92, 0x0BD7}); + table.put(0x1026, new int [] {0x1025, 0x102E}); + table.put(0x0B5D, new int [] {0x0B22, 0x0B3C}); + table.put(0x0B5C, new int [] {0x0B21, 0x0B3C}); + table.put(0x1F9F, new int [] {0x0397, 0x0314, 0x0342, 0x0345}); + table.put(0x1F9E, new int [] {0x0397, 0x0313, 0x0342, 0x0345}); + table.put(0x1F9D, new int [] {0x0397, 0x0314, 0x0301, 0x0345}); + table.put(0x1F9C, new int [] {0x0397, 0x0313, 0x0301, 0x0345}); + 
table.put(0x1F9B, new int [] {0x0397, 0x0314, 0x0300, 0x0345}); + table.put(0x1F9A, new int [] {0x0397, 0x0313, 0x0300, 0x0345}); + table.put(0x0B4C, new int [] {0x0B47, 0x0B57}); + table.put(0x0B4B, new int [] {0x0B47, 0x0B3E}); + table.put(0x1F99, new int [] {0x0397, 0x0314, 0x0345}); + table.put(0x1F98, new int [] {0x0397, 0x0313, 0x0345}); + table.put(0x1F97, new int [] {0x03B7, 0x0314, 0x0342, 0x0345}); + table.put(0x1F96, new int [] {0x03B7, 0x0313, 0x0342, 0x0345}); + table.put(0x1F95, new int [] {0x03B7, 0x0314, 0x0301, 0x0345}); + table.put(0x1F94, new int [] {0x03B7, 0x0313, 0x0301, 0x0345}); + table.put(0x1F93, new int [] {0x03B7, 0x0314, 0x0300, 0x0345}); + table.put(0x1F92, new int [] {0x03B7, 0x0313, 0x0300, 0x0345}); + table.put(0x1F91, new int [] {0x03B7, 0x0314, 0x0345}); + table.put(0x1F90, new int [] {0x03B7, 0x0313, 0x0345}); + table.put(0x0B48, new int [] {0x0B47, 0x0B56}); + table.put(0x1F8F, new int [] {0x0391, 0x0314, 0x0342, 0x0345}); + table.put(0x1F8E, new int [] {0x0391, 0x0313, 0x0342, 0x0345}); + table.put(0x1F8D, new int [] {0x0391, 0x0314, 0x0301, 0x0345}); + table.put(0x1F8C, new int [] {0x0391, 0x0313, 0x0301, 0x0345}); + table.put(0x1F8B, new int [] {0x0391, 0x0314, 0x0300, 0x0345}); + table.put(0x1F8A, new int [] {0x0391, 0x0313, 0x0300, 0x0345}); + table.put(0x1F89, new int [] {0x0391, 0x0314, 0x0345}); + table.put(0x1F88, new int [] {0x0391, 0x0313, 0x0345}); + table.put(0x1F87, new int [] {0x03B1, 0x0314, 0x0342, 0x0345}); + table.put(0x1F86, new int [] {0x03B1, 0x0313, 0x0342, 0x0345}); + table.put(0x1F85, new int [] {0x03B1, 0x0314, 0x0301, 0x0345}); + table.put(0x1F84, new int [] {0x03B1, 0x0313, 0x0301, 0x0345}); + table.put(0x1F83, new int [] {0x03B1, 0x0314, 0x0300, 0x0345}); + table.put(0x1F82, new int [] {0x03B1, 0x0313, 0x0300, 0x0345}); + table.put(0x1F81, new int [] {0x03B1, 0x0314, 0x0345}); + table.put(0x1F80, new int [] {0x03B1, 0x0313, 0x0345}); + table.put(0x1F7D, new int [] {0x03C9, 0x0301}); + 
table.put(0x1F7C, new int [] {0x03C9, 0x0300}); + table.put(0x1F7B, new int [] {0x03C5, 0x0301}); + table.put(0x1F7A, new int [] {0x03C5, 0x0300}); + table.put(0x1F79, new int [] {0x03BF, 0x0301}); + table.put(0x1F78, new int [] {0x03BF, 0x0300}); + table.put(0x1F77, new int [] {0x03B9, 0x0301}); + table.put(0x1F76, new int [] {0x03B9, 0x0300}); + table.put(0x1F75, new int [] {0x03B7, 0x0301}); + table.put(0x0233, new int [] {0x0079, 0x0304}); + table.put(0x1F74, new int [] {0x03B7, 0x0300}); + table.put(0x0232, new int [] {0x0059, 0x0304}); + table.put(0x1F73, new int [] {0x03B5, 0x0301}); + table.put(0x0231, new int [] {0x006F, 0x0307, 0x0304}); + table.put(0x1F72, new int [] {0x03B5, 0x0300}); + table.put(0x0230, new int [] {0x004F, 0x0307, 0x0304}); + table.put(0x1F71, new int [] {0x03B1, 0x0301}); + table.put(0x1F70, new int [] {0x03B1, 0x0300}); + table.put(0x022F, new int [] {0x006F, 0x0307}); + table.put(0x022E, new int [] {0x004F, 0x0307}); + table.put(0x1F6F, new int [] {0x03A9, 0x0314, 0x0342}); + table.put(0x022D, new int [] {0x006F, 0x0303, 0x0304}); + table.put(0x1F6E, new int [] {0x03A9, 0x0313, 0x0342}); + table.put(0x022C, new int [] {0x004F, 0x0303, 0x0304}); + table.put(0x1F6D, new int [] {0x03A9, 0x0314, 0x0301}); + table.put(0x022B, new int [] {0x006F, 0x0308, 0x0304}); + table.put(0x1F6C, new int [] {0x03A9, 0x0313, 0x0301}); + table.put(0x022A, new int [] {0x004F, 0x0308, 0x0304}); + table.put(0x1F6B, new int [] {0x03A9, 0x0314, 0x0300}); + table.put(0x1F6A, new int [] {0x03A9, 0x0313, 0x0300}); + table.put(0x0229, new int [] {0x0065, 0x0327}); + table.put(0x0228, new int [] {0x0045, 0x0327}); + table.put(0x1F69, new int [] {0x03A9, 0x0314}); + table.put(0x0227, new int [] {0x0061, 0x0307}); + table.put(0x1F68, new int [] {0x03A9, 0x0313}); + table.put(0x0226, new int [] {0x0041, 0x0307}); + table.put(0x1F67, new int [] {0x03C9, 0x0314, 0x0342}); + table.put(0x1F66, new int [] {0x03C9, 0x0313, 0x0342}); + table.put(0x1F65, new int [] {0x03C9, 
0x0314, 0x0301}); + table.put(0x1F64, new int [] {0x03C9, 0x0313, 0x0301}); + table.put(0x1F63, new int [] {0x03C9, 0x0314, 0x0300}); + table.put(0x1F62, new int [] {0x03C9, 0x0313, 0x0300}); + table.put(0x1F61, new int [] {0x03C9, 0x0314}); + table.put(0x1F60, new int [] {0x03C9, 0x0313}); + table.put(0x021F, new int [] {0x0068, 0x030C}); + table.put(0x021E, new int [] {0x0048, 0x030C}); + table.put(0x1F5F, new int [] {0x03A5, 0x0314, 0x0342}); + table.put(0x1F5D, new int [] {0x03A5, 0x0314, 0x0301}); + table.put(0x021B, new int [] {0x0074, 0x0326}); + table.put(0x021A, new int [] {0x0054, 0x0326}); + table.put(0x1F5B, new int [] {0x03A5, 0x0314, 0x0300}); + table.put(0x0219, new int [] {0x0073, 0x0326}); + table.put(0x0218, new int [] {0x0053, 0x0326}); + table.put(0x1F59, new int [] {0x03A5, 0x0314}); + table.put(0x0217, new int [] {0x0075, 0x0311}); + table.put(0x0216, new int [] {0x0055, 0x0311}); + table.put(0x1F57, new int [] {0x03C5, 0x0314, 0x0342}); + table.put(0x0215, new int [] {0x0075, 0x030F}); + table.put(0x1F56, new int [] {0x03C5, 0x0313, 0x0342}); + table.put(0x0214, new int [] {0x0055, 0x030F}); + table.put(0x1F55, new int [] {0x03C5, 0x0314, 0x0301}); + table.put(0x0213, new int [] {0x0072, 0x0311}); + table.put(0x1F54, new int [] {0x03C5, 0x0313, 0x0301}); + table.put(0x0212, new int [] {0x0052, 0x0311}); + table.put(0x1F53, new int [] {0x03C5, 0x0314, 0x0300}); + table.put(0x0211, new int [] {0x0072, 0x030F}); + table.put(0x1F52, new int [] {0x03C5, 0x0313, 0x0300}); + table.put(0x0210, new int [] {0x0052, 0x030F}); + table.put(0x1F51, new int [] {0x03C5, 0x0314}); + table.put(0x1F50, new int [] {0x03C5, 0x0313}); + table.put(0x020F, new int [] {0x006F, 0x0311}); + table.put(0x020E, new int [] {0x004F, 0x0311}); + table.put(0x020D, new int [] {0x006F, 0x030F}); + table.put(0x020C, new int [] {0x004F, 0x030F}); + table.put(0x1F4D, new int [] {0x039F, 0x0314, 0x0301}); + table.put(0x020B, new int [] {0x0069, 0x0311}); + table.put(0x1F4C, new int 
[] {0x039F, 0x0313, 0x0301}); + table.put(0x020A, new int [] {0x0049, 0x0311}); + table.put(0x1F4B, new int [] {0x039F, 0x0314, 0x0300}); + table.put(0x1F4A, new int [] {0x039F, 0x0313, 0x0300}); + table.put(0x0209, new int [] {0x0069, 0x030F}); + table.put(0x0208, new int [] {0x0049, 0x030F}); + table.put(0x1F49, new int [] {0x039F, 0x0314}); + table.put(0x0207, new int [] {0x0065, 0x0311}); + table.put(0x1F48, new int [] {0x039F, 0x0313}); + table.put(0x0206, new int [] {0x0045, 0x0311}); + table.put(0x0205, new int [] {0x0065, 0x030F}); + table.put(0x0204, new int [] {0x0045, 0x030F}); + table.put(0x1F45, new int [] {0x03BF, 0x0314, 0x0301}); + table.put(0x0203, new int [] {0x0061, 0x0311}); + table.put(0x1F44, new int [] {0x03BF, 0x0313, 0x0301}); + table.put(0x0202, new int [] {0x0041, 0x0311}); + table.put(0x1F43, new int [] {0x03BF, 0x0314, 0x0300}); + table.put(0x0201, new int [] {0x0061, 0x030F}); + table.put(0x1F42, new int [] {0x03BF, 0x0313, 0x0300}); + table.put(0x0200, new int [] {0x0041, 0x030F}); + table.put(0x1F41, new int [] {0x03BF, 0x0314}); + table.put(0x1F40, new int [] {0x03BF, 0x0313}); + table.put(0x2F9FF, new int [] {0x980B}); + table.put(0x2F9FE, new int [] {0x980B}); + table.put(0x1F3F, new int [] {0x0399, 0x0314, 0x0342}); + table.put(0x2F9FD, new int [] {0x29496}); + table.put(0x1F3E, new int [] {0x0399, 0x0313, 0x0342}); + table.put(0x2F9FC, new int [] {0x4AB2}); + table.put(0x1F3D, new int [] {0x0399, 0x0314, 0x0301}); + table.put(0x2F9FB, new int [] {0x2940A}); + table.put(0x1F3C, new int [] {0x0399, 0x0313, 0x0301}); + table.put(0x2F9FA, new int [] {0x97E0}); + table.put(0x1F3B, new int [] {0x0399, 0x0314, 0x0300}); + table.put(0x1F3A, new int [] {0x0399, 0x0313, 0x0300}); + table.put(0x2F9F9, new int [] {0x4A76}); + table.put(0x2F9F8, new int [] {0x4A6E}); + table.put(0x1F39, new int [] {0x0399, 0x0314}); + table.put(0x2F9F7, new int [] {0x2921A}); + table.put(0x1F38, new int [] {0x0399, 0x0313}); + table.put(0x2F9F6, new int [] 
{0x29145}); + table.put(0x1F37, new int [] {0x03B9, 0x0314, 0x0342}); + table.put(0x2F9F5, new int [] {0x9723}); + table.put(0x1F36, new int [] {0x03B9, 0x0313, 0x0342}); + table.put(0x2F9F4, new int [] {0x5DB2}); + table.put(0x1F35, new int [] {0x03B9, 0x0314, 0x0301}); + table.put(0x2F9F3, new int [] {0x96C3}); + table.put(0x1F34, new int [] {0x03B9, 0x0313, 0x0301}); + table.put(0x2F9F2, new int [] {0x49E6}); + table.put(0x1F33, new int [] {0x03B9, 0x0314, 0x0300}); + table.put(0x2F9F1, new int [] {0x28D77}); + table.put(0x1F32, new int [] {0x03B9, 0x0313, 0x0300}); + table.put(0x2F9F0, new int [] {0x95B7}); + table.put(0x1F31, new int [] {0x03B9, 0x0314}); + table.put(0x1F30, new int [] {0x03B9, 0x0313}); + table.put(0x2F9EF, new int [] {0x4995}); + table.put(0x2F9EE, new int [] {0x958B}); + table.put(0x1F2F, new int [] {0x0397, 0x0314, 0x0342}); + table.put(0x2F9ED, new int [] {0x28BFA}); + table.put(0x1F2E, new int [] {0x0397, 0x0313, 0x0342}); + table.put(0x2F9EC, new int [] {0x9415}); + table.put(0x1F2D, new int [] {0x0397, 0x0314, 0x0301}); + table.put(0x2F9EB, new int [] {0x93F9}); + table.put(0x1F2C, new int [] {0x0397, 0x0313, 0x0301}); + table.put(0x2F9EA, new int [] {0x927C}); + table.put(0x1F2B, new int [] {0x0397, 0x0314, 0x0300}); + table.put(0x1F2A, new int [] {0x0397, 0x0313, 0x0300}); + table.put(0x2F9E9, new int [] {0x92D8}); + table.put(0x2F9E8, new int [] {0x92D7}); + table.put(0x1F29, new int [] {0x0397, 0x0314}); + table.put(0x2F9E7, new int [] {0x9238}); + table.put(0x1F28, new int [] {0x0397, 0x0313}); + table.put(0x2F9E6, new int [] {0x911B}); + table.put(0x1F27, new int [] {0x03B7, 0x0314, 0x0342}); + table.put(0x2F9E5, new int [] {0x2872E}); + table.put(0x1F26, new int [] {0x03B7, 0x0313, 0x0342}); + table.put(0x2F9E4, new int [] {0x9111}); + table.put(0x1F25, new int [] {0x03B7, 0x0314, 0x0301}); + table.put(0x2F9E3, new int [] {0x90F1}); + table.put(0x1F24, new int [] {0x03B7, 0x0313, 0x0301}); + table.put(0x2F9E2, new int [] 
{0x9094}); + table.put(0x1F23, new int [] {0x03B7, 0x0314, 0x0300}); + table.put(0x2F9E1, new int [] {0x285ED}); + table.put(0x1F22, new int [] {0x03B7, 0x0313, 0x0300}); + table.put(0x2F9E0, new int [] {0x285D2}); + table.put(0x1F21, new int [] {0x03B7, 0x0314}); + table.put(0x1F20, new int [] {0x03B7, 0x0313}); + table.put(0x2F9DF, new int [] {0x8F38}); + table.put(0x2F9DE, new int [] {0x8ED4}); + table.put(0x2F9DD, new int [] {0x208DE}); + table.put(0x2F9DC, new int [] {0x8DF0}); + table.put(0x1F1D, new int [] {0x0395, 0x0314, 0x0301}); + table.put(0x2F9DB, new int [] {0x8DBC}); + table.put(0x1F1C, new int [] {0x0395, 0x0313, 0x0301}); + table.put(0x2F9DA, new int [] {0x8DCB}); + table.put(0x1F1B, new int [] {0x0395, 0x0314, 0x0300}); + table.put(0x1F1A, new int [] {0x0395, 0x0313, 0x0300}); + table.put(0x2F9D9, new int [] {0x20804}); + table.put(0x2F9D8, new int [] {0x27F2F}); + table.put(0x1F19, new int [] {0x0395, 0x0314}); + table.put(0x2F9D7, new int [] {0x8D77}); + table.put(0x1F18, new int [] {0x0395, 0x0313}); + table.put(0x2F9D6, new int [] {0x8D1B}); + table.put(0x2F9D5, new int [] {0x8CC1}); + table.put(0x2F9D4, new int [] {0x8CAB}); + table.put(0x1F15, new int [] {0x03B5, 0x0314, 0x0301}); + table.put(0x2F9D3, new int [] {0x27CA8}); + table.put(0x1F14, new int [] {0x03B5, 0x0313, 0x0301}); + table.put(0x2F9D2, new int [] {0x8C55}); + table.put(0x1F13, new int [] {0x03B5, 0x0314, 0x0300}); + table.put(0x2F9D1, new int [] {0x8B8A}); + table.put(0x1F12, new int [] {0x03B5, 0x0313, 0x0300}); + table.put(0x2F9D0, new int [] {0x8AED}); + table.put(0x1F11, new int [] {0x03B5, 0x0314}); + table.put(0x1F10, new int [] {0x03B5, 0x0313}); + table.put(0x2F9CF, new int [] {0x8AA0}); + table.put(0x2F9CE, new int [] {0x46C7}); + table.put(0x1F0F, new int [] {0x0391, 0x0314, 0x0342}); + table.put(0x2F9CD, new int [] {0x46BE}); + table.put(0x1F0E, new int [] {0x0391, 0x0313, 0x0342}); + table.put(0x2F9CC, new int [] {0x27966}); + table.put(0x1F0D, new int [] {0x0391, 
0x0314, 0x0301}); + table.put(0x2F9CB, new int [] {0x278AE}); + table.put(0x1F0C, new int [] {0x0391, 0x0313, 0x0301}); + table.put(0x2F9CA, new int [] {0x34BB}); + table.put(0x1F0B, new int [] {0x0391, 0x0314, 0x0300}); + table.put(0x1F0A, new int [] {0x0391, 0x0313, 0x0300}); + table.put(0x2F9C9, new int [] {0x88FA}); + table.put(0x2F9C8, new int [] {0x4635}); + table.put(0x1F09, new int [] {0x0391, 0x0314}); + table.put(0x2F9C7, new int [] {0x88DE}); + table.put(0x1F08, new int [] {0x0391, 0x0313}); + table.put(0x2F9C6, new int [] {0x88D7}); + table.put(0x1F07, new int [] {0x03B1, 0x0314, 0x0342}); + table.put(0x2F9C5, new int [] {0x27667}); + table.put(0x1F06, new int [] {0x03B1, 0x0313, 0x0342}); + table.put(0x2F9C4, new int [] {0x8863}); + table.put(0x1F05, new int [] {0x03B1, 0x0314, 0x0301}); + table.put(0x2F9C3, new int [] {0x8860}); + table.put(0x1F04, new int [] {0x03B1, 0x0313, 0x0301}); + table.put(0x2F9C2, new int [] {0x45F9}); + table.put(0x1F03, new int [] {0x03B1, 0x0314, 0x0300}); + table.put(0x2F9C1, new int [] {0x8801}); + table.put(0x1F02, new int [] {0x03B1, 0x0313, 0x0300}); + table.put(0x2F9C0, new int [] {0x87E1}); + table.put(0x1F01, new int [] {0x03B1, 0x0314}); + table.put(0x1F00, new int [] {0x03B1, 0x0313}); + table.put(0x2F9BF, new int [] {0x45D7}); + table.put(0x2F9BE, new int [] {0x8786}); + table.put(0x2F9BD, new int [] {0x876B}); + table.put(0x2F9BC, new int [] {0x8728}); + table.put(0x2F9BB, new int [] {0x8779}); + table.put(0x2F9BA, new int [] {0x86E2}); + table.put(0x2F9B9, new int [] {0x870E}); + table.put(0x2F9B8, new int [] {0x8688}); + table.put(0x2F9B7, new int [] {0x86A9}); + table.put(0x2F9B6, new int [] {0x8669}); + table.put(0x2F9B5, new int [] {0x8667}); + table.put(0x2F9B4, new int [] {0x865C}); + table.put(0x2F9B3, new int [] {0x8650}); + table.put(0x2F9B2, new int [] {0x456B}); + table.put(0x2F9B1, new int [] {0x270D2}); + table.put(0x2F9B0, new int [] {0x26FB1}); + table.put(0x2F9AF, new int [] {0x4561}); + 
table.put(0x2F9AE, new int [] {0x455D}); + table.put(0x2F9AD, new int [] {0x26F2C}); + table.put(0x2F9AC, new int [] {0x8564}); + table.put(0x2F9AB, new int [] {0x273CA}); + table.put(0x2F9AA, new int [] {0x8516}); + table.put(0x2F9A9, new int [] {0x84F3}); + table.put(0x2F9A8, new int [] {0x84F1}); + table.put(0x2F9A7, new int [] {0x452B}); + table.put(0x2F9A6, new int [] {0x26CD5}); + table.put(0x2F9A5, new int [] {0x26D6B}); + table.put(0x2F9A4, new int [] {0x26C36}); + table.put(0x2F9A3, new int [] {0x83DC}); + table.put(0x2F9A2, new int [] {0x83CC}); + table.put(0x2F9A1, new int [] {0x83CA}); + table.put(0x2F9A0, new int [] {0x8353}); + table.put(0x01FF, new int [] {0x00F8, 0x0301}); + table.put(0x01FE, new int [] {0x00D8, 0x0301}); + table.put(0x01FD, new int [] {0x00E6, 0x0301}); + table.put(0x01FC, new int [] {0x00C6, 0x0301}); + table.put(0x01FB, new int [] {0x0061, 0x030A, 0x0301}); + table.put(0x01FA, new int [] {0x0041, 0x030A, 0x0301}); + table.put(0x01F9, new int [] {0x006E, 0x0300}); + table.put(0x01F8, new int [] {0x004E, 0x0300}); + table.put(0x01F5, new int [] {0x0067, 0x0301}); + table.put(0x01F4, new int [] {0x0047, 0x0301}); + table.put(0x01F0, new int [] {0x006A, 0x030C}); + table.put(0x01EF, new int [] {0x0292, 0x030C}); + table.put(0x01EE, new int [] {0x01B7, 0x030C}); + table.put(0x01ED, new int [] {0x006F, 0x0328, 0x0304}); + table.put(0x01EC, new int [] {0x004F, 0x0328, 0x0304}); + table.put(0x01EB, new int [] {0x006F, 0x0328}); + table.put(0x01EA, new int [] {0x004F, 0x0328}); + table.put(0x01E9, new int [] {0x006B, 0x030C}); + table.put(0x01E8, new int [] {0x004B, 0x030C}); + table.put(0x01E7, new int [] {0x0067, 0x030C}); + table.put(0x01E6, new int [] {0x0047, 0x030C}); + table.put(0x01E3, new int [] {0x00E6, 0x0304}); + table.put(0x01E2, new int [] {0x00C6, 0x0304}); + table.put(0x01E1, new int [] {0x0061, 0x0307, 0x0304}); + table.put(0x01E0, new int [] {0x0041, 0x0307, 0x0304}); + table.put(0x01DF, new int [] {0x0061, 0x0308, 
0x0304}); + table.put(0x01DE, new int [] {0x0041, 0x0308, 0x0304}); + table.put(0x01DC, new int [] {0x0075, 0x0308, 0x0300}); + table.put(0x01DB, new int [] {0x0055, 0x0308, 0x0300}); + table.put(0x01DA, new int [] {0x0075, 0x0308, 0x030C}); + table.put(0x01D9, new int [] {0x0055, 0x0308, 0x030C}); + table.put(0x01D8, new int [] {0x0075, 0x0308, 0x0301}); + table.put(0x01D7, new int [] {0x0055, 0x0308, 0x0301}); + table.put(0x01D6, new int [] {0x0075, 0x0308, 0x0304}); + table.put(0x01D5, new int [] {0x0055, 0x0308, 0x0304}); + table.put(0x01D4, new int [] {0x0075, 0x030C}); + table.put(0x01D3, new int [] {0x0055, 0x030C}); + table.put(0x01D2, new int [] {0x006F, 0x030C}); + table.put(0x01D1, new int [] {0x004F, 0x030C}); + table.put(0x01D0, new int [] {0x0069, 0x030C}); + table.put(0x01CF, new int [] {0x0049, 0x030C}); + table.put(0x01CE, new int [] {0x0061, 0x030C}); + table.put(0x01CD, new int [] {0x0041, 0x030C}); + table.put(0x1EF9, new int [] {0x0079, 0x0303}); + table.put(0x1EF8, new int [] {0x0059, 0x0303}); + table.put(0x1EF7, new int [] {0x0079, 0x0309}); + table.put(0x1EF6, new int [] {0x0059, 0x0309}); + table.put(0x1EF5, new int [] {0x0079, 0x0323}); + table.put(0x1EF4, new int [] {0x0059, 0x0323}); + table.put(0x1EF3, new int [] {0x0079, 0x0300}); + table.put(0x1EF2, new int [] {0x0059, 0x0300}); + table.put(0x01B0, new int [] {0x0075, 0x031B}); + table.put(0x1EF1, new int [] {0x0075, 0x031B, 0x0323}); + table.put(0x1EF0, new int [] {0x0055, 0x031B, 0x0323}); + table.put(0x2F99F, new int [] {0x8457}); + table.put(0x01AF, new int [] {0x0055, 0x031B}); + table.put(0x2F99E, new int [] {0x83E7}); + table.put(0x1EEF, new int [] {0x0075, 0x031B, 0x0303}); + table.put(0x2F99D, new int [] {0x83BD}); + table.put(0x1EEE, new int [] {0x0055, 0x031B, 0x0303}); + table.put(0x2F99C, new int [] {0x8323}); + table.put(0x1EED, new int [] {0x0075, 0x031B, 0x0309}); + table.put(0x2F99B, new int [] {0x83AD}); + table.put(0x1EEC, new int [] {0x0055, 0x031B, 0x0309}); + 
table.put(0x2F99A, new int [] {0x8363}); + table.put(0x1EEB, new int [] {0x0075, 0x031B, 0x0300}); + table.put(0x1EEA, new int [] {0x0055, 0x031B, 0x0300}); + table.put(0x2F999, new int [] {0x831D}); + table.put(0x2F998, new int [] {0x82E5}); + table.put(0x1EE9, new int [] {0x0075, 0x031B, 0x0301}); + table.put(0x2F997, new int [] {0x26B3C}); + table.put(0x1EE8, new int [] {0x0055, 0x031B, 0x0301}); + table.put(0x2F996, new int [] {0x82E6}); + table.put(0x1EE7, new int [] {0x0075, 0x0309}); + table.put(0x2F995, new int [] {0x82BD}); + table.put(0x1EE6, new int [] {0x0055, 0x0309}); + table.put(0x2F994, new int [] {0x82B3}); + table.put(0x1EE5, new int [] {0x0075, 0x0323}); + table.put(0x2F993, new int [] {0x82B1}); + table.put(0x1EE4, new int [] {0x0055, 0x0323}); + table.put(0x2F992, new int [] {0x52B3}); + table.put(0x1EE3, new int [] {0x006F, 0x031B, 0x0323}); + table.put(0x2F991, new int [] {0x829D}); + table.put(0x01A1, new int [] {0x006F, 0x031B}); + table.put(0x1EE2, new int [] {0x004F, 0x031B, 0x0323}); + table.put(0x2F990, new int [] {0x828B}); + table.put(0x01A0, new int [] {0x004F, 0x031B}); + table.put(0x1EE1, new int [] {0x006F, 0x031B, 0x0303}); + table.put(0x1EE0, new int [] {0x004F, 0x031B, 0x0303}); + table.put(0x2F98F, new int [] {0x8291}); + table.put(0x2F98E, new int [] {0x446B}); + table.put(0x1EDF, new int [] {0x006F, 0x031B, 0x0309}); + table.put(0x2F98D, new int [] {0x8F9E}); + table.put(0x1EDE, new int [] {0x004F, 0x031B, 0x0309}); + table.put(0x2F98C, new int [] {0x8204}); + table.put(0x1EDD, new int [] {0x006F, 0x031B, 0x0300}); + table.put(0x2F98B, new int [] {0x8201}); + table.put(0x1EDC, new int [] {0x004F, 0x031B, 0x0300}); + table.put(0x2F98A, new int [] {0x2339C}); + table.put(0x1EDB, new int [] {0x006F, 0x031B, 0x0301}); + table.put(0x1EDA, new int [] {0x004F, 0x031B, 0x0301}); + table.put(0x2F989, new int [] {0x23393}); + table.put(0x2F988, new int [] {0x267B5}); + table.put(0x1ED9, new int [] {0x006F, 0x0323, 0x0302}); + 
table.put(0x2F987, new int [] {0x267A7}); + table.put(0x04F9, new int [] {0x044B, 0x0308}); + table.put(0x1ED8, new int [] {0x004F, 0x0323, 0x0302}); + table.put(0x2F986, new int [] {0x5AB5}); + table.put(0x04F8, new int [] {0x042B, 0x0308}); + table.put(0x1ED7, new int [] {0x006F, 0x0302, 0x0303}); + table.put(0x2F985, new int [] {0x813E}); + table.put(0x1ED6, new int [] {0x004F, 0x0302, 0x0303}); + table.put(0x2F984, new int [] {0x440B}); + table.put(0x1ED5, new int [] {0x006F, 0x0302, 0x0309}); + table.put(0x2F983, new int [] {0x8103}); + table.put(0x04F5, new int [] {0x0447, 0x0308}); + table.put(0x1ED4, new int [] {0x004F, 0x0302, 0x0309}); + table.put(0x2F982, new int [] {0x80B2}); + table.put(0x04F4, new int [] {0x0427, 0x0308}); + table.put(0x1ED3, new int [] {0x006F, 0x0302, 0x0300}); + table.put(0x2F981, new int [] {0x43D5}); + table.put(0x04F3, new int [] {0x0443, 0x030B}); + table.put(0x1ED2, new int [] {0x004F, 0x0302, 0x0300}); + table.put(0x2F980, new int [] {0x2335F}); + table.put(0x04F2, new int [] {0x0423, 0x030B}); + table.put(0x1ED1, new int [] {0x006F, 0x0302, 0x0301}); + table.put(0x04F1, new int [] {0x0443, 0x0308}); + table.put(0x1ED0, new int [] {0x004F, 0x0302, 0x0301}); + table.put(0x04F0, new int [] {0x0423, 0x0308}); + table.put(0x2F97F, new int [] {0x8070}); + table.put(0x2F97E, new int [] {0x265A8}); + table.put(0x1ECF, new int [] {0x006F, 0x0309}); + table.put(0x2F97D, new int [] {0x8060}); + table.put(0x04EF, new int [] {0x0443, 0x0304}); + table.put(0x1ECE, new int [] {0x004F, 0x0309}); + table.put(0x2F97C, new int [] {0x26523}); + table.put(0x04EE, new int [] {0x0423, 0x0304}); + table.put(0x1ECD, new int [] {0x006F, 0x0323}); + table.put(0x2F97B, new int [] {0x264DA}); + table.put(0x04ED, new int [] {0x044D, 0x0308}); + table.put(0x1ECC, new int [] {0x004F, 0x0323}); + table.put(0x2F97A, new int [] {0x8005}); + table.put(0x04EC, new int [] {0x042D, 0x0308}); + table.put(0x1ECB, new int [] {0x0069, 0x0323}); + table.put(0x04EB, 
new int [] {0x04E9, 0x0308}); + table.put(0x1ECA, new int [] {0x0049, 0x0323}); + table.put(0x04EA, new int [] {0x04E8, 0x0308}); + table.put(0x2F979, new int [] {0x7FFA}); + table.put(0x0DDE, new int [] {0x0DD9, 0x0DDF}); + table.put(0x2F978, new int [] {0x7F95}); + table.put(0x0DDD, new int [] {0x0DD9, 0x0DCF, 0x0DCA}); + table.put(0x1EC9, new int [] {0x0069, 0x0309}); + table.put(0x2F977, new int [] {0x2633E}); + table.put(0x0DDC, new int [] {0x0DD9, 0x0DCF}); + table.put(0x1EC8, new int [] {0x0049, 0x0309}); + table.put(0x2F976, new int [] {0x7F7A}); + table.put(0x1EC7, new int [] {0x0065, 0x0323, 0x0302}); + table.put(0x2F975, new int [] {0x262D9}); + table.put(0x04E7, new int [] {0x043E, 0x0308}); + table.put(0x0DDA, new int [] {0x0DD9, 0x0DCA}); + table.put(0x1EC6, new int [] {0x0045, 0x0323, 0x0302}); + table.put(0x2F974, new int [] {0x4359}); + table.put(0x04E6, new int [] {0x041E, 0x0308}); + table.put(0x1EC5, new int [] {0x0065, 0x0302, 0x0303}); + table.put(0x2F973, new int [] {0x26247}); + table.put(0x04E5, new int [] {0x0438, 0x0308}); + table.put(0x1EC4, new int [] {0x0045, 0x0302, 0x0303}); + table.put(0x2F972, new int [] {0x26228}); + table.put(0x04E4, new int [] {0x0418, 0x0308}); + table.put(0x1EC3, new int [] {0x0065, 0x0302, 0x0309}); + table.put(0x2F971, new int [] {0x4334}); + table.put(0x04E3, new int [] {0x0438, 0x0304}); + table.put(0x1EC2, new int [] {0x0045, 0x0302, 0x0309}); + table.put(0x2F970, new int [] {0x7E45}); + table.put(0x04E2, new int [] {0x0418, 0x0304}); + table.put(0x1EC1, new int [] {0x0065, 0x0302, 0x0300}); + table.put(0x1EC0, new int [] {0x0045, 0x0302, 0x0300}); + table.put(0xF9FF, new int [] {0x523A}); + table.put(0xF9FE, new int [] {0x8336}); + table.put(0x2F96F, new int [] {0x7E02}); + table.put(0xF9FD, new int [] {0x4EC0}); + table.put(0x2F96E, new int [] {0x7DC7}); + table.put(0x1EBF, new int [] {0x0065, 0x0302, 0x0301}); + table.put(0xF9FC, new int [] {0x8B58}); + table.put(0x2F96D, new int [] {0x4301}); + 
table.put(0x04DF, new int [] {0x0437, 0x0308}); + table.put(0x1EBE, new int [] {0x0045, 0x0302, 0x0301}); + table.put(0xF9FB, new int [] {0x7099}); + table.put(0x2F96C, new int [] {0x7D63}); + table.put(0x04DE, new int [] {0x0417, 0x0308}); + table.put(0x1EBD, new int [] {0x0065, 0x0303}); + table.put(0xF9FA, new int [] {0x72C0}); + table.put(0x2F96B, new int [] {0x25F86}); + table.put(0x04DD, new int [] {0x0436, 0x0308}); + table.put(0x1EBC, new int [] {0x0045, 0x0303}); + table.put(0x2F96A, new int [] {0x7D00}); + table.put(0x04DC, new int [] {0x0416, 0x0308}); + table.put(0x1EBB, new int [] {0x0065, 0x0309}); + table.put(0x04DB, new int [] {0x04D9, 0x0308}); + table.put(0x1EBA, new int [] {0x0045, 0x0309}); + table.put(0x04DA, new int [] {0x04D8, 0x0308}); + table.put(0xF9F9, new int [] {0x7C92}); + table.put(0xF9F8, new int [] {0x7B20}); + table.put(0x2F969, new int [] {0x7CE3}); + table.put(0xF9F7, new int [] {0x7ACB}); + table.put(0x2F968, new int [] {0x7CE8}); + table.put(0x1EB9, new int [] {0x0065, 0x0323}); + table.put(0xF9F6, new int [] {0x81E8}); + table.put(0x2F967, new int [] {0x42A0}); + table.put(0x1EB8, new int [] {0x0045, 0x0323}); + table.put(0xF9F5, new int [] {0x6DCB}); + table.put(0x2F966, new int [] {0x7CD2}); + table.put(0x1EB7, new int [] {0x0061, 0x0323, 0x0306}); + table.put(0xF9F4, new int [] {0x6797}); + table.put(0x2F965, new int [] {0x25C80}); + table.put(0x04D7, new int [] {0x0435, 0x0306}); + table.put(0x1EB6, new int [] {0x0041, 0x0323, 0x0306}); + table.put(0xF9F3, new int [] {0x9E9F}); + table.put(0x2F964, new int [] {0x4227}); + table.put(0x04D6, new int [] {0x0415, 0x0306}); + table.put(0x1EB5, new int [] {0x0061, 0x0306, 0x0303}); + table.put(0xF9F2, new int [] {0x9C57}); + table.put(0x2F963, new int [] {0x7BC9}); + table.put(0x1EB4, new int [] {0x0041, 0x0306, 0x0303}); + table.put(0xF9F1, new int [] {0x96A3}); + table.put(0x2F962, new int [] {0x7BC6}); + table.put(0x1EB3, new int [] {0x0061, 0x0306, 0x0309}); + 
table.put(0xF9F0, new int [] {0x85FA}); + table.put(0x2F961, new int [] {0x25BAB}); + table.put(0x04D3, new int [] {0x0430, 0x0308}); + table.put(0x1EB2, new int [] {0x0041, 0x0306, 0x0309}); + table.put(0x2F960, new int [] {0x4202}); + table.put(0x04D2, new int [] {0x0410, 0x0308}); + table.put(0x1EB1, new int [] {0x0061, 0x0306, 0x0300}); + table.put(0x04D1, new int [] {0x0430, 0x0306}); + table.put(0x1EB0, new int [] {0x0041, 0x0306, 0x0300}); + table.put(0x04D0, new int [] {0x0410, 0x0306}); + table.put(0xF9EF, new int [] {0x7498}); + table.put(0xF9EE, new int [] {0x71D0}); + table.put(0x2F95F, new int [] {0x7AEE}); + table.put(0xF9ED, new int [] {0x541D}); + table.put(0x2F95E, new int [] {0x25AA7}); + table.put(0x1EAF, new int [] {0x0061, 0x0306, 0x0301}); + table.put(0xF9EC, new int [] {0x6EBA}); + table.put(0x2F95D, new int [] {0x25AA7}); + table.put(0x1EAE, new int [] {0x0041, 0x0306, 0x0301}); + table.put(0xF9EB, new int [] {0x533F}); + table.put(0x2F95C, new int [] {0x2597C}); + table.put(0x1EAD, new int [] {0x0061, 0x0323, 0x0302}); + table.put(0xF9EA, new int [] {0x96E2}); + table.put(0x2F95B, new int [] {0x7A4F}); + table.put(0x1EAC, new int [] {0x0041, 0x0323, 0x0302}); + table.put(0x2F95A, new int [] {0x7A4A}); + table.put(0x1EAB, new int [] {0x0061, 0x0302, 0x0303}); + table.put(0x1EAA, new int [] {0x0041, 0x0302, 0x0303}); + table.put(0xF9E9, new int [] {0x91CC}); + table.put(0xF9E8, new int [] {0x88E1}); + table.put(0x2F959, new int [] {0x7A40}); + table.put(0xF9E7, new int [] {0x88CF}); + table.put(0x2F958, new int [] {0x412F}); + table.put(0x1EA9, new int [] {0x0061, 0x0302, 0x0309}); + table.put(0xF9E6, new int [] {0x7F79}); + table.put(0x2F957, new int [] {0x79EB}); + table.put(0x1EA8, new int [] {0x0041, 0x0302, 0x0309}); + table.put(0xF9E5, new int [] {0x75E2}); + table.put(0x2F956, new int [] {0x798F}); + table.put(0x1EA7, new int [] {0x0061, 0x0302, 0x0300}); + table.put(0xF9E4, new int [] {0x7406}); + table.put(0x2F955, new int [] 
{0x256C5}); + table.put(0x1EA6, new int [] {0x0041, 0x0302, 0x0300}); + table.put(0xF9E3, new int [] {0x6CE5}); + table.put(0x2F954, new int [] {0x2569A}); + table.put(0x1EA5, new int [] {0x0061, 0x0302, 0x0301}); + table.put(0xF9E2, new int [] {0x68A8}); + table.put(0x2F953, new int [] {0x7956}); + table.put(0x1EA4, new int [] {0x0041, 0x0302, 0x0301}); + table.put(0xF9E1, new int [] {0x674E}); + table.put(0x2F952, new int [] {0x25626}); + table.put(0x1EA3, new int [] {0x0061, 0x0309}); + table.put(0xF9E0, new int [] {0x6613}); + table.put(0x2F951, new int [] {0x40E3}); + table.put(0x1EA2, new int [] {0x0041, 0x0309}); + table.put(0x2F950, new int [] {0x78CC}); + table.put(0x04C2, new int [] {0x0436, 0x0306}); + table.put(0x1EA1, new int [] {0x0061, 0x0323}); + table.put(0x04C1, new int [] {0x0416, 0x0306}); + table.put(0x1EA0, new int [] {0x0041, 0x0323}); + table.put(0xF9DF, new int [] {0x5C65}); + table.put(0xF9DE, new int [] {0x540F}); + table.put(0x2F94F, new int [] {0x788C}); + table.put(0xF9DD, new int [] {0x5229}); + table.put(0x2F94E, new int [] {0x784E}); + table.put(0xF9DC, new int [] {0x9686}); + table.put(0x2F94D, new int [] {0x2541D}); + table.put(0xF9DB, new int [] {0x7387}); + table.put(0x2F94C, new int [] {0x4096}); + table.put(0xF9DA, new int [] {0x6817}); + table.put(0x2F94B, new int [] {0x4046}); + table.put(0x2F94A, new int [] {0x778B}); + table.put(0xF9D9, new int [] {0x6144}); + table.put(0xF9D8, new int [] {0x5F8B}); + table.put(0x2F949, new int [] {0x4039}); + table.put(0xF9D7, new int [] {0x8F2A}); + table.put(0x2F948, new int [] {0x774A}); + table.put(0xF9D6, new int [] {0x6DEA}); + table.put(0x2F947, new int [] {0x771F}); + table.put(0xF9D5, new int [] {0x5D19}); + table.put(0x2F946, new int [] {0x771F}); + table.put(0xF9D4, new int [] {0x502B}); + table.put(0x2F945, new int [] {0x771E}); + table.put(0xF9D3, new int [] {0x9678}); + table.put(0x2F944, new int [] {0x25133}); + table.put(0xF9D2, new int [] {0x622E}); + table.put(0x2F943, 
new int [] {0x25119}); + table.put(0xF9D1, new int [] {0x516D}); + table.put(0x2F942, new int [] {0x250F2}); + table.put(0xF9D0, new int [] {0x985E}); + table.put(0x2F941, new int [] {0x250F3}); + table.put(0x2F940, new int [] {0x76F4}); + table.put(0xF9CF, new int [] {0x7D10}); + table.put(0xF9CE, new int [] {0x786B}); + table.put(0x2F93F, new int [] {0x4008}); + table.put(0xF9CD, new int [] {0x7559}); + table.put(0x2F93E, new int [] {0x3FFC}); + table.put(0xF9CC, new int [] {0x7409}); + table.put(0x2F93D, new int [] {0x25044}); + table.put(0xF9CB, new int [] {0x6E9C}); + table.put(0x2F93C, new int [] {0x24FB8}); + table.put(0xF9CA, new int [] {0x6D41}); + table.put(0x2F93B, new int [] {0x24FA1}); + table.put(0x2F93A, new int [] {0x7610}); + table.put(0xF9C9, new int [] {0x67F3}); + table.put(0xF9C8, new int [] {0x677B}); + table.put(0x2F939, new int [] {0x2219F}); + table.put(0xF9C7, new int [] {0x5289}); + table.put(0x2F938, new int [] {0x7570}); + table.put(0xF9C6, new int [] {0x962E}); + table.put(0x2F937, new int [] {0x24C92}); + table.put(0xF9C5, new int [] {0x6688}); + table.put(0x2F936, new int [] {0x753E}); + table.put(0xF9C4, new int [] {0x9F8D}); + table.put(0x2F935, new int [] {0x24C36}); + table.put(0xF9C3, new int [] {0x907C}); + table.put(0x2F934, new int [] {0x7524}); + table.put(0xF9C2, new int [] {0x84FC}); + table.put(0x2F933, new int [] {0x3F1B}); + table.put(0xF9C1, new int [] {0x7642}); + table.put(0x2F932, new int [] {0x74CA}); + table.put(0xF9C0, new int [] {0x71CE}); + table.put(0x2F931, new int [] {0x7485}); + table.put(0x2F930, new int [] {0x7471}); + table.put(0xF9BF, new int [] {0x6A02}); + table.put(0xF9BE, new int [] {0x6599}); + table.put(0x2F92F, new int [] {0x745C}); + table.put(0xF9BD, new int [] {0x5C3F}); + table.put(0x2F92E, new int [] {0x7447}); + table.put(0xF9BC, new int [] {0x5BEE}); + table.put(0x2F92D, new int [] {0x3EB8}); + table.put(0xF9BB, new int [] {0x50DA}); + table.put(0x2F92C, new int [] {0x3EB8}); + 
table.put(0xF9BA, new int [] {0x4E86}); + table.put(0x2F92B, new int [] {0x73A5}); + table.put(0x2F92A, new int [] {0x3EAC}); + table.put(0xF9B9, new int [] {0x60E1}); + table.put(0xF9B8, new int [] {0x96B8}); + table.put(0x2F929, new int [] {0x738B}); + table.put(0xF9B7, new int [] {0x91B4}); + table.put(0x2F928, new int [] {0x737A}); + table.put(0xF9B6, new int [] {0x79AE}); + table.put(0x2F927, new int [] {0x24814}); + table.put(0xF9B5, new int [] {0x4F8B}); + table.put(0x2F926, new int [] {0x24735}); + table.put(0xF9B4, new int [] {0x9818}); + table.put(0x2F925, new int [] {0x7295}); + table.put(0xF9B3, new int [] {0x9748}); + table.put(0x2F924, new int [] {0x7280}); + table.put(0xF9B2, new int [] {0x96F6}); + table.put(0x2F923, new int [] {0x24608}); + table.put(0xF9B1, new int [] {0x9234}); + table.put(0x2F922, new int [] {0x7250}); + table.put(0xF9B0, new int [] {0x8046}); + table.put(0x2F921, new int [] {0x7235}); + table.put(0x2F920, new int [] {0x7228}); + table.put(0xF9AF, new int [] {0x7F9A}); + table.put(0xF9AE, new int [] {0x7469}); + table.put(0x2F91F, new int [] {0x243AB}); + table.put(0xF9AD, new int [] {0x73B2}); + table.put(0x2F91E, new int [] {0x719C}); + table.put(0xF9AC, new int [] {0x601C}); + table.put(0x2F91D, new int [] {0x24263}); + table.put(0xF9AB, new int [] {0x5DBA}); + table.put(0x2F91C, new int [] {0x7145}); + table.put(0xF9AA, new int [] {0x5BE7}); + table.put(0x2F91B, new int [] {0x20525}); + table.put(0x2F91A, new int [] {0x70AD}); + table.put(0xF9A9, new int [] {0x56F9}); + table.put(0xF9A8, new int [] {0x4EE4}); + table.put(0x2F919, new int [] {0x7077}); + table.put(0xF9A7, new int [] {0x7375}); + table.put(0x2F918, new int [] {0x707D}); + table.put(0xF9A6, new int [] {0x7C3E}); + table.put(0x2F917, new int [] {0x704A}); + table.put(0xF9A5, new int [] {0x6BAE}); + table.put(0x2F916, new int [] {0x3D96}); + table.put(0xF9A4, new int [] {0x637B}); + table.put(0x2F915, new int [] {0x701B}); + table.put(0xF9A3, new int [] 
{0x5FF5}); + table.put(0x2F914, new int [] {0x701E}); + table.put(0xF9A2, new int [] {0x5EC9}); + table.put(0x2F913, new int [] {0x7039}); + table.put(0xF9A1, new int [] {0x8AAA}); + table.put(0x2F912, new int [] {0x6FC6}); + table.put(0xF9A0, new int [] {0x88C2}); + table.put(0x2F911, new int [] {0x23F8E}); + table.put(0x2F910, new int [] {0x23F5E}); + table.put(0x2001, new int [] {0x2003}); + table.put(0x2000, new int [] {0x2002}); + table.put(0x2F90F, new int [] {0x6F6E}); + table.put(0x2F90E, new int [] {0x6DF9}); + table.put(0x2F90D, new int [] {0x23ED1}); + table.put(0x2F90C, new int [] {0x6EC7}); + table.put(0x2F90B, new int [] {0x6ECB}); + table.put(0x2F90A, new int [] {0x3D33}); + table.put(0x2F909, new int [] {0x6E6E}); + table.put(0x2F908, new int [] {0x6E2F}); + table.put(0x2F907, new int [] {0x6D34}); + table.put(0x2F906, new int [] {0x23D1E}); + table.put(0x2F905, new int [] {0x6D85}); + table.put(0x2F904, new int [] {0x6D78}); + table.put(0x2F903, new int [] {0x6D69}); + table.put(0x2F902, new int [] {0x6D41}); + table.put(0x2F901, new int [] {0x6D77}); + table.put(0x2F900, new int [] {0x6D3E}); + table.put(0x017E, new int [] {0x007A, 0x030C}); + table.put(0x017D, new int [] {0x005A, 0x030C}); + table.put(0x017C, new int [] {0x007A, 0x0307}); + table.put(0x017B, new int [] {0x005A, 0x0307}); + table.put(0x017A, new int [] {0x007A, 0x0301}); + table.put(0x0179, new int [] {0x005A, 0x0301}); + table.put(0x0178, new int [] {0x0059, 0x0308}); + table.put(0x0177, new int [] {0x0079, 0x0302}); + table.put(0x0176, new int [] {0x0059, 0x0302}); + table.put(0x0175, new int [] {0x0077, 0x0302}); + table.put(0x0174, new int [] {0x0057, 0x0302}); + table.put(0x0173, new int [] {0x0075, 0x0328}); + table.put(0x0172, new int [] {0x0055, 0x0328}); + table.put(0x0171, new int [] {0x0075, 0x030B}); + table.put(0x0170, new int [] {0x0055, 0x030B}); + table.put(0x016F, new int [] {0x0075, 0x030A}); + table.put(0x016E, new int [] {0x0055, 0x030A}); + table.put(0x016D, 
new int [] {0x0075, 0x0306}); + table.put(0x016C, new int [] {0x0055, 0x0306}); + table.put(0x016B, new int [] {0x0075, 0x0304}); + table.put(0x016A, new int [] {0x0055, 0x0304}); + table.put(0x0A5E, new int [] {0x0A2B, 0x0A3C}); + table.put(0x0169, new int [] {0x0075, 0x0303}); + table.put(0x0168, new int [] {0x0055, 0x0303}); + table.put(0x0A5B, new int [] {0x0A1C, 0x0A3C}); + table.put(0x0A5A, new int [] {0x0A17, 0x0A3C}); + table.put(0x0165, new int [] {0x0074, 0x030C}); + table.put(0x0164, new int [] {0x0054, 0x030C}); + table.put(0x0163, new int [] {0x0074, 0x0327}); + table.put(0x0162, new int [] {0x0054, 0x0327}); + table.put(0x0161, new int [] {0x0073, 0x030C}); + table.put(0x0160, new int [] {0x0053, 0x030C}); + table.put(0x0A59, new int [] {0x0A16, 0x0A3C}); + table.put(0x015F, new int [] {0x0073, 0x0327}); + table.put(0x015E, new int [] {0x0053, 0x0327}); + table.put(0x015D, new int [] {0x0073, 0x0302}); + table.put(0x015C, new int [] {0x0053, 0x0302}); + table.put(0x015B, new int [] {0x0073, 0x0301}); + table.put(0x015A, new int [] {0x0053, 0x0301}); + table.put(0x1E9B, new int [] {0x017F, 0x0307}); + table.put(0x0159, new int [] {0x0072, 0x030C}); + table.put(0x0158, new int [] {0x0052, 0x030C}); + table.put(0x1E99, new int [] {0x0079, 0x030A}); + table.put(0x232A, new int [] {0x3009}); + table.put(0x0157, new int [] {0x0072, 0x0327}); + table.put(0x1E98, new int [] {0x0077, 0x030A}); + table.put(0x0156, new int [] {0x0052, 0x0327}); + table.put(0x1E97, new int [] {0x0074, 0x0308}); + table.put(0x0155, new int [] {0x0072, 0x0301}); + table.put(0x1E96, new int [] {0x0068, 0x0331}); + table.put(0x0154, new int [] {0x0052, 0x0301}); + table.put(0x1E95, new int [] {0x007A, 0x0331}); + table.put(0x1E94, new int [] {0x005A, 0x0331}); + table.put(0x1E93, new int [] {0x007A, 0x0323}); + table.put(0x0151, new int [] {0x006F, 0x030B}); + table.put(0x1E92, new int [] {0x005A, 0x0323}); + table.put(0x0150, new int [] {0x004F, 0x030B}); + table.put(0x1E91, new int 
[] {0x007A, 0x0302}); + table.put(0x2329, new int [] {0x3008}); + table.put(0x1E90, new int [] {0x005A, 0x0302}); + table.put(0x014F, new int [] {0x006F, 0x0306}); + table.put(0x014E, new int [] {0x004F, 0x0306}); + table.put(0x1E8F, new int [] {0x0079, 0x0307}); + table.put(0x014D, new int [] {0x006F, 0x0304}); + table.put(0x1E8E, new int [] {0x0059, 0x0307}); + table.put(0x014C, new int [] {0x004F, 0x0304}); + table.put(0x1E8D, new int [] {0x0078, 0x0308}); + table.put(0x1E8C, new int [] {0x0058, 0x0308}); + table.put(0x1E8B, new int [] {0x0078, 0x0307}); + table.put(0x1E8A, new int [] {0x0058, 0x0307}); + table.put(0x0148, new int [] {0x006E, 0x030C}); + table.put(0x1E89, new int [] {0x0077, 0x0323}); + table.put(0x0147, new int [] {0x004E, 0x030C}); + table.put(0x1E88, new int [] {0x0057, 0x0323}); + table.put(0x0146, new int [] {0x006E, 0x0327}); + table.put(0x1E87, new int [] {0x0077, 0x0307}); + table.put(0x0145, new int [] {0x004E, 0x0327}); + table.put(0x1E86, new int [] {0x0057, 0x0307}); + table.put(0x0144, new int [] {0x006E, 0x0301}); + table.put(0x1E85, new int [] {0x0077, 0x0308}); + table.put(0x0143, new int [] {0x004E, 0x0301}); + table.put(0x1E84, new int [] {0x0057, 0x0308}); + table.put(0x1E83, new int [] {0x0077, 0x0301}); + table.put(0x1E82, new int [] {0x0057, 0x0301}); + table.put(0x1E81, new int [] {0x0077, 0x0300}); + table.put(0x1E80, new int [] {0x0057, 0x0300}); + table.put(0x0A36, new int [] {0x0A38, 0x0A3C}); + table.put(0x0A33, new int [] {0x0A32, 0x0A3C}); + table.put(0x013E, new int [] {0x006C, 0x030C}); + table.put(0x1E7F, new int [] {0x0076, 0x0323}); + table.put(0x013D, new int [] {0x004C, 0x030C}); + table.put(0x1E7E, new int [] {0x0056, 0x0323}); + table.put(0x013C, new int [] {0x006C, 0x0327}); + table.put(0x1E7D, new int [] {0x0076, 0x0303}); + table.put(0x013B, new int [] {0x004C, 0x0327}); + table.put(0x1E7C, new int [] {0x0056, 0x0303}); + table.put(0x013A, new int [] {0x006C, 0x0301}); + table.put(0x1E7B, new int [] 
{0x0075, 0x0304, 0x0308}); + table.put(0x1E7A, new int [] {0x0055, 0x0304, 0x0308}); + table.put(0x0139, new int [] {0x004C, 0x0301}); + table.put(0x1E79, new int [] {0x0075, 0x0303, 0x0301}); + table.put(0x0137, new int [] {0x006B, 0x0327}); + table.put(0x1E78, new int [] {0x0055, 0x0303, 0x0301}); + table.put(0x0136, new int [] {0x004B, 0x0327}); + table.put(0x1E77, new int [] {0x0075, 0x032D}); + table.put(0x0135, new int [] {0x006A, 0x0302}); + table.put(0x1E76, new int [] {0x0055, 0x032D}); + table.put(0x0134, new int [] {0x004A, 0x0302}); + table.put(0x1E75, new int [] {0x0075, 0x0330}); + table.put(0x1E74, new int [] {0x0055, 0x0330}); + table.put(0x1E73, new int [] {0x0075, 0x0324}); + table.put(0x1E72, new int [] {0x0055, 0x0324}); + table.put(0x0130, new int [] {0x0049, 0x0307}); + table.put(0x1E71, new int [] {0x0074, 0x032D}); + table.put(0x1E70, new int [] {0x0054, 0x032D}); + table.put(0x012F, new int [] {0x0069, 0x0328}); + table.put(0x012E, new int [] {0x0049, 0x0328}); + table.put(0x1E6F, new int [] {0x0074, 0x0331}); + table.put(0x012D, new int [] {0x0069, 0x0306}); + table.put(0x1E6E, new int [] {0x0054, 0x0331}); + table.put(0x012C, new int [] {0x0049, 0x0306}); + table.put(0x1E6D, new int [] {0x0074, 0x0323}); + table.put(0x012B, new int [] {0x0069, 0x0304}); + table.put(0x1E6C, new int [] {0x0054, 0x0323}); + table.put(0x012A, new int [] {0x0049, 0x0304}); + table.put(0x1E6B, new int [] {0x0074, 0x0307}); + table.put(0x1E6A, new int [] {0x0054, 0x0307}); + table.put(0x0129, new int [] {0x0069, 0x0303}); + table.put(0x0128, new int [] {0x0049, 0x0303}); + table.put(0x1E69, new int [] {0x0073, 0x0323, 0x0307}); + table.put(0x1E68, new int [] {0x0053, 0x0323, 0x0307}); + table.put(0x1E67, new int [] {0x0073, 0x030C, 0x0307}); + table.put(0x0125, new int [] {0x0068, 0x0302}); + table.put(0x1E66, new int [] {0x0053, 0x030C, 0x0307}); + table.put(0x0124, new int [] {0x0048, 0x0302}); + table.put(0x1E65, new int [] {0x0073, 0x0301, 0x0307}); + 
table.put(0x0123, new int [] {0x0067, 0x0327}); + table.put(0x1E64, new int [] {0x0053, 0x0301, 0x0307}); + table.put(0x0122, new int [] {0x0047, 0x0327}); + table.put(0x1E63, new int [] {0x0073, 0x0323}); + table.put(0x0121, new int [] {0x0067, 0x0307}); + table.put(0x1E62, new int [] {0x0053, 0x0323}); + table.put(0x0120, new int [] {0x0047, 0x0307}); + table.put(0x1E61, new int [] {0x0073, 0x0307}); + table.put(0x1E60, new int [] {0x0053, 0x0307}); + table.put(0x30FE, new int [] {0x30FD, 0x3099}); + table.put(0xF99F, new int [] {0x70C8}); + table.put(0xF99E, new int [] {0x54BD}); + table.put(0x011F, new int [] {0x0067, 0x0306}); + table.put(0xF99D, new int [] {0x52A3}); + table.put(0x011E, new int [] {0x0047, 0x0306}); + table.put(0x1E5F, new int [] {0x0072, 0x0331}); + table.put(0x30FA, new int [] {0x30F2, 0x3099}); + table.put(0xF99C, new int [] {0x5217}); + table.put(0x011D, new int [] {0x0067, 0x0302}); + table.put(0x1E5E, new int [] {0x0052, 0x0331}); + table.put(0xF99B, new int [] {0x934A}); + table.put(0x011C, new int [] {0x0047, 0x0302}); + table.put(0x1E5D, new int [] {0x0072, 0x0323, 0x0304}); + table.put(0xF99A, new int [] {0x9023}); + table.put(0x011B, new int [] {0x0065, 0x030C}); + table.put(0x1E5C, new int [] {0x0052, 0x0323, 0x0304}); + table.put(0x011A, new int [] {0x0045, 0x030C}); + table.put(0x1E5B, new int [] {0x0072, 0x0323}); + table.put(0x1E5A, new int [] {0x0052, 0x0323}); + table.put(0x30F9, new int [] {0x30F1, 0x3099}); + table.put(0x30F8, new int [] {0x30F0, 0x3099}); + table.put(0x30F7, new int [] {0x30EF, 0x3099}); + table.put(0xF999, new int [] {0x84EE}); + table.put(0xF998, new int [] {0x8F26}); + table.put(0x0119, new int [] {0x0065, 0x0328}); + table.put(0xF997, new int [] {0x806F}); + table.put(0x0118, new int [] {0x0045, 0x0328}); + table.put(0x1E59, new int [] {0x0072, 0x0307}); + table.put(0x30F4, new int [] {0x30A6, 0x3099}); + table.put(0xF996, new int [] {0x7DF4}); + table.put(0x0117, new int [] {0x0065, 0x0307}); + 
table.put(0x1E58, new int [] {0x0052, 0x0307}); + table.put(0xF995, new int [] {0x79CA}); + table.put(0x0116, new int [] {0x0045, 0x0307}); + table.put(0x1E57, new int [] {0x0070, 0x0307}); + table.put(0xF994, new int [] {0x7489}); + table.put(0xF993, new int [] {0x7149}); + table.put(0x0115, new int [] {0x0065, 0x0306}); + table.put(0x0477, new int [] {0x0475, 0x030F}); + table.put(0x1E56, new int [] {0x0050, 0x0307}); + table.put(0xF992, new int [] {0x6F23}); + table.put(0x1E55, new int [] {0x0070, 0x0301}); + table.put(0x0476, new int [] {0x0474, 0x030F}); + table.put(0x0114, new int [] {0x0045, 0x0306}); + table.put(0xF991, new int [] {0x649A}); + table.put(0x1E54, new int [] {0x0050, 0x0301}); + table.put(0x0113, new int [] {0x0065, 0x0304}); + table.put(0xF990, new int [] {0x6200}); + table.put(0x1E53, new int [] {0x006F, 0x0304, 0x0301}); + table.put(0x0112, new int [] {0x0045, 0x0304}); + table.put(0x1E52, new int [] {0x004F, 0x0304, 0x0301}); + table.put(0x1E51, new int [] {0x006F, 0x0304, 0x0300}); + table.put(0x1E50, new int [] {0x004F, 0x0304, 0x0300}); + table.put(0xF98F, new int [] {0x6190}); + table.put(0xF98E, new int [] {0x5E74}); + table.put(0xF98D, new int [] {0x8F62}); + table.put(0x010F, new int [] {0x0064, 0x030C}); + table.put(0xF98C, new int [] {0x6B77}); + table.put(0x1E4F, new int [] {0x006F, 0x0303, 0x0308}); + table.put(0x010E, new int [] {0x0044, 0x030C}); + table.put(0xF98B, new int [] {0x66C6}); + table.put(0x1E4E, new int [] {0x004F, 0x0303, 0x0308}); + table.put(0x010D, new int [] {0x0063, 0x030C}); + table.put(0xF98A, new int [] {0x529B}); + table.put(0x1E4D, new int [] {0x006F, 0x0303, 0x0301}); + table.put(0x010C, new int [] {0x0043, 0x030C}); + table.put(0x1E4C, new int [] {0x004F, 0x0303, 0x0301}); + table.put(0x010B, new int [] {0x0063, 0x0307}); + table.put(0x1E4B, new int [] {0x006E, 0x032D}); + table.put(0x010A, new int [] {0x0043, 0x0307}); + table.put(0x1E4A, new int [] {0x004E, 0x032D}); + table.put(0xF989, new int [] 
{0x9ECE}); + table.put(0xF988, new int [] {0x9E97}); + table.put(0xF987, new int [] {0x9A6A}); + table.put(0x0109, new int [] {0x0063, 0x0302}); + table.put(0xF986, new int [] {0x95AD}); + table.put(0x1E49, new int [] {0x006E, 0x0331}); + table.put(0x0108, new int [] {0x0043, 0x0302}); + table.put(0xF985, new int [] {0x792A}); + table.put(0x1E48, new int [] {0x004E, 0x0331}); + table.put(0x0107, new int [] {0x0063, 0x0301}); + table.put(0xF984, new int [] {0x6FFE}); + table.put(0x1E47, new int [] {0x006E, 0x0323}); + table.put(0x0106, new int [] {0x0043, 0x0301}); + table.put(0xF983, new int [] {0x65C5}); + table.put(0x1E46, new int [] {0x004E, 0x0323}); + table.put(0x0105, new int [] {0x0061, 0x0328}); + table.put(0xF982, new int [] {0x5EEC}); + table.put(0x1E45, new int [] {0x006E, 0x0307}); + table.put(0x0104, new int [] {0x0041, 0x0328}); + table.put(0xF981, new int [] {0x5973}); + table.put(0x1E44, new int [] {0x004E, 0x0307}); + table.put(0x0103, new int [] {0x0061, 0x0306}); + table.put(0xF980, new int [] {0x5442}); + table.put(0x1E43, new int [] {0x006D, 0x0323}); + table.put(0x0102, new int [] {0x0041, 0x0306}); + table.put(0x1E42, new int [] {0x004D, 0x0323}); + table.put(0x0101, new int [] {0x0061, 0x0304}); + table.put(0x1E41, new int [] {0x006D, 0x0307}); + table.put(0x0100, new int [] {0x0041, 0x0304}); + table.put(0x1E40, new int [] {0x004D, 0x0307}); + table.put(0xF97F, new int [] {0x52F5}); + table.put(0x30DD, new int [] {0x30DB, 0x309A}); + table.put(0xF97E, new int [] {0x91CF}); + table.put(0x30DC, new int [] {0x30DB, 0x3099}); + table.put(0xF97D, new int [] {0x8AD2}); + table.put(0x2F8FF, new int [] {0x6D16}); + table.put(0xF97C, new int [] {0x826F}); + table.put(0x30DA, new int [] {0x30D8, 0x309A}); + table.put(0x1E3F, new int [] {0x006D, 0x0301}); + table.put(0x2F8FE, new int [] {0x6C67}); + table.put(0xF97B, new int [] {0x7CE7}); + table.put(0x1E3E, new int [] {0x004D, 0x0301}); + table.put(0x2F8FD, new int [] {0x6CCD}); + table.put(0xF97A, 
new int [] {0x6881}); + table.put(0x1E3D, new int [] {0x006C, 0x032D}); + table.put(0x045E, new int [] {0x0443, 0x0306}); + table.put(0x2F8FC, new int [] {0x6CBF}); + table.put(0x1E3C, new int [] {0x004C, 0x032D}); + table.put(0x045D, new int [] {0x0438, 0x0300}); + table.put(0x2F8FB, new int [] {0x23CBC}); + table.put(0x1E3B, new int [] {0x006C, 0x0331}); + table.put(0x045C, new int [] {0x043A, 0x0301}); + table.put(0x2F8FA, new int [] {0x6C4E}); + table.put(0x1E3A, new int [] {0x004C, 0x0331}); + table.put(0x30D9, new int [] {0x30D8, 0x3099}); + table.put(0xF979, new int [] {0x51C9}); + table.put(0x30D7, new int [] {0x30D5, 0x309A}); + table.put(0xF978, new int [] {0x5169}); + table.put(0x30D6, new int [] {0x30D5, 0x3099}); + table.put(0xF977, new int [] {0x4EAE}); + table.put(0x2F8F9, new int [] {0x23AFA}); + table.put(0xF976, new int [] {0x7565}); + table.put(0x30D4, new int [] {0x30D2, 0x309A}); + table.put(0x1E39, new int [] {0x006C, 0x0323, 0x0304}); + table.put(0x2F8F8, new int [] {0x21D0B}); + table.put(0xF975, new int [] {0x63A0}); + table.put(0x30D3, new int [] {0x30D2, 0x3099}); + table.put(0x1E38, new int [] {0x004C, 0x0323, 0x0304}); + table.put(0x0D4C, new int [] {0x0D46, 0x0D57}); + table.put(0x2F8F7, new int [] {0x23A8D}); + table.put(0xF974, new int [] {0x82E5}); + table.put(0x1E37, new int [] {0x006C, 0x0323}); + table.put(0x0D4B, new int [] {0x0D47, 0x0D3E}); + table.put(0x2F8F6, new int [] {0x6BBB}); + table.put(0xF973, new int [] {0x62FE}); + table.put(0x30D1, new int [] {0x30CF, 0x309A}); + table.put(0x1E36, new int [] {0x004C, 0x0323}); + table.put(0x0D4A, new int [] {0x0D46, 0x0D3E}); + table.put(0x0457, new int [] {0x0456, 0x0308}); + table.put(0x2F8F5, new int [] {0x6BBA}); + table.put(0xF972, new int [] {0x6C88}); + table.put(0x30D0, new int [] {0x30CF, 0x3099}); + table.put(0x1E35, new int [] {0x006B, 0x0331}); + table.put(0x2F8F4, new int [] {0x6B9F}); + table.put(0xF971, new int [] {0x8FB0}); + table.put(0x1E34, new int [] {0x004B, 
0x0331}); + table.put(0x2F8F3, new int [] {0x6B72}); + table.put(0xF970, new int [] {0x6BBA}); + table.put(0x1E33, new int [] {0x006B, 0x0323}); + table.put(0x2F8F2, new int [] {0x3C4E}); + table.put(0x1E32, new int [] {0x004B, 0x0323}); + table.put(0x0453, new int [] {0x0433, 0x0301}); + table.put(0x2F8F1, new int [] {0x6B54}); + table.put(0x1E31, new int [] {0x006B, 0x0301}); + table.put(0x2F8F0, new int [] {0x238A7}); + table.put(0x1E30, new int [] {0x004B, 0x0301}); + table.put(0x0451, new int [] {0x0435, 0x0308}); + table.put(0x0450, new int [] {0x0435, 0x0300}); + table.put(0xF96F, new int [] {0x8AAA}); + table.put(0xF96E, new int [] {0x8449}); + table.put(0xF96D, new int [] {0x7701}); + table.put(0x2F8EF, new int [] {0x6B21}); + table.put(0xF96C, new int [] {0x585E}); + table.put(0x1E2F, new int [] {0x0069, 0x0308, 0x0301}); + table.put(0x2F8EE, new int [] {0x3C18}); + table.put(0xF96B, new int [] {0x53C3}); + table.put(0x1E2E, new int [] {0x0049, 0x0308, 0x0301}); + table.put(0x2F8ED, new int [] {0x6ADB}); + table.put(0xF96A, new int [] {0x7D22}); + table.put(0x1E2D, new int [] {0x0069, 0x0330}); + table.put(0x2F8EC, new int [] {0x236A3}); + table.put(0x1E2C, new int [] {0x0049, 0x0330}); + table.put(0x2F8EB, new int [] {0x6AA8}); + table.put(0x1E2B, new int [] {0x0068, 0x032E}); + table.put(0x2F8EA, new int [] {0x69EA}); + table.put(0x1E2A, new int [] {0x0048, 0x032E}); + table.put(0x30C9, new int [] {0x30C8, 0x3099}); + table.put(0xF969, new int [] {0x6578}); + table.put(0x30C7, new int [] {0x30C6, 0x3099}); + table.put(0xF968, new int [] {0x6CCC}); + table.put(0xF967, new int [] {0x4E0D}); + table.put(0x30C5, new int [] {0x30C4, 0x3099}); + table.put(0x2F8E9, new int [] {0x69A3}); + table.put(0xF966, new int [] {0x5FA9}); + table.put(0x1E29, new int [] {0x0068, 0x0327}); + table.put(0x2F8E8, new int [] {0x6942}); + table.put(0xF965, new int [] {0x4FBF}); + table.put(0x1E28, new int [] {0x0048, 0x0327}); + table.put(0x2F8E7, new int [] {0x3B9D}); + 
table.put(0xF964, new int [] {0x78FB}); + table.put(0x30C2, new int [] {0x30C1, 0x3099}); + table.put(0x1E27, new int [] {0x0068, 0x0308}); + table.put(0x2F8E6, new int [] {0x6914}); + table.put(0xF963, new int [] {0x5317}); + table.put(0x1E26, new int [] {0x0048, 0x0308}); + table.put(0x2F8E5, new int [] {0x681F}); + table.put(0xF962, new int [] {0x7570}); + table.put(0x30C0, new int [] {0x30BF, 0x3099}); + table.put(0x1E25, new int [] {0x0068, 0x0323}); + table.put(0x2F8E4, new int [] {0x688E}); + table.put(0xF961, new int [] {0x7387}); + table.put(0x1E24, new int [] {0x0048, 0x0323}); + table.put(0x2F8E3, new int [] {0x2346D}); + table.put(0xF960, new int [] {0x6012}); + table.put(0x1E23, new int [] {0x0068, 0x0307}); + table.put(0x2F8E2, new int [] {0x6885}); + table.put(0x1E22, new int [] {0x0048, 0x0307}); + table.put(0x2F8E1, new int [] {0x6852}); + table.put(0x1E21, new int [] {0x0067, 0x0304}); + table.put(0x2F8E0, new int [] {0x6785}); + table.put(0x1E20, new int [] {0x0047, 0x0304}); + table.put(0x30BE, new int [] {0x30BD, 0x3099}); + table.put(0xF95F, new int [] {0x5BE7}); + table.put(0xF95E, new int [] {0x4E39}); + table.put(0x30BC, new int [] {0x30BB, 0x3099}); + table.put(0xF95D, new int [] {0x8AFE}); + table.put(0x2F8DF, new int [] {0x67FA}); + table.put(0xF95C, new int [] {0x6A02}); + table.put(0x30BA, new int [] {0x30B9, 0x3099}); + table.put(0x1E1F, new int [] {0x0066, 0x0307}); + table.put(0x2F8DE, new int [] {0x3B49}); + table.put(0xF95B, new int [] {0x62CF}); + table.put(0x1E1E, new int [] {0x0046, 0x0307}); + table.put(0x2F8DD, new int [] {0x233C3}); + table.put(0xF95A, new int [] {0x8B80}); + table.put(0x1E1D, new int [] {0x0065, 0x0327, 0x0306}); + table.put(0x2F8DC, new int [] {0x6753}); + table.put(0x1E1C, new int [] {0x0045, 0x0327, 0x0306}); + table.put(0x2F8DB, new int [] {0x675E}); + table.put(0x1E1B, new int [] {0x0065, 0x0330}); + table.put(0x2F8DA, new int [] {0x6721}); + table.put(0x1E1A, new int [] {0x0045, 0x0330}); + 
table.put(0x30B8, new int [] {0x30B7, 0x3099}); + table.put(0xF959, new int [] {0x9675}); + table.put(0xF958, new int [] {0x83F1}); + table.put(0x30B6, new int [] {0x30B5, 0x3099}); + table.put(0xF957, new int [] {0x7DBE}); + table.put(0x2F8D9, new int [] {0x671B}); + table.put(0xF956, new int [] {0x7A1C}); + table.put(0x30B4, new int [] {0x30B3, 0x3099}); + table.put(0x1E19, new int [] {0x0065, 0x032D}); + table.put(0x2F8D8, new int [] {0x6717}); + table.put(0xF955, new int [] {0x51CC}); + table.put(0x1E18, new int [] {0x0045, 0x032D}); + table.put(0x0439, new int [] {0x0438, 0x0306}); + table.put(0x2F8D7, new int [] {0x43D9}); + table.put(0xF954, new int [] {0x51DC}); + table.put(0x30B2, new int [] {0x30B1, 0x3099}); + table.put(0x1E17, new int [] {0x0065, 0x0304, 0x0301}); + table.put(0x2F8D6, new int [] {0x80AD}); + table.put(0xF953, new int [] {0x808B}); + table.put(0x1E16, new int [] {0x0045, 0x0304, 0x0301}); + table.put(0x2F8D5, new int [] {0x669C}); + table.put(0xF952, new int [] {0x52D2}); + table.put(0x30B0, new int [] {0x30AF, 0x3099}); + table.put(0x1E15, new int [] {0x0065, 0x0304, 0x0300}); + table.put(0x2F8D4, new int [] {0x6700}); + table.put(0xF951, new int [] {0x964B}); + table.put(0x1E14, new int [] {0x0045, 0x0304, 0x0300}); + table.put(0x2F8D3, new int [] {0x5195}); + table.put(0xF950, new int [] {0x7E37}); + table.put(0x1E13, new int [] {0x0064, 0x032D}); + table.put(0x2F8D2, new int [] {0x5192}); + table.put(0x1E12, new int [] {0x0044, 0x032D}); + table.put(0x2F8D1, new int [] {0x3AE4}); + table.put(0x1E11, new int [] {0x0064, 0x0327}); + table.put(0x2F8D0, new int [] {0x3B08}); + table.put(0x1E10, new int [] {0x0044, 0x0327}); + table.put(0x30AE, new int [] {0x30AD, 0x3099}); + table.put(0xF94F, new int [] {0x7D2F}); + table.put(0xF94E, new int [] {0x6F0F}); + table.put(0x30AC, new int [] {0x30AB, 0x3099}); + table.put(0xF94D, new int [] {0x6DDA}); + table.put(0x2F8CF, new int [] {0x6691}); + table.put(0xF94C, new int [] {0x6A13}); + 
table.put(0x1E0F, new int [] {0x0064, 0x0331}); + table.put(0x2F8CE, new int [] {0x3B19}); + table.put(0xF94B, new int [] {0x5C62}); + table.put(0x1E0E, new int [] {0x0044, 0x0331}); + table.put(0x2F8CD, new int [] {0x6649}); + table.put(0xF94A, new int [] {0x58D8}); + table.put(0x1E0D, new int [] {0x0064, 0x0323}); + table.put(0x2F8CC, new int [] {0x66F8}); + table.put(0x1E0C, new int [] {0x0044, 0x0323}); + table.put(0x2F8CB, new int [] {0x65E3}); + table.put(0x1E0B, new int [] {0x0064, 0x0307}); + table.put(0x2F8CA, new int [] {0x2300A}); + table.put(0x1E0A, new int [] {0x0044, 0x0307}); + table.put(0xF949, new int [] {0x96F7}); + table.put(0xF948, new int [] {0x8CC2}); + table.put(0xF947, new int [] {0x78CA}); + table.put(0x2F8C9, new int [] {0x656C}); + table.put(0xF946, new int [] {0x7262}); + table.put(0x1E09, new int [] {0x0063, 0x0327, 0x0301}); + table.put(0x2F8C8, new int [] {0x654F}); + table.put(0xF945, new int [] {0x807E}); + table.put(0x1E08, new int [] {0x0043, 0x0327, 0x0301}); + table.put(0x2F8C7, new int [] {0x3A6C}); + table.put(0xF944, new int [] {0x7C60}); + table.put(0x1E07, new int [] {0x0062, 0x0331}); + table.put(0x2F8C6, new int [] {0x6477}); + table.put(0xF943, new int [] {0x5F04}); + table.put(0x1E06, new int [] {0x0042, 0x0331}); + table.put(0x2F8C5, new int [] {0x649D}); + table.put(0xF942, new int [] {0x58DF}); + table.put(0x1E05, new int [] {0x0062, 0x0323}); + table.put(0x2F8C4, new int [] {0x647E}); + table.put(0xF941, new int [] {0x8AD6}); + table.put(0x1E04, new int [] {0x0042, 0x0323}); + table.put(0x2F8C3, new int [] {0x6469}); + table.put(0xF940, new int [] {0x9E7F}); + table.put(0x1E03, new int [] {0x0062, 0x0307}); + table.put(0x2F8C2, new int [] {0x3A2E}); + table.put(0x1E02, new int [] {0x0042, 0x0307}); + table.put(0x2F8C1, new int [] {0x63A9}); + table.put(0x1E01, new int [] {0x0061, 0x0325}); + table.put(0x2F8C0, new int [] {0x63C5}); + table.put(0x1E00, new int [] {0x0041, 0x0325}); + table.put(0xF93F, new int [] 
{0x9304}); + table.put(0xF93E, new int [] {0x83C9}); + table.put(0xF93D, new int [] {0x7DA0}); + table.put(0x2F8BF, new int [] {0x6422}); + table.put(0xF93C, new int [] {0x797F}); + table.put(0x2F8BE, new int [] {0x22BF1}); + table.put(0xF93B, new int [] {0x788C}); + table.put(0x2F8BD, new int [] {0x63E4}); + table.put(0xF93A, new int [] {0x9DFA}); + table.put(0x2F8BC, new int [] {0x6383}); + table.put(0x2F8BB, new int [] {0x6368}); + table.put(0x2F8BA, new int [] {0x62FC}); + table.put(0xF939, new int [] {0x9B6F}); + table.put(0xF938, new int [] {0x9732}); + table.put(0xF937, new int [] {0x8DEF}); + table.put(0x2F8B9, new int [] {0x633D}); + table.put(0xF936, new int [] {0x865C}); + table.put(0x2F8B8, new int [] {0x22B0C}); + table.put(0xF935, new int [] {0x8606}); + table.put(0x0419, new int [] {0x0418, 0x0306}); + table.put(0x2F8B7, new int [] {0x6350}); + table.put(0xF934, new int [] {0x8001}); + table.put(0x2F8B6, new int [] {0x62D4}); + table.put(0xF933, new int [] {0x76E7}); + table.put(0x2F8B5, new int [] {0x62B1}); + table.put(0xF932, new int [] {0x7210}); + table.put(0x2F8B4, new int [] {0x625D}); + table.put(0xF931, new int [] {0x6AD3}); + table.put(0x2F8B3, new int [] {0x621B}); + table.put(0xF930, new int [] {0x64C4}); + table.put(0x2F8B2, new int [] {0x6210}); + table.put(0x2F8B1, new int [] {0x61F6}); + table.put(0x2F8B0, new int [] {0x61F2}); + table.put(0xF92F, new int [] {0x52DE}); + table.put(0xF92E, new int [] {0x51B7}); + table.put(0xF92D, new int [] {0x4F86}); + table.put(0x2F8AF, new int [] {0x61DE}); + table.put(0xF92C, new int [] {0x90CE}); + table.put(0x2F8AE, new int [] {0x61AF}); + table.put(0xF92B, new int [] {0x72FC}); + table.put(0x2F8AD, new int [] {0x61A4}); + table.put(0xF92A, new int [] {0x6D6A}); + table.put(0x040E, new int [] {0x0423, 0x0306}); + table.put(0x2F8AC, new int [] {0x61B2}); + table.put(0x040D, new int [] {0x0418, 0x0300}); + table.put(0x2F8AB, new int [] {0x618E}); + table.put(0x040C, new int [] {0x041A, 0x0301}); + 
table.put(0x2F8AA, new int [] {0x617A}); + table.put(0xF929, new int [] {0x6717}); + table.put(0xF928, new int [] {0x5ECA}); + table.put(0xF927, new int [] {0x881F}); + table.put(0x2F8A9, new int [] {0x614C}); + table.put(0xF926, new int [] {0x81D8}); + table.put(0x2F8A8, new int [] {0x614E}); + table.put(0xF925, new int [] {0x62C9}); + table.put(0x2F8A7, new int [] {0x614C}); + table.put(0xF924, new int [] {0x8964}); + table.put(0x2F8A6, new int [] {0x6148}); + table.put(0xF923, new int [] {0x85CD}); + table.put(0x0407, new int [] {0x0406, 0x0308}); + table.put(0x2F8A5, new int [] {0x60C7}); + table.put(0xF922, new int [] {0x6FEB}); + table.put(0x2F8A4, new int [] {0x226D4}); + table.put(0xF921, new int [] {0x5D50}); + table.put(0x2F8A3, new int [] {0x6094}); + table.put(0xF920, new int [] {0x9E1E}); + table.put(0x2F8A2, new int [] {0x391C}); + table.put(0x0403, new int [] {0x0413, 0x0301}); + table.put(0x2F8A1, new int [] {0x393A}); + table.put(0x2F8A0, new int [] {0x6081}); + table.put(0x0401, new int [] {0x0415, 0x0308}); + table.put(0x0400, new int [] {0x0415, 0x0300}); + table.put(0xF91F, new int [] {0x862D}); + table.put(0xF91E, new int [] {0x721B}); + table.put(0xF91D, new int [] {0x6B04}); + table.put(0xF91C, new int [] {0x5375}); + table.put(0xF91B, new int [] {0x4E82}); + table.put(0xF91A, new int [] {0x99F1}); + table.put(0xF919, new int [] {0x916A}); + table.put(0x22ED, new int [] {0x22B5, 0x0338}); + table.put(0xF918, new int [] {0x843D}); + table.put(0x22EC, new int [] {0x22B4, 0x0338}); + table.put(0xF917, new int [] {0x73DE}); + table.put(0x22EB, new int [] {0x22B3, 0x0338}); + table.put(0xF916, new int [] {0x70D9}); + table.put(0x22EA, new int [] {0x22B2, 0x0338}); + table.put(0xF915, new int [] {0x6D1B}); + table.put(0xF914, new int [] {0x6A02}); + table.put(0xF913, new int [] {0x908F}); + table.put(0xF912, new int [] {0x88F8}); + table.put(0xF911, new int [] {0x87BA}); + table.put(0xF910, new int [] {0x863F}); + table.put(0x22E3, new int [] 
{0x2292, 0x0338}); + table.put(0xF90F, new int [] {0x7F85}); + table.put(0x22E2, new int [] {0x2291, 0x0338}); + table.put(0xF90E, new int [] {0x7669}); + table.put(0x22E1, new int [] {0x227D, 0x0338}); + table.put(0xF90D, new int [] {0x61F6}); + table.put(0x22E0, new int [] {0x227C, 0x0338}); + table.put(0xF90C, new int [] {0x5948}); + table.put(0xF90B, new int [] {0x5587}); + table.put(0xF90A, new int [] {0x91D1}); + table.put(0xF909, new int [] {0x5951}); + table.put(0xF908, new int [] {0x9F9C}); + table.put(0xF907, new int [] {0x9F9C}); + table.put(0xF906, new int [] {0x53E5}); + table.put(0xF905, new int [] {0x4E32}); + table.put(0xF904, new int [] {0x6ED1}); + table.put(0xF903, new int [] {0x8CC8}); + table.put(0xF902, new int [] {0x8ECA}); + table.put(0xF901, new int [] {0x66F4}); + table.put(0xF900, new int [] {0x8C48}); + table.put(0x00FF, new int [] {0x0079, 0x0308}); + table.put(0x00FD, new int [] {0x0079, 0x0301}); + table.put(0x00FC, new int [] {0x0075, 0x0308}); + table.put(0x00FB, new int [] {0x0075, 0x0302}); + table.put(0x00FA, new int [] {0x0075, 0x0301}); + table.put(0x00F9, new int [] {0x0075, 0x0300}); + table.put(0x00F6, new int [] {0x006F, 0x0308}); + table.put(0x00F5, new int [] {0x006F, 0x0303}); + table.put(0x00F4, new int [] {0x006F, 0x0302}); + table.put(0x00F3, new int [] {0x006F, 0x0301}); + table.put(0x00F2, new int [] {0x006F, 0x0300}); + table.put(0x00F1, new int [] {0x006E, 0x0303}); + table.put(0x00EF, new int [] {0x0069, 0x0308}); + table.put(0x00EE, new int [] {0x0069, 0x0302}); + table.put(0x00ED, new int [] {0x0069, 0x0301}); + table.put(0x00EC, new int [] {0x0069, 0x0300}); + table.put(0x00EB, new int [] {0x0065, 0x0308}); + table.put(0x00EA, new int [] {0x0065, 0x0302}); + table.put(0x00E9, new int [] {0x0065, 0x0301}); + table.put(0x00E8, new int [] {0x0065, 0x0300}); + table.put(0x00E7, new int [] {0x0063, 0x0327}); + table.put(0x00E5, new int [] {0x0061, 0x030A}); + table.put(0x00E4, new int [] {0x0061, 0x0308}); + 
table.put(0x00E3, new int [] {0x0061, 0x0303}); + table.put(0x00E2, new int [] {0x0061, 0x0302}); + table.put(0x00E1, new int [] {0x0061, 0x0301}); + table.put(0x00E0, new int [] {0x0061, 0x0300}); + table.put(0x00DD, new int [] {0x0059, 0x0301}); + table.put(0x00DC, new int [] {0x0055, 0x0308}); + table.put(0x00DB, new int [] {0x0055, 0x0302}); + table.put(0x00DA, new int [] {0x0055, 0x0301}); + table.put(0x22AF, new int [] {0x22AB, 0x0338}); + table.put(0x22AE, new int [] {0x22A9, 0x0338}); + table.put(0x22AD, new int [] {0x22A8, 0x0338}); + table.put(0x22AC, new int [] {0x22A2, 0x0338}); + table.put(0x00D9, new int [] {0x0055, 0x0300}); + table.put(0x00D6, new int [] {0x004F, 0x0308}); + table.put(0x00D5, new int [] {0x004F, 0x0303}); + table.put(0x00D4, new int [] {0x004F, 0x0302}); + table.put(0x00D3, new int [] {0x004F, 0x0301}); + table.put(0x00D2, new int [] {0x004F, 0x0300}); + table.put(0x00D1, new int [] {0x004E, 0x0303}); + table.put(0x309E, new int [] {0x309D, 0x3099}); + table.put(0x00CF, new int [] {0x0049, 0x0308}); + table.put(0x00CE, new int [] {0x0049, 0x0302}); + table.put(0x00CD, new int [] {0x0049, 0x0301}); + table.put(0x00CC, new int [] {0x0049, 0x0300}); + table.put(0x00CB, new int [] {0x0045, 0x0308}); + table.put(0x00CA, new int [] {0x0045, 0x0302}); + table.put(0x00C9, new int [] {0x0045, 0x0301}); + table.put(0x3094, new int [] {0x3046, 0x3099}); + table.put(0x00C8, new int [] {0x0045, 0x0300}); + table.put(0x00C7, new int [] {0x0043, 0x0327}); + table.put(0x00C5, new int [] {0x0041, 0x030A}); + table.put(0x00C4, new int [] {0x0041, 0x0308}); + table.put(0x00C3, new int [] {0x0041, 0x0303}); + table.put(0x00C2, new int [] {0x0041, 0x0302}); + table.put(0x00C1, new int [] {0x0041, 0x0301}); + table.put(0x00C0, new int [] {0x0041, 0x0300}); + table.put(0x307D, new int [] {0x307B, 0x309A}); + table.put(0x307C, new int [] {0x307B, 0x3099}); + table.put(0x2F89F, new int [] {0x5FF9}); + table.put(0x307A, new int [] {0x3078, 0x309A}); + 
table.put(0x2F89E, new int [] {0x5FD7}); + table.put(0x2F89D, new int [] {0x5FCD}); + table.put(0x2F89C, new int [] {0x5F9A}); + table.put(0x2F89B, new int [] {0x38E3}); + table.put(0x2F89A, new int [] {0x5F6B}); + table.put(0x3079, new int [] {0x3078, 0x3099}); + table.put(0x3077, new int [] {0x3075, 0x309A}); + table.put(0x3076, new int [] {0x3075, 0x3099}); + table.put(0x2F899, new int [] {0x5F62}); + table.put(0x3074, new int [] {0x3072, 0x309A}); + table.put(0x2F898, new int [] {0x261DA}); + table.put(0x3073, new int [] {0x3072, 0x3099}); + table.put(0x2F897, new int [] {0x232B8}); + table.put(0x2F896, new int [] {0x38C7}); + table.put(0x3071, new int [] {0x306F, 0x309A}); + table.put(0x2F895, new int [] {0x5F22}); + table.put(0x3070, new int [] {0x306F, 0x3099}); + table.put(0x2F894, new int [] {0x5F22}); + table.put(0x2F893, new int [] {0x8201}); + table.put(0x2F892, new int [] {0x22331}); + table.put(0x2F891, new int [] {0x22331}); + table.put(0x2F890, new int [] {0x5EFE}); + table.put(0x2F88F, new int [] {0x2A392}); + table.put(0x2F88E, new int [] {0x5ECA}); + table.put(0x2F88D, new int [] {0x5EB6}); + table.put(0x2F88C, new int [] {0x5EB3}); + table.put(0x2F88B, new int [] {0x5EB0}); + table.put(0x2F88A, new int [] {0x387C}); + table.put(0x3069, new int [] {0x3068, 0x3099}); + table.put(0x3067, new int [] {0x3066, 0x3099}); + table.put(0x3065, new int [] {0x3064, 0x3099}); + table.put(0x2F889, new int [] {0x22183}); + table.put(0x2F888, new int [] {0x3862}); + table.put(0x2F887, new int [] {0x5E69}); + table.put(0x3062, new int [] {0x3061, 0x3099}); + table.put(0x2F886, new int [] {0x5E3D}); + table.put(0x2F885, new int [] {0x5E28}); + table.put(0x3060, new int [] {0x305F, 0x3099}); + table.put(0x2F884, new int [] {0x5DFD}); + table.put(0x2F883, new int [] {0x382F}); + table.put(0x2F882, new int [] {0x5DE2}); + table.put(0x2F881, new int [] {0x5DE1}); + table.put(0x2F880, new int [] {0x5D7C}); + table.put(0x305E, new int [] {0x305D, 0x3099}); + 
table.put(0x305C, new int [] {0x305B, 0x3099}); + table.put(0x2F87F, new int [] {0x5D6B}); + table.put(0x305A, new int [] {0x3059, 0x3099}); + table.put(0x2F87E, new int [] {0x5D6E}); + table.put(0x2F87D, new int [] {0x21DE6}); + table.put(0x2F87C, new int [] {0x5D43}); + table.put(0x2F87B, new int [] {0x21DE4}); + table.put(0x2F87A, new int [] {0x5C8D}); + table.put(0x3058, new int [] {0x3057, 0x3099}); + table.put(0x3056, new int [] {0x3055, 0x3099}); + table.put(0x2F879, new int [] {0x5CC0}); + table.put(0x3054, new int [] {0x3053, 0x3099}); + table.put(0x2F878, new int [] {0x5C6E}); + table.put(0x2F877, new int [] {0x5C60}); + table.put(0x3052, new int [] {0x3051, 0x3099}); + table.put(0x2F876, new int [] {0x3781}); + table.put(0x2F875, new int [] {0x5C22}); + table.put(0x3050, new int [] {0x304F, 0x3099}); + table.put(0x2F874, new int [] {0x5F53}); + table.put(0x2F873, new int [] {0x5C06}); + table.put(0x2F872, new int [] {0x5BFF}); + table.put(0x2F871, new int [] {0x21B18}); + table.put(0x2F870, new int [] {0x5BF3}); + table.put(0x304E, new int [] {0x304D, 0x3099}); + table.put(0x304C, new int [] {0x304B, 0x3099}); + table.put(0x2F86F, new int [] {0x5BE7}); + table.put(0x2F86E, new int [] {0x5BD8}); + table.put(0x2F86D, new int [] {0x5BC3}); + table.put(0x2F86C, new int [] {0x219C8}); + table.put(0x2F86B, new int [] {0x5B3E}); + table.put(0x2F86A, new int [] {0x5B3E}); + table.put(0x2F869, new int [] {0x5B08}); + table.put(0x2F868, new int [] {0x36FC}); + table.put(0x2F867, new int [] {0x36EE}); + table.put(0x0CCB, new int [] {0x0CC6, 0x0CC2, 0x0CD5}); + table.put(0x2F866, new int [] {0x5A66}); + table.put(0x0CCA, new int [] {0x0CC6, 0x0CC2}); + table.put(0x2F865, new int [] {0x59D8}); + table.put(0x2F864, new int [] {0x5A27}); + table.put(0x2F863, new int [] {0x5A1B}); + table.put(0x03D4, new int [] {0x03D2, 0x0308}); + table.put(0x2F862, new int [] {0x59EC}); + table.put(0x03D3, new int [] {0x03D2, 0x0301}); + table.put(0x2F861, new int [] {0x216EA}); + 
table.put(0x2F860, new int [] {0x216A8}); + table.put(0x0CC8, new int [] {0x0CC6, 0x0CD6}); + table.put(0x0CC7, new int [] {0x0CC6, 0x0CD5}); + table.put(0x2F85F, new int [] {0x5962}); + table.put(0x2F85E, new int [] {0x5922}); + table.put(0x2F85D, new int [] {0x591A}); + table.put(0x0CC0, new int [] {0x0CBF, 0x0CD5}); + table.put(0x03CE, new int [] {0x03C9, 0x0301}); + table.put(0x2F85C, new int [] {0x5906}); + table.put(0x03CD, new int [] {0x03C5, 0x0301}); + table.put(0x2F85B, new int [] {0x58F7}); + table.put(0x03CC, new int [] {0x03BF, 0x0301}); + table.put(0x2F85A, new int [] {0x58F2}); + table.put(0x03CB, new int [] {0x03C5, 0x0308}); + table.put(0x03CA, new int [] {0x03B9, 0x0308}); + table.put(0x2F859, new int [] {0x214E4}); + table.put(0x2F858, new int [] {0x58AC}); + table.put(0x2F857, new int [] {0x5831}); + table.put(0x2F856, new int [] {0x5832}); + table.put(0x2F855, new int [] {0x578B}); + table.put(0x2F854, new int [] {0x580D}); + table.put(0x2F853, new int [] {0x57F4}); + table.put(0x2F852, new int [] {0x57CE}); + table.put(0x2F851, new int [] {0x58EE}); + table.put(0x2F850, new int [] {0x5207}); + table.put(0x2F84F, new int [] {0x5674}); + table.put(0x2F84E, new int [] {0x5651}); + table.put(0x2F84D, new int [] {0x5717}); + table.put(0x2F84C, new int [] {0x5606}); + table.put(0x2F84B, new int [] {0x5716}); + table.put(0x2F84A, new int [] {0x55C2}); + table.put(0x2F849, new int [] {0x55B3}); + table.put(0x2F848, new int [] {0x55AB}); + table.put(0x2F847, new int [] {0x5599}); + table.put(0x2F846, new int [] {0x5584}); + table.put(0x2F845, new int [] {0x5584}); + table.put(0x2F844, new int [] {0x5563}); + table.put(0x2F843, new int [] {0x5553}); + table.put(0x2F842, new int [] {0x5510}); + table.put(0x2F841, new int [] {0x54F6}); + table.put(0x2F840, new int [] {0x54A2}); + table.put(0x03B0, new int [] {0x03C5, 0x0308, 0x0301}); + table.put(0x2F83F, new int [] {0x5468}); + table.put(0x2F83E, new int [] {0x5448}); + table.put(0x03AF, new int [] 
{0x03B9, 0x0301}); + table.put(0x2F83D, new int [] {0x5438}); + table.put(0x03AE, new int [] {0x03B7, 0x0301}); + table.put(0x2F83C, new int [] {0x549E}); + table.put(0x03AD, new int [] {0x03B5, 0x0301}); + table.put(0x2F83B, new int [] {0x5406}); + table.put(0x03AC, new int [] {0x03B1, 0x0301}); + table.put(0x2F83A, new int [] {0x53F1}); + table.put(0x03AB, new int [] {0x03A5, 0x0308}); + table.put(0x03AA, new int [] {0x0399, 0x0308}); + table.put(0x2F839, new int [] {0x53EB}); + table.put(0x2F838, new int [] {0x20B63}); + table.put(0x2F837, new int [] {0x53DF}); + table.put(0x2F836, new int [] {0x53CA}); + table.put(0x2F835, new int [] {0x7070}); + table.put(0x2F834, new int [] {0x20A2C}); + table.put(0x2F833, new int [] {0x537F}); + table.put(0x2F832, new int [] {0x537F}); + table.put(0x2F831, new int [] {0x537F}); + table.put(0x2289, new int [] {0x2287, 0x0338}); + table.put(0x2F830, new int [] {0x537D}); + table.put(0x2288, new int [] {0x2286, 0x0338}); + table.put(0x2285, new int [] {0x2283, 0x0338}); + table.put(0x2284, new int [] {0x2282, 0x0338}); + table.put(0x2281, new int [] {0x227B, 0x0338}); + table.put(0x2280, new int [] {0x227A, 0x0338}); + table.put(0x2F82F, new int [] {0x5373}); + table.put(0x2F82E, new int [] {0x535A}); + table.put(0x2F82D, new int [] {0x5351}); + table.put(0x2F82C, new int [] {0x5349}); + table.put(0x2F82B, new int [] {0x5317}); + table.put(0x2F82A, new int [] {0x5306}); + table.put(0x2F829, new int [] {0x5305}); + table.put(0x2F828, new int [] {0x52FA}); + table.put(0x2F827, new int [] {0x52E4}); + table.put(0x2F826, new int [] {0x52C9}); + table.put(0x2F825, new int [] {0x52C7}); + table.put(0x2F824, new int [] {0x3515}); + table.put(0x2F823, new int [] {0x5277}); + table.put(0x2F822, new int [] {0x5272}); + table.put(0x2F821, new int [] {0x5246}); + table.put(0x2279, new int [] {0x2277, 0x0338}); + table.put(0x2F820, new int [] {0x523B}); + table.put(0x2278, new int [] {0x2276, 0x0338}); + table.put(0x2275, new int [] 
{0x2273, 0x0338}); + table.put(0x2274, new int [] {0x2272, 0x0338}); + table.put(0x2271, new int [] {0x2265, 0x0338}); + table.put(0x2270, new int [] {0x2264, 0x0338}); + table.put(0x2F81F, new int [] {0x34DF}); + table.put(0x2F81E, new int [] {0x5203}); + table.put(0x2F81D, new int [] {0x51F5}); + table.put(0x2F81C, new int [] {0x291DF}); + table.put(0x2F81B, new int [] {0x51B5}); + table.put(0xFB4E, new int [] {0x05E4, 0x05BF}); + table.put(0x2F81A, new int [] {0x51AC}); + table.put(0xFB4D, new int [] {0x05DB, 0x05BF}); + table.put(0xFB4C, new int [] {0x05D1, 0x05BF}); + table.put(0xFB4B, new int [] {0x05D5, 0x05B9}); + table.put(0x226F, new int [] {0x003E, 0x0338}); + table.put(0xFB4A, new int [] {0x05EA, 0x05BC}); + table.put(0x226E, new int [] {0x003C, 0x0338}); + table.put(0x226D, new int [] {0x224D, 0x0338}); + table.put(0x2F819, new int [] {0x4ECC}); + table.put(0x2F818, new int [] {0x51A4}); + table.put(0x2F817, new int [] {0x5197}); + table.put(0x2F816, new int [] {0x2054B}); + table.put(0xFB49, new int [] {0x05E9, 0x05BC}); + table.put(0x2F815, new int [] {0x518D}); + table.put(0xFB48, new int [] {0x05E8, 0x05BC}); + table.put(0x2F814, new int [] {0x5167}); + table.put(0xFB47, new int [] {0x05E7, 0x05BC}); + table.put(0x2F813, new int [] {0x34B9}); + table.put(0xFB46, new int [] {0x05E6, 0x05BC}); + table.put(0x2F812, new int [] {0x2051C}); + table.put(0x2F811, new int [] {0x5177}); + table.put(0xFB44, new int [] {0x05E4, 0x05BC}); + table.put(0x2F810, new int [] {0x5164}); + table.put(0xFB43, new int [] {0x05E3, 0x05BC}); + table.put(0xFB41, new int [] {0x05E1, 0x05BC}); + table.put(0xFB40, new int [] {0x05E0, 0x05BC}); + table.put(0x2262, new int [] {0x2261, 0x0338}); + table.put(0x2260, new int [] {0x003D, 0x0338}); + table.put(0x2F80F, new int [] {0x5154}); + table.put(0x2F80E, new int [] {0x514D}); + table.put(0x2F80D, new int [] {0x2063A}); + table.put(0x2F80C, new int [] {0x349E}); + table.put(0x2F80B, new int [] {0x50CF}); + table.put(0xFB3E, new 
int [] {0x05DE, 0x05BC}); + table.put(0x2F80A, new int [] {0x50E7}); + table.put(0xFB3C, new int [] {0x05DC, 0x05BC}); + table.put(0xFB3B, new int [] {0x05DB, 0x05BC}); + table.put(0xFB3A, new int [] {0x05DA, 0x05BC}); + table.put(0x2F809, new int [] {0x5099}); + table.put(0x2F808, new int [] {0x507A}); + table.put(0x2F807, new int [] {0x5002}); + table.put(0x2F806, new int [] {0x4FBB}); + table.put(0xFB39, new int [] {0x05D9, 0x05BC}); + table.put(0x2F805, new int [] {0x4FAE}); + table.put(0xFB38, new int [] {0x05D8, 0x05BC}); + table.put(0x2F804, new int [] {0x4F60}); + table.put(0x2F803, new int [] {0x20122}); + table.put(0xFB36, new int [] {0x05D6, 0x05BC}); + table.put(0x2F802, new int [] {0x4E41}); + table.put(0xFB35, new int [] {0x05D5, 0x05BC}); + table.put(0x2F801, new int [] {0x4E38}); + table.put(0xFB34, new int [] {0x05D4, 0x05BC}); + table.put(0x2F800, new int [] {0x4E3D}); + table.put(0x06D3, new int [] {0x06D2, 0x0654}); + table.put(0xFB33, new int [] {0x05D3, 0x05BC}); + table.put(0xFB32, new int [] {0x05D2, 0x05BC}); + table.put(0xFB31, new int [] {0x05D1, 0x05BC}); + table.put(0xFB30, new int [] {0x05D0, 0x05BC}); + table.put(0xFB2F, new int [] {0x05D0, 0x05B8}); + table.put(0xFB2E, new int [] {0x05D0, 0x05B7}); + table.put(0xFB2D, new int [] {0x05E9, 0x05BC, 0x05C2}); + table.put(0xFB2C, new int [] {0x05E9, 0x05BC, 0x05C1}); + table.put(0xFB2B, new int [] {0x05E9, 0x05C2}); + table.put(0xFB2A, new int [] {0x05E9, 0x05C1}); + table.put(0x2249, new int [] {0x2248, 0x0338}); + table.put(0x2247, new int [] {0x2245, 0x0338}); + table.put(0x06C2, new int [] {0x06C1, 0x0654}); + table.put(0x06C0, new int [] {0x06D5, 0x0654}); + table.put(0x2244, new int [] {0x2243, 0x0338}); + table.put(0x0FB9, new int [] {0x0F90, 0x0FB5}); + table.put(0x2241, new int [] {0x223C, 0x0338}); + table.put(0xFB1F, new int [] {0x05F2, 0x05B7}); + table.put(0xFB1D, new int [] {0x05D9, 0x05B4}); + table.put(0x0FAC, new int [] {0x0FAB, 0x0FB7}); + table.put(0x0FA7, new int [] 
{0x0FA6, 0x0FB7}); + table.put(0x0FA2, new int [] {0x0FA1, 0x0FB7}); + table.put(0x2226, new int [] {0x2225, 0x0338}); + table.put(0x2224, new int [] {0x2223, 0x0338}); + table.put(0x220C, new int [] {0x220B, 0x0338}); + table.put(0x2209, new int [] {0x2208, 0x0338}); + table.put(0x0390, new int [] {0x03B9, 0x0308, 0x0301}); + table.put(0x2204, new int [] {0x2203, 0x0338}); + table.put(0x038F, new int [] {0x03A9, 0x0301}); + table.put(0x038E, new int [] {0x03A5, 0x0301}); + table.put(0x038C, new int [] {0x039F, 0x0301}); + table.put(0x09DF, new int [] {0x09AF, 0x09BC}); + table.put(0x038A, new int [] {0x0399, 0x0301}); + table.put(0x09DD, new int [] {0x09A2, 0x09BC}); + table.put(0x09DC, new int [] {0x09A1, 0x09BC}); + table.put(0x0389, new int [] {0x0397, 0x0301}); + table.put(0x0388, new int [] {0x0395, 0x0301}); + table.put(0x0387, new int [] {0x00B7}); + table.put(0x0386, new int [] {0x0391, 0x0301}); + table.put(0x0385, new int [] {0x00A8, 0x0301}); + table.put(0x037E, new int [] {0x003B}); + table.put(0x09CC, new int [] {0x09C7, 0x09D7}); + table.put(0x09CB, new int [] {0x09C7, 0x09BE}); + table.put(0x0374, new int [] {0x02B9}); + table.put(0x0C48, new int [] {0x0C46, 0x0C56}); + table.put(0x0F9D, new int [] {0x0F9C, 0x0FB7}); + table.put(0x0344, new int [] {0x0308, 0x0301}); + table.put(0x0343, new int [] {0x0313}); + table.put(0x0341, new int [] {0x0301}); + table.put(0x0340, new int [] {0x0300}); + table.put(0x0F93, new int [] {0x0F92, 0x0FB7}); + table.put(0x0F81, new int [] {0x0F71, 0x0F80}); + table.put(0x0F78, new int [] {0x0FB3, 0x0F80}); + table.put(0x1D1C0, new int [] {0x1D1BA, 0x1D165, 0x1D16F}); + table.put(0x0F76, new int [] {0x0FB2, 0x0F80}); + table.put(0x0F75, new int [] {0x0F71, 0x0F74}); + table.put(0x0F73, new int [] {0x0F71, 0x0F72}); + table.put(0x1D1BF, new int [] {0x1D1B9, 0x1D165, 0x1D16F}); + table.put(0x1D1BE, new int [] {0x1D1BA, 0x1D165, 0x1D16E}); + table.put(0x1D1BD, new int [] {0x1D1B9, 0x1D165, 0x1D16E}); + table.put(0x1D1BC, 
new int [] {0x1D1BA, 0x1D165}); + table.put(0x1D1BB, new int [] {0x1D1B9, 0x1D165}); + table.put(0x0F69, new int [] {0x0F40, 0x0FB5}); + table.put(0x0F5C, new int [] {0x0F5B, 0x0FB7}); + table.put(0xFAD9, new int [] {0x9F8E}); + table.put(0xFAD8, new int [] {0x9F43}); + table.put(0xFAD7, new int [] {0x27ED3}); + table.put(0xFAD6, new int [] {0x25CD0}); + table.put(0xFAD5, new int [] {0x25249}); + table.put(0xFAD4, new int [] {0x4039}); + table.put(0xFAD3, new int [] {0x4018}); + table.put(0xFAD2, new int [] {0x3B9D}); + table.put(0xFAD1, new int [] {0x233D5}); + table.put(0xFAD0, new int [] {0x22844}); + table.put(0x0F57, new int [] {0x0F56, 0x0FB7}); + table.put(0x0F52, new int [] {0x0F51, 0x0FB7}); + table.put(0xFACF, new int [] {0x2284A}); + table.put(0xFACE, new int [] {0x9F9C}); + table.put(0xFACD, new int [] {0x9B12}); + table.put(0xFACC, new int [] {0x983B}); + table.put(0xFACB, new int [] {0x980B}); + table.put(0xFACA, new int [] {0x97FF}); + table.put(0x0F4D, new int [] {0x0F4C, 0x0FB7}); + table.put(0xFAC9, new int [] {0x97DB}); + table.put(0xFAC8, new int [] {0x9756}); + table.put(0xFAC7, new int [] {0x96E3}); + table.put(0xFAC6, new int [] {0x967C}); + table.put(0xFAC5, new int [] {0x9276}); + table.put(0x2ADC, new int [] {0x2ADD, 0x0338}); + table.put(0xFAC4, new int [] {0x9199}); + table.put(0xFAC3, new int [] {0x9072}); + table.put(0xFAC2, new int [] {0x8F38}); + table.put(0xFAC1, new int [] {0x8D08}); + table.put(0xFAC0, new int [] {0x8B8A}); + table.put(0x0F43, new int [] {0x0F42, 0x0FB7}); + table.put(0xFABF, new int [] {0x8B39}); + table.put(0xFABE, new int [] {0x8AED}); + table.put(0xFABD, new int [] {0x8AFE}); + table.put(0xFABC, new int [] {0x8B01}); + table.put(0xFABB, new int [] {0x8ACB}); + table.put(0xFABA, new int [] {0x8AF8}); + table.put(0xFAB9, new int [] {0x8ABF}); + table.put(0xFAB8, new int [] {0x8996}); + table.put(0xFAB7, new int [] {0x8986}); + table.put(0xFAB6, new int [] {0x8941}); + table.put(0xFAB5, new int [] {0x8779}); + 
table.put(0xFAB4, new int [] {0x83EF}); + table.put(0xFAB3, new int [] {0x8352}); + table.put(0xFAB2, new int [] {0x8005}); + table.put(0xFAB1, new int [] {0x7F3E}); + table.put(0xFAB0, new int [] {0x7DF4}); + table.put(0xFAAF, new int [] {0x7D5B}); + table.put(0xFAAE, new int [] {0x7C7B}); + table.put(0xFAAD, new int [] {0x7BC0}); + table.put(0xFAAC, new int [] {0x7AB1}); + table.put(0xFAAB, new int [] {0x78CC}); + table.put(0x2FA1D, new int [] {0x2A600}); + table.put(0x21CF, new int [] {0x21D2, 0x0338}); + table.put(0xFAAA, new int [] {0x7740}); + table.put(0x2FA1C, new int [] {0x9F3B}); + table.put(0x21CE, new int [] {0x21D4, 0x0338}); + table.put(0x2FA1B, new int [] {0x9F16}); + table.put(0x21CD, new int [] {0x21D0, 0x0338}); + table.put(0x2FA1A, new int [] {0x9F0F}); + table.put(0xFAA9, new int [] {0x774A}); + table.put(0xFAA8, new int [] {0x76F4}); + table.put(0xFAA7, new int [] {0x76DB}); + table.put(0x2FA19, new int [] {0x9F05}); + table.put(0xFAA6, new int [] {0x76CA}); + table.put(0x2FA18, new int [] {0x9EFE}); + table.put(0xFAA5, new int [] {0x761F}); + table.put(0x2FA17, new int [] {0x9EF9}); + table.put(0xFAA4, new int [] {0x761D}); + table.put(0x2FA16, new int [] {0x4D56}); + table.put(0xFAA3, new int [] {0x753B}); + table.put(0x2FA15, new int [] {0x9EBB}); + table.put(0xFAA2, new int [] {0x7506}); + table.put(0x2FA14, new int [] {0x2A291}); + table.put(0xFAA1, new int [] {0x7471}); + table.put(0x2FA13, new int [] {0x2A20E}); + table.put(0xFAA0, new int [] {0x732A}); + table.put(0x2FA12, new int [] {0x2A105}); + table.put(0x2FA11, new int [] {0x4CF8}); + table.put(0x2FA10, new int [] {0x2A0CE}); + table.put(0x2FA0F, new int [] {0x9D67}); + table.put(0x2FA0E, new int [] {0x4CED}); + table.put(0x2FA0D, new int [] {0x4CCE}); + table.put(0x2FA0C, new int [] {0x9CFD}); + table.put(0x2FA0B, new int [] {0x9C40}); + table.put(0x2FA0A, new int [] {0x9B12}); + table.put(0x2FA09, new int [] {0x29B30}); + table.put(0x0626, new int [] {0x064A, 0x0654}); + 
table.put(0x2FA08, new int [] {0x4BCE}); + table.put(0x0625, new int [] {0x0627, 0x0655}); + table.put(0x2FA07, new int [] {0x99FE}); + table.put(0x0624, new int [] {0x0648, 0x0654}); + table.put(0x2FA06, new int [] {0x99C2}); + table.put(0x0623, new int [] {0x0627, 0x0654}); + table.put(0x2FA05, new int [] {0x99A7}); + table.put(0x0622, new int [] {0x0627, 0x0653}); + table.put(0x2FA04, new int [] {0x9929}); + table.put(0x2FA03, new int [] {0x4B33}); + table.put(0x2FA02, new int [] {0x98E2}); + table.put(0x2FA01, new int [] {0x295B6}); + table.put(0x2FA00, new int [] {0x9829}); + table.put(0x21AE, new int [] {0x2194, 0x0338}); + table.put(0x095F, new int [] {0x092F, 0x093C}); + table.put(0x095E, new int [] {0x092B, 0x093C}); + table.put(0x095D, new int [] {0x0922, 0x093C}); + table.put(0x095C, new int [] {0x0921, 0x093C}); + table.put(0x095B, new int [] {0x091C, 0x093C}); + table.put(0x095A, new int [] {0x0917, 0x093C}); + table.put(0x0959, new int [] {0x0916, 0x093C}); + table.put(0x0958, new int [] {0x0915, 0x093C}); + table.put(0x0934, new int [] {0x0933, 0x093C}); + table.put(0x0931, new int [] {0x0930, 0x093C}); + return table; + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HighSurrogateCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HighSurrogateCharSet.java new file mode 100644 index 000000000..012f6c9fd --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HighSurrogateCharSet.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * + * Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. + * + * COPYRIGHT AND PERMISSION NOTICE + * + * Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under + * the Terms of Use in http://www.unicode.org/copyright.html. Permission is + * hereby granted, free of charge, to any person obtaining a copy of the + * Unicode data files and any associated documentation (the "Data Files") + * or Unicode software and any associated documentation (the "Software") + * to deal in the Data Files or Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, distribute, + * and/or sell copies of the Data Files or Software, and to permit persons + * to whom the Data Files or Software are furnished to do so, provided that + * (a) the above copyright notice(s) and this permission notice appear with + * all copies of the Data Files or Software, (b) both the above copyright + * notice(s) and this permission notice appear in associated documentation, + * and (c) there is clear notice in each modified Data File or in the Software + * as well as in the documentation associated with the Data File(s) or Software + * that the data or software has been modified. + + * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + * OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS + * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THE DATA FILES OR SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in these Data Files or Software without prior written + * authorization of the copyright holder. + * + * 2. Additional terms from the Database: + * + * Copyright © 1995-1999 Unicode, Inc. All Rights reserved. + * + * Disclaimer + * + * The Unicode Character Database is provided as is by Unicode, Inc. + * No claims are made as to fitness for any particular purpose. No warranties + * of any kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been purchased + * on magnetic or optical media from Unicode, Inc., the sole remedy for any claim + * will be exchange of defective media within 90 days of receipt. This disclaimer + * is applicable for all other data files accompanying the Unicode Character Database, + * some of which have been compiled by the Unicode Consortium, and some of which + * have been supplied by other sources. + * + * Limitations on Rights to Redistribute This Data + * + * Recipient is granted the right to make copies in any form for internal + * distribution and to freely use the information supplied in the creation of + * products supporting the UnicodeTM Standard. The files in + * the Unicode Character Database can be redistributed to third parties or other + * organizations (whether for profit or not) as long as this notice and the disclaimer + * notice are retained. 
Information can be extracted from these files and used + * in documentation or programs, as long as there is an accompanying notice + * indicating the source. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * This class represents high surrogate character. + */ +class HighSurrogateCharSet extends JointSet{ + + /* + * Note that we can use high and low surrogate characters + * that don't combine into supplementary code point. + * See http://www.unicode.org/reports/tr18/#Supplementary_Characters + */ + + private char high; + + public HighSurrogateCharSet(char high) { + this.high = high; + } + + /** + * Returns the next. + */ + public AbstractSet getNext() { + return this.next; + } + + /** + * Sets next abstract set. + * @param next + * The next to set. + */ + public void setNext(AbstractSet next) { + this.next = next; + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int strLength = matchResult.getRightBound(); + + if (stringIndex + 1 > strLength) { + matchResult.hitEnd = true; + return -1; + } + + char high = testString.charAt(stringIndex); + + if (stringIndex + 1 < strLength) { + char low = testString.charAt(stringIndex + 1); + + /* + * we consider high surrogate followed by + * low surrogate as a codepoint + */ + if (Character.isLowSurrogate(low)) { + return -1; + } + } + + if (this.high == high) { + return next.matches(stringIndex + 1, testString, + matchResult); + } + + return -1; + } + + public int find(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (testString instanceof String) { + String testStr = (String) testString; + int strLength = matchResult.getRightBound(); + + while (strIndex < strLength) { + + strIndex = testStr.indexOf(high, strIndex); + if (strIndex < 0) + return -1; + + if (strIndex + 1 < strLength ) { + + /* + * we consider high surrogate followed by + * low surrogate as a codepoint + */ + if (Character.isLowSurrogate(testStr.charAt(strIndex + 1))) { 
+ strIndex += 2; + continue; + } + } + + if (next.matches(strIndex + 1, testString, matchResult) >= 0) { + return strIndex; + } + strIndex++; + } + + return -1; + } + + return super.find(strIndex, testString, matchResult); + } + + public int findBack(int strIndex, int lastIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (testString instanceof String) { + String testStr = (String) testString; + int strLength = matchResult.getRightBound(); + + while (lastIndex >= strIndex) { + lastIndex = testStr.lastIndexOf(high, lastIndex); + if (lastIndex < 0 || lastIndex < strIndex) { + return -1; + } + + if (lastIndex + 1 < strLength) { + + /* + * we consider high surrogate followed by + * low surrogate as a codepoint + */ + if (Character.isLowSurrogate(testStr.charAt(lastIndex + 1))) { + lastIndex--; + continue; + } + } + + if (next.matches(lastIndex + 1, testString, matchResult) >= 0) { + return lastIndex; + } + + lastIndex--; + } + + return -1; + } + + return super.findBack(strIndex, lastIndex, testString, matchResult); + } + + protected String getName() { + return "" + high; + } + + protected int getChar() { + return high; + } + + public boolean first(AbstractSet set) { + if (set instanceof CharSet) { + return false; + } else if (set instanceof RangeSet) { + return false; + } else if (set instanceof SupplRangeSet) { + return false; + } else if (set instanceof SupplCharSet) { + return false; + } else if (set instanceof LowSurrogateCharSet) { + return false; + } else if (set instanceof HighSurrogateCharSet) { + return ((HighSurrogateCharSet) set).high == this.high; + } + + return true; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return true; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/I18n.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/I18n.java new file mode 100644 index 000000000..e1f950959 --- /dev/null +++ 
b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/I18n.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ + +package org.teavm.classlib.java.util.regex; +import java.text.MessageFormat; + +/** + * Internationalization stub. All the messages in java.util.regexp + * package done though this class. This class should be lately replaced with + * real internationalization utility. + * + * @author Nikolay A. 
/**
 * Internationalization stub. All messages of the java.util.regex package go
 * through this class. It should later be replaced with a real
 * internationalization utility.
 *
 * @author Nikolay A. Kuznetsov
 */
class I18n {

    /**
     * Returns the message unchanged.
     */
    public static String getMessage(String message) {
        return message;
    }

    /**
     * Formats the message as a MessageFormat pattern with one argument.
     */
    public static String getFormattedMessage(String message, Object arg1) {
        Object[] arguments = {arg1};
        return MessageFormat.format(message, arguments);
    }

    /**
     * Formats the message as a MessageFormat pattern with two arguments.
     */
    public static String getFormattedMessage(String message, Object arg1, Object arg2) {
        Object[] arguments = {arg1, arg2};
        return MessageFormat.format(message, arguments);
    }

}
+ */ +class IntArrHash { + final int[] table; + + final Object[] values; + + final int mask; + + public IntArrHash(int size) { + int tmpMask = 0; + while (size >= tmpMask) { + tmpMask = (tmpMask << 1) | 1; + } + mask = (tmpMask << 1) | 1; + table = new int[mask + 1]; + values = new Object [mask + 1]; + } + + public void put(int key, int [] value) { + int i = 0; + int hashCode = key & mask; + + for (; ; ) { + if (table[hashCode] == 0 // empty + || table[hashCode] == key) { // rewrite + table[hashCode] = key; + values[hashCode] = value; + return; + } + i++; + i &= mask; + + hashCode += i; + hashCode &= mask; + } + } + + public int [] get(int key) { + int hashCode = key & mask; + int i = 0; + int storedKey; + + for (; ; ) { + storedKey = table[hashCode]; + + if (storedKey == 0) { // empty + return null; + } + + if (storedKey == key) { + return (int []) values[hashCode]; + } + + i++; + i &= mask; + + hashCode += i; + hashCode &= mask; + } + } + } \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/IntHash.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/IntHash.java new file mode 100644 index 000000000..295c04620 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/IntHash.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/**
 * Hashtable implementation for int values: an open-addressing table that maps
 * int keys to int values. Looking up an absent key yields the size passed to
 * the constructor.
 *
 * A slot whose stored key is 0 is treated as empty, so a mapping for key 0
 * cannot live in the table itself; it is kept in dedicated fields instead.
 */
class IntHash {
    int[] table;

    int[] values;

    int mask;

    int size; // maximum shift

    // Mapping for key 0, stored out of band because 0 marks an empty slot.
    private boolean hasZeroKey;

    private int zeroValue;

    /**
     * Creates a table whose capacity is a power of two strictly greater than
     * twice the requested size.
     *
     * @param size
     *            expected number of entries; also the value get() returns
     *            for keys that are not present
     */
    public IntHash(int size) {
        int tmpMask = 0;
        while (size >= tmpMask) {
            tmpMask = (tmpMask << 1) | 1;
        }
        mask = (tmpMask << 1) | 1;
        table = new int[mask + 1];
        values = new int[mask + 1];
        this.size = size;
    }

    /**
     * Associates value with key, replacing any previous mapping.
     *
     * @throws IllegalStateException
     *             if every slot is occupied by a different key
     */
    public void put(int key, int value) {
        if (key == 0) {
            hasZeroKey = true;
            zeroValue = value;
            return;
        }

        int slot = key & mask;
        int step = 0;

        for (; ; ) {
            int storedKey = table[slot];

            if (storedKey == 0 // empty
                    || storedKey == key) { // rewrite
                table[slot] = key;
                values[slot] = value;
                return;
            }

            // Increasing steps (1, 2, 3, ...) visit every slot of a
            // power-of-two table exactly once before step wraps to 0.
            step = (step + 1) & mask;
            if (step == 0) {
                throw new IllegalStateException("IntHash is full");
            }
            slot = (slot + step) & mask;
        }
    }

    /**
     * Returns the value stored for key, or the constructor's size argument
     * if there is no mapping for key.
     */
    public int get(int key) {
        if (key == 0) {
            return hasZeroKey ? zeroValue : size;
        }

        int slot = key & mask;
        int step = 0;

        for (; ; ) {
            int storedKey = table[slot];

            if (storedKey == 0) { // empty
                return size;
            }

            if (storedKey == key) {
                return values[slot];
            }

            step = (step + 1) & mask;
            if (step == 0) { // every slot inspected, key is absent
                return size;
            }
            slot = (slot + step) & mask;
        }
    }
}
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.ArrayList; +import java.util.Iterator; + +/** + * Represents group, which is alternation of other subexpression. + * One should think about "group" in this model as JointSet opening + * group and corresponding FSet closing group. 
+ */ +class JointSet extends AbstractSet { + + protected ArrayList children; + + protected AbstractSet fSet; + + protected int groupIndex; + + protected JointSet() { + } + + public JointSet(ArrayList children, FSet fSet) { + this.children = children; + this.fSet = fSet; + this.groupIndex = fSet.getGroupIndex(); + } + + /** + * Returns stringIndex+shift, the next position to match + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (children == null) { + return -1; + } + int start = matchResult.getStart(groupIndex); + matchResult.setStart(groupIndex, stringIndex); + int size = children.size(); + for (int i = 0; i < size; i++) { + AbstractSet e = (AbstractSet) children.get(i); + int shift = e.matches(stringIndex, testString, matchResult); + if (shift >= 0) { + return shift; + } + } + matchResult.setStart(groupIndex, start); + return -1; + } + + public void setNext(AbstractSet next) { + fSet.setNext(next); + } + + public AbstractSet getNext() { + return fSet.getNext(); + } + + protected String getName() { + return "JointSet"; //$NON-NLS-1$ + } + + public int getGroup() { + return groupIndex; + } + + public boolean first(AbstractSet set) { + if (children != null) { + for (Iterator i = children.iterator(); i.hasNext();) { + if (((AbstractSet) i.next()).first(set)) { + return true; + } + } + } + + return false; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return !(matchResult.getEnd(groupIndex) >= 0 && matchResult + .getStart(groupIndex) == matchResult.getEnd(groupIndex)); + } + + /** + * This method is used for traversing nodes after the + * first stage of compilation. 
+ */ + public void processSecondPass() { + this.isSecondPassVisited = true; + + if (fSet != null && !fSet.isSecondPassVisited) { + + /* + * Add here code to do during the pass + */ + + /* + * End code to do during the pass + */ + fSet.processSecondPass(); + } + + if (children != null) { + int childrenSize = children.size(); + + for (int i = 0; i < childrenSize; i++) { + AbstractSet child = (AbstractSet) children.get(i); + + /* + * Add here code to do during the pass + */ + + JointSet set = child.processBackRefReplacement(); + + if (set != null) { + child.isSecondPassVisited = true; + children.remove(i); + children.add(i, set); + child = (AbstractSet) set; + } + + /* + * End code to do during the pass + */ + if (!child.isSecondPassVisited) { + child.processSecondPass(); + } + } + } + + if (next != null) { + super.processSecondPass(); + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafQuantifierSet.java new file mode 100644 index 000000000..f56a6c4f4 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafQuantifierSet.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + + +/** + * @author Nikolay A. Kuznetsov + */ +class LeafQuantifierSet extends QuantifierSet { + + protected LeafSet leaf; + + public LeafQuantifierSet(LeafSet innerSet, AbstractSet next, int type) { + super(innerSet, next, type); + this.leaf = innerSet; + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int i = 0; + int shift = 0; + + while (stringIndex + leaf.charCount() <= matchResult.getRightBound() + && (shift = leaf.accepts(stringIndex, testString)) > 0) { + stringIndex += shift; + i++; + } + + for (; i >= 0; i--) { + shift = next.matches(stringIndex, testString, matchResult); + if (shift >= 0) { + return shift; + } + + stringIndex -= leaf.charCount(); + } + return -1; + } + + protected String getName() { + return ""; //$NON-NLS-1$ + } + + /** + * Sets an inner set. + * @param innerSet + * The innerSet to set. + */ + public void setInnerSet(AbstractSet innerSet) { + if (!(innerSet instanceof LeafSet)) + throw new RuntimeException(""); + super.setInnerSet(innerSet); + this.leaf = (LeafSet) innerSet; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafSet.java new file mode 100644 index 000000000..75a38c981 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafSet.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Base class for nodes representing leaf tokens of the RE, those that consume a + * fixed number of characters. + * + * @author Nikolay A. Kuznetsov + */ +abstract class LeafSet extends AbstractSet { + + protected int charCount = 1; + + public LeafSet(AbstractSet next) { + super(next); + setType(AbstractSet.TYPE_LEAF); + } + + public LeafSet() { + } + + /** + * Returns "shift", the number of accepted chars; commonly an internal function, + * but also called by quantifiers. + */ + public abstract int accepts(int stringIndex, CharSequence testString); + + /** + * Checks if we can enter this state and pass the control to the next one; sets matchResult.hitEnd and fails when this node does not fit before the right bound. + * Returns the result of next.matches if this node accepts the input, -1 otherwise. + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + if (stringIndex + charCount() > matchResult.getRightBound()) { + matchResult.hitEnd = true; + return -1; + } + + int shift = accepts(stringIndex, testString); + if (shift < 0) { + return -1; + } + + return next.matches(stringIndex + shift, testString, matchResult); + } + + /** + * Returns number of characters this node consumes. + * @return number of characters this node consumes.
+ */ + public int charCount() { + return charCount; + } + + public boolean hasConsumed(MatchResultImpl mr) { + return true; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Lexer.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Lexer.java new file mode 100644 index 000000000..78244f389 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Lexer.java @@ -0,0 +1,1273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.MissingResourceException; + +/** + * The purpose of this class is to break given pattern into RE tokens; + * + * @author Nikolay A.
Kuznetsov + */ +class Lexer { + + public static final int CHAR_DOLLAR = 0xe0000000 | '$'; + + public static final int CHAR_RIGHT_PARENTHESIS = 0xe0000000 | ')'; + + public static final int CHAR_LEFT_SQUARE_BRACKET = 0xe0000000 | '['; + + public static final int CHAR_RIGHT_SQUARE_BRACKET = 0xe0000000 | ']'; + + public static final int CHAR_CARET = 0xe0000000 | '^'; + + public static final int CHAR_VERTICAL_BAR = 0xe0000000 | '|'; + + public static final int CHAR_AMPERSAND = 0xe0000000 | '&'; + + public static final int CHAR_HYPHEN = 0xe0000000 | '-'; + + public static final int CHAR_DOT = 0xe0000000 | '.'; + + public static final int QMOD_GREEDY = 0xe0000000; + + public static final int QMOD_RELUCTANT = 0xc0000000; + + public static final int QMOD_POSSESSIVE = 0x80000000; + + public static final int QUANT_STAR = QMOD_GREEDY | '*'; + + public static final int QUANT_STAR_P = QMOD_POSSESSIVE | '*'; + + public static final int QUANT_STAR_R = QMOD_RELUCTANT | '*'; + + public static final int QUANT_PLUS = QMOD_GREEDY | '+'; + + public static final int QUANT_PLUS_P = QMOD_POSSESSIVE | '+'; + + public static final int QUANT_PLUS_R = QMOD_RELUCTANT | '+'; + + public static final int QUANT_ALT = QMOD_GREEDY | '?'; + + public static final int QUANT_ALT_P = QMOD_POSSESSIVE | '?'; + + public static final int QUANT_ALT_R = QMOD_RELUCTANT | '?'; + + public static final int QUANT_COMP = QMOD_GREEDY | '{'; + + public static final int QUANT_COMP_P = QMOD_POSSESSIVE | '{'; + + public static final int QUANT_COMP_R = QMOD_RELUCTANT | '{'; + + public static final int CHAR_LEFT_PARENTHESIS = 0x80000000 | '('; + + public static final int CHAR_NONCAP_GROUP = 0xc0000000 | '('; + + public static final int CHAR_POS_LOOKAHEAD = 0xe0000000 | '('; + + public static final int CHAR_NEG_LOOKAHEAD = 0xf0000000 | '('; + + public static final int CHAR_POS_LOOKBEHIND = 0xf8000000 | '('; + + public static final int CHAR_NEG_LOOKBEHIND = 0xfc000000 | '('; + + public static final int CHAR_ATOMIC_GROUP =
0xfe000000 | '('; + + public static final int CHAR_FLAGS = 0xff000000 | '('; + + public static final int CHAR_START_OF_INPUT = 0x80000000 | 'A'; + + public static final int CHAR_WORD_BOUND = 0x80000000 | 'b'; + + public static final int CHAR_NONWORD_BOUND = 0x80000000 | 'B'; + + public static final int CHAR_PREVIOUS_MATCH = 0x80000000 | 'G'; + + public static final int CHAR_END_OF_INPUT = 0x80000000 | 'z'; + + public static final int CHAR_END_OF_LINE = 0x80000000 | 'Z'; + + public static final int MODE_PATTERN = 1 << 0; + + public static final int MODE_RANGE = 1 << 1; + + public static final int MODE_ESCAPE = 1 << 2; + + //maximum length of decomposition + static final int MAX_DECOMPOSITION_LENGTH = 4; + + /* + * maximum length of Hangul decomposition + * note that MAX_HANGUL_DECOMPOSITION_LENGTH <= MAX_DECOMPOSITION_LENGTH + */ + static final int MAX_HANGUL_DECOMPOSITION_LENGTH = 3; + + /* + * Following constants are needed for Hangul canonical decomposition. + * Hangul decomposition algorithm and constants are taken according + * to description at http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf + * "3.12 Conjoining Jamo Behavior" + */ + static final int SBase = 0xAC00; + + static final int LBase = 0x1100; + + static final int VBase = 0x1161; + + static final int TBase = 0x11A7; + + static final int SCount = 11172; + + static final int LCount = 19; + + static final int VCount = 21; + + static final int TCount = 28; + + static final int NCount = 588; + + //table that contains canonical decomposition mappings + private static IntArrHash decompTable = null; + + //table that contains canonical combining classes + private static IntHash canonClassesTable = null; + + private static int canonClassesTableSize; + + /* + * Table that contains information about Unicode codepoints with + * single codepoint decomposition + */ + private static IntHash singleDecompTable = null; + + private static int singleDecompTableSize; + + private char[] pattern = null; + + private int
flags = 0; + + private int mode = 1; + + // when in literal mode, this field will save the previous one + private int saved_mode = 0; + + // previous char read + private int lookBack; + + //current character read + private int ch; + + //next character + private int lookAhead; + + //index of last char in pattern plus one + private int patternFullLength = 0; + + // cur special token + private SpecialToken curST = null; + + // next special token + private SpecialToken lookAheadST = null; + + // cur char being processed + private int index = 0; + + // previous non-whitespace character index; + private int prevNW = 0; + + // cur token start index + private int curToc = 0; + + // look ahead token index + private int lookAheadToc = 0; + + // original string representing pattern + private String orig = null; + + public Lexer(String pattern, int flags) { + orig = pattern; + if ((flags & TPattern.LITERAL) > 0) { + pattern = TPattern.quote(pattern); + } else if ((flags & TPattern.CANON_EQ) > 0) { + pattern = Lexer.normalize(pattern); + } + + this.pattern = new char[pattern.length() + 2]; + System.arraycopy(pattern.toCharArray(), 0, this.pattern, 0, + pattern.length()); + this.pattern[this.pattern.length - 1] = 0; + this.pattern[this.pattern.length - 2] = 0; + patternFullLength = this.pattern.length; + this.flags = flags; + // read first two tokens; + movePointer(); + movePointer(); + + } + + /** + * Returns current character w/o reading next one; if there are no more + * characters returns 0; + * + * @return current character; + */ + public int peek() { + return ch; + } + + /** + * Set the Lexer to PATTERN or RANGE mode; the Lexer interprets characters + * differently in pattern and range modes.
+ * + * @param mode + * Lexer.MODE_PATTERN or Lexer.MODE_RANGE; other values leave the mode unchanged + */ + public void setMode(int mode) { + if (mode > 0 && mode < 3) { + this.mode = mode; + } + + if (mode == Lexer.MODE_PATTERN) { + reread(); + } + } + + /** + * Restores flags for Lexer + * + * @param flags + */ + public void restoreFlags(int flags) { + this.flags = flags; + lookAhead = ch; + lookAheadST = curST; + + //curToc is an index of closing bracket ) + index = curToc + 1; + lookAheadToc = curToc; + movePointer(); + } + + public SpecialToken peekSpecial() { + return curST; + } + + /** + * Returns true, if current token is special, i.e. quantifier, or other + * compound token. + * + * @return - true if current token is special, false otherwise. + */ + public boolean isSpecial() { + return curST != null; + } + + public boolean isQuantifier() { + return isSpecial() && curST.getType() == SpecialToken.TOK_QUANTIFIER; + } + + public boolean isNextSpecial() { + return lookAheadST != null; + } + + /** + * Returns current character and moves string index to the next one; + * + */ + public int next() { + movePointer(); + return lookBack; + } + + /** + * Returns current special token and moves string index to the next one; + */ + public SpecialToken nextSpecial() { + SpecialToken res = curST; + movePointer(); + return res; + } + + /** + * Returns next symbol read. + */ + public int lookAhead() { + return lookAhead; + } + + /** + * Returns previous character. + */ + public int back() { + return lookBack; + } + + /** + * Normalize given expression. + * + * @param input - expression to normalize + * @return normalized expression.
+ */ + static String normalize(String input) { + char [] inputChars = input.toCharArray(); + int inputLength = inputChars.length; + int resCodePointsIndex = 0; + int inputCodePointsIndex = 0; + int decompHangulIndex = 0; + + //codePoints of input + int [] inputCodePoints = new int [inputLength]; + + //result of canonical decomposition of input + int [] resCodePoints = new int [inputLength * MAX_DECOMPOSITION_LENGTH]; + + //current symbol's codepoint + int ch; + + //current symbol's decomposition + int [] decomp; + + //result of canonical and Hangul decomposition of input + int [] decompHangul; + + //result of canonical decomposition of input in UTF-16 encoding + StringBuilder result = new StringBuilder(); + + decompTable = HashDecompositions.getHashDecompositions(); + canonClassesTable = CanClasses.getHashCanClasses(); + canonClassesTableSize = canonClassesTable.size; + singleDecompTable = SingleDecompositions.getHashSingleDecompositions(); + singleDecompTableSize = singleDecompTable.size; + + for (int i = 0; i < inputLength; i += Character.charCount(ch)) { + ch = Character.codePointAt(inputChars, i); + inputCodePoints[inputCodePointsIndex++] = ch; + } + + /* + * Canonical decomposition based on mappings in decompTable + */ + for (int i = 0; i < inputCodePointsIndex; i++) { + ch = inputCodePoints[i]; + + decomp = Lexer.getDecomposition(ch); + if (decomp == null) { + resCodePoints[resCodePointsIndex++] = ch; + } else { + int curSymbDecompLength = decomp.length; + + for (int j = 0; j < curSymbDecompLength; j++) { + resCodePoints[resCodePointsIndex++] = decomp[j]; + } + } + } + + /* + * Canonical ordering. + * See http://www.unicode.org/reports/tr15/#Decomposition for + * details + */ + resCodePoints = Lexer.getCanonicalOrder(resCodePoints, + resCodePointsIndex); + + /* + * Decomposition for Hangul syllables.
+ * See http://www.unicode.org/reports/tr15/#Hangul for + * details + */ + decompHangul = new int [resCodePoints.length]; + + for (int i = 0; i < resCodePointsIndex; i++) { + int curSymb = resCodePoints[i]; + + decomp = getHangulDecomposition(curSymb); + if (decomp == null) { + decompHangul[decompHangulIndex++] = curSymb; + } else{ + + /* + * Note that Hangul decompositions have length that is + * equal 2 or 3. + */ + decompHangul[decompHangulIndex++] = decomp[0]; + decompHangul[decompHangulIndex++] = decomp[1]; + if (decomp.length == 3) { + decompHangul[decompHangulIndex++] = decomp[2]; + } + } + } + + /* + * Translating into UTF-16 encoding + */ + for (int i = 0; i < decompHangulIndex; i++) { + result.append(Character.toChars(decompHangul[i])); + } + + return result.toString(); + } + + /** + * Rearrange codepoints according + * to canonical order. + * + * @param inputInts - array that contains Unicode codepoints + * @param length - index of last Unicode codepoint plus 1 + * + * @return array that contains rearranged codepoints. + */ + static int [] getCanonicalOrder(int [] inputInts, int length) { + int inputLength = (length < inputInts.length) + ? length + : inputInts.length; + + /* + * Simple bubble-sort algorithm. + * Note that many codepoints have 0 + * canonical class, so this algorithm works + * almost linearly in overwhelming majority + * of cases. This is due to the specifics of Unicode + * combining classes and codepoints.
+ */ + for (int i = 1; i < inputLength; i++) { + int j = i - 1; + int iCanonicalClass = getCanonicalClass(inputInts[i]); + int ch; + + if (iCanonicalClass == 0) { + continue; + } + + while (j > -1) { + if (getCanonicalClass(inputInts[j]) > iCanonicalClass) { + j = j - 1; + } else { + break; + } + } + + ch = inputInts [i]; + for (int k = i; k > j + 1; k--) { + inputInts[k] = inputInts [k - 1]; + } + inputInts[j + 1] = ch; + } + + return inputInts; + } + + /** + * Reread current character; may be required if previous token changes mode + * to one with different character interpretation. + * + */ + private void reread() { + lookAhead = ch; + lookAheadST = curST; + index = lookAheadToc; + lookAheadToc = curToc; + movePointer(); + } + + /** + * Moves pointer one position right; save current character to lookBack; + * lookAhead to current one and finally read one more to lookAhead; + */ + private void movePointer() { + // swap pointers + lookBack = ch; + ch = lookAhead; + curST = lookAheadST; + curToc = lookAheadToc; + lookAheadToc = index; + boolean reread; + do { + reread = false; + // read next character analyze it and construct token: + // // + + lookAhead = (index < pattern.length) ? nextCodePoint() : 0; + lookAheadST = null; + + if (mode == Lexer.MODE_ESCAPE) { + if (lookAhead == '\\') { + + //need not care about supplementary codepoints here + lookAhead = (index < pattern.length) ? pattern[nextIndex()] + : 0; + + switch (lookAhead) { + case 'E': { + mode = saved_mode; + + lookAhead = (index <= pattern.length - 2) + ? nextCodePoint() + : 0; + break; + } + + default: { + lookAhead = '\\'; + index = prevNW; + return; + } + } + } else { + return; + } + } + + if (lookAhead == '\\') { + + lookAhead = (index < pattern.length - 2) ?
nextCodePoint() + : -1; + switch (lookAhead) { + case -1: + throw new TPatternSyntaxException("", this.toString(), index); + case 'P': + case 'p': { + String cs = parseCharClassName(); + boolean negative = false; + + if (lookAhead == 'P') + negative = true; + try { + lookAheadST = AbstractCharClass.getPredefinedClass(cs, + negative); + } catch (MissingResourceException mre) { + throw new TPatternSyntaxException("", this.toString(), index); + } + lookAhead = 0; + break; + } + + case 'w': + case 's': + case 'd': + case 'W': + case 'S': + case 'D': { + lookAheadST = CharClass.getPredefinedClass(new String( + pattern, prevNW, 1), false); + lookAhead = 0; + break; + } + + case 'Q': { + saved_mode = mode; + mode = Lexer.MODE_ESCAPE; + reread = true; + break; + } + + case 't': + lookAhead = '\t'; + break; + case 'n': + lookAhead = '\n'; + break; + case 'r': + lookAhead = '\r'; + break; + case 'f': + lookAhead = '\f'; + break; + case 'a': + lookAhead = '\u0007'; + break; + case 'e': + lookAhead = '\u001B'; + break; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (mode == Lexer.MODE_PATTERN) { + lookAhead = 0x80000000 | lookAhead; + } + break; + } + + case '0': + lookAhead = readOctals(); + break; + case 'x': + lookAhead = readHex("hexadecimal", 2); //$NON-NLS-1$ + break; + case 'u': + lookAhead = readHex("Unicode", 4); //$NON-NLS-1$ + break; + + case 'b': + lookAhead = CHAR_WORD_BOUND; + break; + case 'B': + lookAhead = CHAR_NONWORD_BOUND; + break; + case 'A': + lookAhead = CHAR_START_OF_INPUT; + break; + case 'G': + lookAhead = CHAR_PREVIOUS_MATCH; + break; + case 'Z': + lookAhead = CHAR_END_OF_LINE; + break; + case 'z': + lookAhead = CHAR_END_OF_INPUT; + break; + case 'c': { + if (index < pattern.length - 2) { + + //need not care about supplementary codepoints here + lookAhead = (pattern[nextIndex()] & 0x1f); + break; + } else { + throw new TPatternSyntaxException("", this.toString(), index); + } + } +
case 'C': + case 'E': + case 'F': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'R': + case 'T': + case 'U': + case 'V': + case 'X': + case 'Y': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'o': + case 'q': + case 'y': + throw new TPatternSyntaxException("", this.toString(), index); + + default: + break; + } + } else if (mode == Lexer.MODE_PATTERN) { + switch (lookAhead) { + case '+': + case '*': + case '?': { + char mod = (index < pattern.length) ? pattern[index] : '*'; + switch (mod) { + case '+': { + lookAhead = lookAhead | Lexer.QMOD_POSSESSIVE; + nextIndex(); + break; + } + case '?': { + lookAhead = lookAhead | Lexer.QMOD_RELUCTANT; + nextIndex(); + break; + } + default: { + lookAhead = lookAhead | Lexer.QMOD_GREEDY; + break; + } + } + + break; + } + + case '{': { + lookAheadST = processQuantifier(lookAhead); + break; + } + + case '$': + lookAhead = CHAR_DOLLAR; + break; + case '(': { + if (pattern[index] == '?') { + nextIndex(); + char nonCap = pattern[index]; + boolean behind = false; + do { + if (!behind) { + switch (nonCap) { + case '!': + lookAhead = CHAR_NEG_LOOKAHEAD; + nextIndex(); + break; + case '=': + lookAhead = CHAR_POS_LOOKAHEAD; + nextIndex(); + break; + case '>': + lookAhead = CHAR_ATOMIC_GROUP; + nextIndex(); + break; + case '<': { + nextIndex(); + nonCap = pattern[index]; + behind = true; + break; + } + default: { + lookAhead = readFlags(); + + /* + * We return res = res | 1 << 8 + * from readFlags() if we read + * (?idmsux-idmsux) + */ + if (lookAhead >= 256) { + + //Erase auxiliary bit + lookAhead = (lookAhead & 0xff); + flags = lookAhead; + lookAhead = lookAhead << 16; + lookAhead = CHAR_FLAGS | lookAhead; + } else { + flags = lookAhead; + lookAhead = lookAhead << 16; + lookAhead = CHAR_NONCAP_GROUP + | lookAhead; + } + break; + } + } + } else { + behind = false; + switch (nonCap) { + case '!': + lookAhead = CHAR_NEG_LOOKBEHIND; +
nextIndex(); + break; + case '=': + lookAhead = CHAR_POS_LOOKBEHIND; + nextIndex(); + break; + default: + throw new TPatternSyntaxException("", this.toString(), index); + } + } + } while (behind); + } else { + lookAhead = CHAR_LEFT_PARENTHESIS; + } + break; + } + + case ')': + lookAhead = CHAR_RIGHT_PARENTHESIS; + break; + case '[': { + lookAhead = CHAR_LEFT_SQUARE_BRACKET; + setMode(Lexer.MODE_RANGE); + break; + } + case ']': { + if (mode == Lexer.MODE_RANGE) { + lookAhead = CHAR_RIGHT_SQUARE_BRACKET; + } + break; + } + case '^': + lookAhead = CHAR_CARET; + break; + case '|': + lookAhead = CHAR_VERTICAL_BAR; + break; + case '.': + lookAhead = CHAR_DOT; + break; + default: + break; + } + } else if (mode == Lexer.MODE_RANGE) { + switch (lookAhead) { + case '[': + lookAhead = CHAR_LEFT_SQUARE_BRACKET; + break; + case ']': + lookAhead = CHAR_RIGHT_SQUARE_BRACKET; + break; + case '^': + lookAhead = CHAR_CARET; + break; + case '&': + lookAhead = CHAR_AMPERSAND; + break; + case '-': + lookAhead = CHAR_HYPHEN; + break; + default: + break; + } + } + } while (reread); + } + + /** + * Parses character class names and verifies the correctness of the syntax; + */ + private String parseCharClassName() { + StringBuilder sb = new StringBuilder(10); + if (index < pattern.length - 2) { + // one symbol family + if (pattern[index] != '{') { + return "Is" + new String(pattern, nextIndex(), 1); //$NON-NLS-1$ + } + + nextIndex(); + char ch = 0; + while (index < pattern.length - 2 + && (ch = pattern[nextIndex()]) != '}') { + sb.append(ch); + } + if (ch != '}') + throw new TPatternSyntaxException("", this.toString(), index); + } + + if (sb.length() == 0) + throw new TPatternSyntaxException("", this.toString(), index); + + String res = sb.toString(); + if (res.length() == 1) + return "Is" + res; + return (res.length() > 3 && (res.startsWith("Is") || res.startsWith("In"))) ? res.substring(2) : res; + } + + /** + * Processes the given character on the assumption that it is a quantifier.
+ */ + private Quantifier processQuantifier(int ch) { + StringBuilder sb = new StringBuilder(4); + int min = -1; + int max = Integer.MAX_VALUE; + while (index < pattern.length && (ch = pattern[nextIndex()]) != '}') { + if (ch == ',' && min < 0) { + try { + min = Integer.parseInt(sb.toString(), 10); + sb.delete(0, sb.length()); + } catch (NumberFormatException nfe) { + throw new TPatternSyntaxException("", this.toString(), index); + } + } else { + sb.append((char) ch); + } + } + if (ch != '}') { + throw new TPatternSyntaxException("", this.toString(), index); + } + if (sb.length() > 0) { + try { + max = Integer.parseInt(sb.toString(), 10); + if (min < 0) + min = max; + } catch (NumberFormatException nfe) { + throw new TPatternSyntaxException("", this.toString(), index); + } + } else if (min < 0) { + throw new TPatternSyntaxException("", this.toString(), index); + } + if ((min | max | max - min) < 0) { + throw new TPatternSyntaxException("", this.toString(), index); + } + + char mod = (index < pattern.length) ? pattern[index] : '*'; + + switch (mod) { + case '+': + lookAhead = Lexer.QUANT_COMP_P; + nextIndex(); + break; + case '?': + lookAhead = Lexer.QUANT_COMP_R; + nextIndex(); + break; + default: + lookAhead = Lexer.QUANT_COMP; + break; + } + return new Quantifier(min, max); + } + + public String toString() { + return orig; + } + + /** + * Checks if there are any characters in the pattern. + * + * @return true if there are no more characters in the pattern. + */ + public boolean isEmpty() { + return ch == 0 && lookAhead == 0 && index == patternFullLength && !isSpecial(); + } + + /** + * Returns true if current character is plain token. + */ + public static boolean isLetter(int ch) { + + //all supplementary codepoints have integer value that is >= 0; + return ch >= 0; + } + + /** + * Return true if current character is letter, false otherwise; This is + * shortcut to static method isLetter to check the current character.
+ * + * @return true if current character is letter, false otherwise + */ + public boolean isLetter() { + return !isEmpty() && !isSpecial() && isLetter(ch); + } + + /* + * Note that Character class methods + * isHighSurrogate(), isLowSurrogate() + * take char parameter while we need an int + * parameter without truncation to char value + */ + public boolean isHighSurrogate() { + return (ch <= 0xDBFF) && (ch >= 0xD800); + } + + public boolean isLowSurrogate() { + return (ch <= 0xDFFF) && (ch >= 0xDC00); + } + + public static boolean isHighSurrogate(int ch) { + return (ch <= 0xDBFF) && (ch >= 0xD800); + } + + public static boolean isLowSurrogate(int ch) { + return (ch <= 0xDFFF) && (ch >= 0xDC00); + } + + /** + * Process hexadecimal integer. + */ + private int readHex(String radixName, int max) { + StringBuilder st = new StringBuilder(max); + int length = pattern.length - 2; + int i; + for (i = 0; i < max && index < length; i++) { + st.append(pattern[nextIndex()]); + } + if (i == max) { + try { + return Integer.parseInt(st.toString(), 16); + } catch (NumberFormatException nfe) { /* not valid hex: fall through to the syntax exception below */ + } + } + + throw new TPatternSyntaxException("", this.toString(), index); + } + + /** + * Process octal integer.
+ */ + private int readOctals() { + int max = 3; + int i = 1; + int first; + int res; + int length = pattern.length - 2; + + switch (first = Character.digit(pattern[index], 8)) { + case -1: + throw new TPatternSyntaxException("", this.toString(), index); + default: { + if (first > 3) + max--; + nextIndex(); + res = first; + } + } + + while (i < max && index < length + && (first = Character.digit(pattern[index], 8)) >= 0) { + res = res * 8 + first; + nextIndex(); + i++; + } + + return res; + } + + /** + * Process expression flags given with (?idmsux-idmsux) + */ + private int readFlags() { + char ch; + boolean pos = true; + int res = flags; + + while (index < pattern.length) { + ch = pattern[index]; + switch (ch) { + case '-': + if (!pos) { + throw new TPatternSyntaxException("", this.toString(), index); + } + pos = false; + break; + + case 'i': + res = pos + ? res | TPattern.CASE_INSENSITIVE + : (res ^ TPattern.CASE_INSENSITIVE) & res; + break; + + case 'd': + res = pos + ? res | TPattern.UNIX_LINES + : (res ^ TPattern.UNIX_LINES) & res; + break; + + case 'm': + res = pos + ? res | TPattern.MULTILINE + : (res ^ TPattern.MULTILINE) & res; + break; + + case 's': + res = pos + ? res | TPattern.DOTALL + : (res ^ TPattern.DOTALL) & res; + break; + + case 'u': + res = pos + ? res | TPattern.UNICODE_CASE + : (res ^ TPattern.UNICODE_CASE) & res; + break; + + case 'x': + res = pos + ? res | TPattern.COMMENTS + : (res ^ TPattern.COMMENTS) & res; + break; + + case ':': + nextIndex(); + return res; + + case ')': + nextIndex(); + return res | (1 << 8); + + default: + // ignore invalid flags (HARMONY-2127) + } + nextIndex(); + } + throw new TPatternSyntaxException("", this.toString(), index); + } + + + /** + * Returns next character index to read and moves pointer to the next one. + * If comments flag is on this method will skip comments and whitespaces.
+ * + * The following actions are equivalent if comments flag is off ch = + * pattern[index++] == ch = pattern[nextIndex] + * + * @return next character index to read. + */ + private int nextIndex() { + prevNW = index; + if ((flags & TPattern.COMMENTS) != 0) { + skipComments(); + } else { + index++; + } + return prevNW; + } + + /** + * Skips comments and whitespaces + */ + private int skipComments() { + int length = pattern.length - 2; + index++; + do { + while (index < length && Character.isWhitespace(pattern[index])) + index++; + if (index < length && pattern[index] == '#') { + index++; + while (index < length && !isLineSeparator(pattern[index])) + index++; + } else + return index; + } while (true); + } + + private boolean isLineSeparator(int ch) { + return (ch == '\n' || ch == '\r' || ch == '\u0085' || (ch | 1) == '\u2029'); + } + + /** + * Gets decomposition for given codepoint from + * decomposition mappings table. + * + * @param ch - Unicode codepoint + * @return array of codepoints that is a canonical + * decomposition of ch. + */ + static int [] getDecomposition(int ch) { + return decompTable.get(ch); + } + + /** + * Gets decomposition for given Hangul syllable. + * This is an implementation of Hangul decomposition algorithm + * according to http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf + * "3.12 Conjoining Jamo Behavior". + * + * @param ch - given Hangul syllable + * @return canonical decomposition of ch. + */ + static int [] getHangulDecomposition(int ch) { + int SIndex = ch - SBase; + + if (SIndex < 0 || SIndex >= SCount) { + return null; + } else { + int L = LBase + SIndex / NCount; + int V = VBase + (SIndex % NCount) / TCount; + int T = SIndex % TCount; + int decomp []; + + if (T == 0) { + decomp = new int [] {L, V}; + } else { + T = TBase + T; + decomp = new int [] {L, V, T}; + } + return decomp; + } + } + + /** + * Gets canonical class for given codepoint from + * canonical combining classes table.
+ * + * @param ch - Unicode codepoint + * @return canonical class for given Unicode codepoint + * that is represented by ch. + */ + static int getCanonicalClass(int ch) { + int canClass = canonClassesTable.get(ch); + + return (canClass == canonClassesTableSize) + ? 0 + : canClass; + } + + /** + * Tests if given codepoint is a canonical decomposition of another + * codepoint. + * + * @param ch - codepoint to test + * @return true if ch is a decomposition. + */ + static boolean hasSingleCodepointDecomposition(int ch) { + int hasSingleDecomp = singleDecompTable.get(ch); + + /* + * singleDecompTable doesn't contain ch + * == (hasSingleDecomp == singleDecompTableSize) + */ + return (hasSingleDecomp == singleDecompTableSize) + ? false + : true; + } + + /** + * Tests if given codepoint has canonical decomposition + * and given codepoint's canonical class is not 0. + * + * @param ch - codepoint to test + * @return true if canonical class is not 0 and ch has a decomposition. + */ + static boolean hasDecompositionNonNullCanClass(int ch) { + return ch == 0x0340 | ch == 0x0341 | ch == 0x0343 | ch == 0x0344; + } + + private int nextCodePoint() { + char high = pattern[nextIndex()]; + + if (Character.isHighSurrogate(high)) { + + //low and high char may be delimited by spaces + int lowExpectedIndex = prevNW + 1; + + if (lowExpectedIndex < pattern.length) { + char low = pattern[lowExpectedIndex]; + if (Character.isLowSurrogate(low)) { + nextIndex(); + return Character.toCodePoint(high, low); + } + } + } + + return (int) high; + } + + /** + * Tests Unicode codepoint if it is a boundary + * of decomposed Unicode codepoint. + * + * @param ch - Unicode codepoint to test + * @return true if given codepoint is a boundary. + */ + static boolean isDecomposedCharBoundary(int ch) { + int canClass = canonClassesTable.get(ch); + + //Lexer.getCanonicalClass(ch) == 0 + boolean isBoundary = (canClass == canonClassesTableSize); + + return isBoundary; + } + + /** + * Returns the current
token start index. + */ + public int getIndex() { + return curToc; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowHighSurrogateRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowHighSurrogateRangeSet.java new file mode 100644 index 000000000..f6c103b9e --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowHighSurrogateRangeSet.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * + * Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. + * + * COPYRIGHT AND PERMISSION NOTICE + * + * Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under + * the Terms of Use in http://www.unicode.org/copyright.html.
Permission is + * hereby granted, free of charge, to any person obtaining a copy of the + * Unicode data files and any associated documentation (the "Data Files") + * or Unicode software and any associated documentation (the "Software") + * to deal in the Data Files or Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, distribute, + * and/or sell copies of the Data Files or Software, and to permit persons + * to whom the Data Files or Software are furnished to do so, provided that + * (a) the above copyright notice(s) and this permission notice appear with + * all copies of the Data Files or Software, (b) both the above copyright + * notice(s) and this permission notice appear in associated documentation, + * and (c) there is clear notice in each modified Data File or in the Software + * as well as in the documentation associated with the Data File(s) or Software + * that the data or software has been modified. + + * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS + * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THE DATA FILES OR SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in these Data Files or Software without prior written + * authorization of the copyright holder. + * + * 2. 
Additional terms from the Database: + * + * Copyright © 1995-1999 Unicode, Inc. All Rights reserved. + * + * Disclaimer + * + * The Unicode Character Database is provided as is by Unicode, Inc. + * No claims are made as to fitness for any particular purpose. No warranties + * of any kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been purchased + * on magnetic or optical media from Unicode, Inc., the sole remedy for any claim + * will be exchange of defective media within 90 days of receipt. This disclaimer + * is applicable for all other data files accompanying the Unicode Character Database, + * some of which have been compiled by the Unicode Consortium, and some of which + * have been supplied by other sources. + * + * Limitations on Rights to Redistribute This Data + * + * Recipient is granted the right to make copies in any form for internal + * distribution and to freely use the information supplied in the creation of + * products supporting the UnicodeTM Standard. The files in + * the Unicode Character Database can be redistributed to third parties or other + * organizations (whether for profit or not) as long as this notice and the disclaimer + * notice are retained. Information can be extracted from these files and used + * in documentation or programs, as long as there is an accompanying notice + * indicating the source. + */ + +package org.teavm.classlib.java.util.regex; + +/* + * This class is a range that contains only surrogate characters. 
+ */ +class LowHighSurrogateRangeSet extends JointSet { + + protected AbstractCharClass surrChars; + + protected boolean alt = false; + + public LowHighSurrogateRangeSet(AbstractCharClass surrChars, AbstractSet next) { + this.surrChars = surrChars.getInstance(); + this.alt = surrChars.alt; + setNext(next); + } + + public LowHighSurrogateRangeSet(AbstractCharClass surrChars) { + this.surrChars = surrChars.getInstance(); + this.alt = surrChars.alt; + } + + /** + * Returns the next. + */ + public AbstractSet getNext() { + return this.next; + } + + /** + * Sets next abstract set. + * @param next + * The next to set. + */ + public void setNext(AbstractSet next) { + this.next = next; + } + + /** + * Returns stringIndex+shift, the next position to match + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int startStr = matchResult.getLeftBound(); + int strLength = matchResult.getRightBound(); + + if (stringIndex + 1 > strLength) { + matchResult.hitEnd = true; + return -1; + } + + char ch = testString.charAt(stringIndex); + + if (!surrChars.contains(ch)) { + return -1; + } + + if (Character.isHighSurrogate(ch)) { + + if (stringIndex + 1 < strLength) { + char low = testString.charAt(stringIndex + 1); + + if (Character.isLowSurrogate(low)) { + return -1; + } + } + } else if (Character.isLowSurrogate(ch)) { + + if (stringIndex > startStr) { + char high = testString.charAt(stringIndex - 1); + + if (Character.isHighSurrogate(high)) { + return -1; + } + } + } + + return next.matches(stringIndex + 1, testString, matchResult); + } + + protected String getName() { + return "range:" + (alt ? 
"^ " : " ") + surrChars.toString(); + } + + public boolean first(AbstractSet set) { + if (set instanceof CharSet) { + return false; + } else if (set instanceof RangeSet) { + return false; + } else if (set instanceof SupplRangeSet) { + return false; + } else if (set instanceof SupplCharSet) { + return false; + } + + return true; + } + + protected AbstractCharClass getChars() { + return surrChars; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return true; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowSurrogateCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowSurrogateCharSet.java new file mode 100644 index 000000000..008c8fa08 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowSurrogateCharSet.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * + * Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. + * + * COPYRIGHT AND PERMISSION NOTICE + * + * Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under + * the Terms of Use in http://www.unicode.org/copyright.html. 
Permission is + * hereby granted, free of charge, to any person obtaining a copy of the + * Unicode data files and any associated documentation (the "Data Files") + * or Unicode software and any associated documentation (the "Software") + * to deal in the Data Files or Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, distribute, + * and/or sell copies of the Data Files or Software, and to permit persons + * to whom the Data Files or Software are furnished to do so, provided that + * (a) the above copyright notice(s) and this permission notice appear with + * all copies of the Data Files or Software, (b) both the above copyright + * notice(s) and this permission notice appear in associated documentation, + * and (c) there is clear notice in each modified Data File or in the Software + * as well as in the documentation associated with the Data File(s) or Software + * that the data or software has been modified. + + * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS + * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THE DATA FILES OR SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in these Data Files or Software without prior written + * authorization of the copyright holder. + * + * 2. 
Additional terms from the Database: + * + * Copyright © 1995-1999 Unicode, Inc. All Rights reserved. + * + * Disclaimer + * + * The Unicode Character Database is provided as is by Unicode, Inc. + * No claims are made as to fitness for any particular purpose. No warranties + * of any kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been purchased + * on magnetic or optical media from Unicode, Inc., the sole remedy for any claim + * will be exchange of defective media within 90 days of receipt. This disclaimer + * is applicable for all other data files accompanying the Unicode Character Database, + * some of which have been compiled by the Unicode Consortium, and some of which + * have been supplied by other sources. + * + * Limitations on Rights to Redistribute This Data + * + * Recipient is granted the right to make copies in any form for internal + * distribution and to freely use the information supplied in the creation of + * products supporting the UnicodeTM Standard. The files in + * the Unicode Character Database can be redistributed to third parties or other + * organizations (whether for profit or not) as long as this notice and the disclaimer + * notice are retained. Information can be extracted from these files and used + * in documentation or programs, as long as there is an accompanying notice + * indicating the source. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * This class represents low surrogate character. + */ +class LowSurrogateCharSet extends JointSet{ + + /* + * Note that we can use high and low surrogate characters + * that don't combine into supplementary code point. + * See http://www.unicode.org/reports/tr18/#Supplementary_Characters + */ + private char low; + + public LowSurrogateCharSet(char low) { + this.low = low; + } + + /** + * Returns the next. + */ + public AbstractSet getNext() { + return this.next; + } + + /** + * Sets next abstract set. 
+ * @param next + * The next to set. + */ + public void setNext(AbstractSet next) { + this.next = next; + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + if (stringIndex + 1 > matchResult.getRightBound()) { + matchResult.hitEnd = true; + return -1; + } + + char low = testString.charAt(stringIndex); + + if (stringIndex > matchResult.getLeftBound()) { + char high = testString.charAt(stringIndex - 1); + + /* + * we consider high surrogate followed by + * low surrogate as a codepoint + */ + if (Character.isHighSurrogate(high)) { + return -1; + } + } + + if (this.low == low) { + return next.matches(stringIndex + 1, testString, + matchResult); + } + + return -1; + } + + public int find(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (testString instanceof String) { + String testStr = (String) testString; + int startStr = matchResult.getLeftBound(); + int strLength = matchResult.getRightBound(); + + while (strIndex < strLength) { + + strIndex = testStr.indexOf(low, strIndex); + if (strIndex < 0) + return -1; + + if (strIndex > startStr) { + + /* + * we consider high surrogate followed by + * low surrogate as a codepoint + */ + if (Character.isHighSurrogate(testStr.charAt(strIndex - 1))) { + strIndex++; + continue; + } + } + + if (next.matches(strIndex + 1, testString, matchResult) >= 0) { + return strIndex; + } + strIndex++; + } + + return -1; + } + + return super.find(strIndex, testString, matchResult); + } + + public int findBack(int strIndex, int lastIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (testString instanceof String) { + int startStr = matchResult.getLeftBound(); + String testStr = (String) testString; + + while (lastIndex >= strIndex) { + lastIndex = testStr.lastIndexOf(low, lastIndex); + if (lastIndex < 0 || lastIndex < strIndex) { + return -1; + } + + if (lastIndex > startStr) { + + /* + * we consider high surrogate followed by + * low surrogate as a 
codepoint + */ + if (Character.isHighSurrogate(testStr.charAt(lastIndex - 1))) { + lastIndex -= 2; + continue; + } + } + + if (next.matches(lastIndex + 1, testString, matchResult) >= 0) { + return lastIndex; + } + + lastIndex--; + } + + return -1; + } + + return super.findBack(strIndex, lastIndex, testString, matchResult); + } + + protected String getName() { + return "" + low; + } + + protected int getChar() { + return low; + } + + public boolean first(AbstractSet set) { + if (set instanceof CharSet) { + return false; + } else if (set instanceof RangeSet) { + return false; + } else if (set instanceof SupplRangeSet) { + return false; + } else if (set instanceof SupplCharSet) { + return false; + } else if (set instanceof HighSurrogateCharSet) { + return false; + } else if (set instanceof LowSurrogateCharSet) { + return ((LowSurrogateCharSet) set).low == this.low; + } + + return true; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return true; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MatchResultImpl.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MatchResultImpl.java new file mode 100644 index 000000000..0ebc0e218 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MatchResultImpl.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.Arrays; + + +/** + * Match result implementation + * Note: probably it might make sense to combine this class with Matcher. + * + * @author Nikolay A. Kuznetsov + */ +class MatchResultImpl implements TMatchResult { + + private int[] groupBounds = null; + + private int[] consumers = null; + + private int[] compQuantCounters = null; + + private CharSequence string = null; + + private int groupCount = 0; + + private boolean valid = false; + + private int leftBound; + + private int rightBound; + + int startIndex; + + private boolean transparentBounds = false; + + private boolean anchoringBounds = false; + + boolean hitEnd = false; + + boolean requireEnd = false; + + int previousMatch = -1; + + private int mode; + + MatchResultImpl(CharSequence string, int leftBound, int rightBound, + int groupCount, int compQuantCount, int consumersCount) { + this.groupCount = ++groupCount; + this.groupBounds = new int[groupCount * 2]; + + this.consumers = new int[consumersCount]; + Arrays.fill(consumers, -1); + + if (compQuantCount > 0) + this.compQuantCounters = new int[compQuantCount]; + Arrays.fill(groupBounds, -1); + reset(string, leftBound, rightBound); + } + + TMatchResult cloneImpl() { + MatchResultImpl res = new MatchResultImpl(this.string, this.leftBound, + this.rightBound, this.groupCount - 1, 0, 0); + + res.valid = valid; + if (valid) { + System.arraycopy(groupBounds, 0, res.groupBounds, 0, + this.groupBounds.length); + } + return res; + } + + 
public void setConsumed(int counter, int value) { + this.consumers[counter] = value; + } + + public int getConsumed(int counter) { + return this.consumers[counter]; + } + + public int end() { + return end(0); + } + + public int end(int group) { + checkGroup(group); + return groupBounds[group * 2 + 1]; + } + + void setStart(int group, int offset) { + groupBounds[group * 2] = offset; + } + + void setEnd(int group, int offset) { + groupBounds[group * 2 + 1] = offset; + } + + int getStart(int group) { + return groupBounds[group * 2]; + } + + int getEnd(int group) { + return groupBounds[group * 2 + 1]; + } + + public String group() { + return group(0); + } + + public String group(int group) { + if (start(group) < 0) + return null; + return string.subSequence(start(group), end(group)).toString(); + } + + String getGroupNoCheck(int group) { + int st = getStart(group); + int end = getEnd(group); + if ((end | st | (end - st)) < 0 || end > string.length()) + return null; + + return string.subSequence(st, end).toString(); + } + + public int groupCount() { + return groupCount - 1; + } + + public int start() { + return start(0); + } + + public int start(int group) { + checkGroup(group); + return groupBounds[group * 2]; + } + + /* + * This method being called after any successful match; For now it's being + * used to check zero group for empty match; + */ + public void finalizeMatch() { + if (this.groupBounds[0] == -1) { + this.groupBounds[0] = this.startIndex; + this.groupBounds[1] = this.startIndex; + } + + previousMatch = end(); + } + + public int getEnterCounter(int setCounter) { + return compQuantCounters[setCounter]; + } + + public void setEnterCounter(int setCounter, int value) { + compQuantCounters[setCounter] = value; + } + + private void checkGroup(int group) { + if (!valid) { + throw new IllegalStateException(); + } + + if (group < 0 || group > groupCount) { + throw new IndexOutOfBoundsException(String.valueOf(group)); + } + } + + void updateGroup(int index, int 
srtOffset, int endOffset) { + checkGroup(index); + groupBounds[index * 2] = srtOffset; + groupBounds[index * 2 + 1] = endOffset; + } + + protected void setValid() { + this.valid = true; + } + + protected boolean isValid() { + return this.valid; + } + + protected void reset(CharSequence newSequence, int leftBound, int rightBound) { + valid = false; + mode = TMatcher.MODE_MATCH; + Arrays.fill(groupBounds, -1); + Arrays.fill(consumers, -1); + + if (newSequence != null) + this.string = newSequence; + if (leftBound >= 0) + this.setBounds(leftBound, rightBound); + this.startIndex = this.leftBound; + } + + protected void reset() { + reset(null, -1, -1); + } + + private void setBounds(int leftBound, int rightBound) { + this.leftBound = leftBound; + this.rightBound = rightBound; + } + + protected void setStartIndex(int startIndex) { + this.startIndex = startIndex; + previousMatch = previousMatch >= 0 ? previousMatch : startIndex; + } + + public int getLeftBound() { + return this.leftBound; + } + + public int getRightBound() { + return this.rightBound; + } + + protected void setMode(int mode) { + this.mode = mode; + } + + protected int mode() { + return mode; + } + + protected void useAnchoringBounds(boolean value) { + this.anchoringBounds = value; + } + + protected boolean hasAnchoringBounds() { + return this.anchoringBounds; + } + + protected void useTransparentBounds(boolean value) { + this.transparentBounds = value; + } + + protected boolean hasTransparentBounds() { + return this.transparentBounds; + } + + int getPreviousMatchEnd() { + return previousMatch; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineEOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineEOLSet.java new file mode 100644 index 000000000..fd611af76 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineEOLSet.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents multiline version of the dollar sign. + * + * @author Nikolay A. Kuznetsov + */ +class MultiLineEOLSet extends AbstractSet { + + private int consCounter; + + public MultiLineEOLSet(int counter) { + this.consCounter = counter; + } + + public int matches(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + int strDif = matchResult.hasAnchoringBounds() ? 
matchResult + .getLeftBound() + - strIndex : testString.length() - strIndex; + char ch1; + char ch2; + if (strDif == 0) { + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } else if (strDif >= 2) { + ch1 = testString.charAt(strIndex); + ch2 = testString.charAt(strIndex + 1); + } else { + ch1 = testString.charAt(strIndex); + ch2 = 'a'; + } + + switch (ch1) { + case '\r': { + if (ch2 == '\n') { + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } + + case '\n': + case '\u0085': + case '\u2028': + case '\u2029': { + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } + + default: + return -1; + } + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + int cons; + boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); + matchResult.setConsumed(consCounter, -1); + return res; + } + + protected String getName() { + return ""; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineSOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineSOLSet.java new file mode 100644 index 000000000..175805672 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineSOLSet.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Multiline version of the ^ sign. + * @author Nikolay A. Kuznetsov + */ +class MultiLineSOLSet extends AbstractSet { + + private AbstractLineTerminator lt; + + public MultiLineSOLSet(AbstractLineTerminator lt) { + this.lt = lt; + } + + public int matches(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (strIndex != matchResult.getRightBound() + && ((strIndex == 0 || (matchResult.hasAnchoringBounds() && strIndex == matchResult + .getLeftBound())) || lt.isAfterLineTerminator( + testString.charAt(strIndex - 1), testString + .charAt(strIndex)))) { + return next.matches(strIndex, testString, matchResult); + } + + return -1; + + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return false; + } + + protected String getName() { + return "^"; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookAhead.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookAhead.java new file mode 100644 index 000000000..63d17f927 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookAhead.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.ArrayList; + +/** + * Negative look ahead node. + * + * @author Nikolay A. Kuznetsov + */ +class NegativeLookAhead extends AtomicJointSet { + + public NegativeLookAhead(ArrayList children, FSet fSet) { + super(children, fSet); + } + + /** + * Returns stringIndex+shift, the next position to match + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int size = children.size(); + + for (int i = 0; i < size; i++) { + AbstractSet e = (AbstractSet) children.get(i); + if (e.matches(stringIndex, testString, matchResult) >= 0) + return -1; + } + + return next.matches(stringIndex, testString, matchResult); + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return false; + } + + protected String getName() { + return "NegLookaheadJointSet"; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookBehind.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookBehind.java new file mode 100644 index 000000000..6dd0998f3 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookBehind.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.ArrayList; + +/** + * Negative look behind node. + * + * @author Nikolay A. Kuznetsov + */ +class NegativeLookBehind extends AtomicJointSet { + + public NegativeLookBehind(ArrayList children, FSet fSet) { + super(children, fSet); + } + + /** + * Returns stringIndex+shift, the next position to match + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int size = children.size(); + int shift; + + // fSet will take this index to check if we at the right bound + // and return true if the current index equal to this one + matchResult.setConsumed(groupIndex, stringIndex); + + for (int i = 0; i < size; i++) { + AbstractSet e = (AbstractSet) children.get(i); + // find limits could be calculated though e.getCharCount() + // fSet will return true only if string index at fSet equal + // to stringIndex + shift = e.findBack(0, stringIndex, testString, matchResult); + if (shift >= 0) { + return -1; + } + } + + return next.matches(stringIndex, testString, matchResult); + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return false; + } + + protected String getName() { + return "NegBehindJointSet"; 
//$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapFSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapFSet.java new file mode 100644 index 000000000..c88aa8d9d --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapFSet.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Non-capturing group closing node. + * + * @author Nikolay A. 
Kuznetsov + */ +class NonCapFSet extends FSet { + + public NonCapFSet(int groupIndex) { + super(groupIndex); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int gr = getGroupIndex(); + matchResult.setConsumed(gr, stringIndex - matchResult.getConsumed(gr)); + + return next.matches(stringIndex, testString, matchResult); + } + + protected String getName() { + return "NonCapFSet"; //$NON-NLS-1$ + } + + public boolean hasConsumed(MatchResultImpl mr) { + return false; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapJointSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapJointSet.java new file mode 100644 index 000000000..29ee3393f --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapJointSet.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.ArrayList; + +/** + * Node representing non-capturing group + * @author Nikolay A. 
Kuznetsov + */ +class NonCapJointSet extends JointSet { + + protected NonCapJointSet() { + } + + public NonCapJointSet(ArrayList children, FSet fSet) { + super(children, fSet); + } + + /** + * Returns stringIndex+shift, the next position to match + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int start = matchResult.getConsumed(groupIndex); + matchResult.setConsumed(groupIndex, stringIndex); + + int size = children.size(); + for (int i = 0; i < size; i++) { + AbstractSet e = (AbstractSet) children.get(i); + int shift = e.matches(stringIndex, testString, matchResult); + if (shift >= 0) { + return shift; + } + } + matchResult.setConsumed(groupIndex, start); + return -1; + } + + protected String getName() { + return "NonCapJointSet"; //$NON-NLS-1$ + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + int cons = matchResult.getConsumed(groupIndex); + return cons != 0; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosAltGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosAltGroupQuantifierSet.java new file mode 100644 index 000000000..d9746191a --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosAltGroupQuantifierSet.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Possessive quantifier over group, see java.util.regex.GroupQuantifierSet + * for more details. + * + * @author Nikolay A. Kuznetsov + */ +class PosAltGroupQuantifierSet extends AltGroupQuantifierSet { + + public PosAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, + int type) { + super(innerSet, next, type); + ((JointSet) innerSet).setNext(FSet.posFSet); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int nextIndex = innerSet.matches(stringIndex, testString, matchResult); + if (nextIndex > 0) + stringIndex = nextIndex; + + return next.matches(stringIndex, testString, matchResult); + } + + public void setNext(AbstractSet next) { + this.next = next; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosCompositeGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosCompositeGroupQuantifierSet.java new file mode 100644 index 000000000..64dc5f7e5 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosCompositeGroupQuantifierSet.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Possessive composite (i.e. {n,m}) quantifier node over groups. + * @author Nikolay A. Kuznetsov + */ +class PosCompositeGroupQuantifierSet extends CompositeGroupQuantifierSet { + + public PosCompositeGroupQuantifierSet(Quantifier quant, + AbstractSet innerSet, AbstractSet next, int type, int setCounter) { + super(quant, innerSet, next, type, setCounter); + innerSet.setNext(FSet.posFSet); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int nextIndex; + int counter = 0; + int max = quantifier.max(); + + while ((nextIndex = innerSet.matches(stringIndex, testString, + matchResult)) > stringIndex + && counter < max) { + counter++; + stringIndex = nextIndex; + } + + if (nextIndex < 0 && counter < quantifier.min()) { + return -1; + } else { + return next.matches(stringIndex, testString, matchResult); + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosPlusGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosPlusGroupQuantifierSet.java new file mode 100644 index 000000000..1f8ae74dc --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosPlusGroupQuantifierSet.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Possessive + quantifier node over groups. + * + * @author Nikolay A. Kuznetsov + */ +class PosPlusGroupQuantifierSet extends GroupQuantifierSet { + + public PosPlusGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, + int type) { + super(innerSet, next, type); + ((JointSet) innerSet).setNext(FSet.posFSet); + + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int nextIndex; + if ((nextIndex = innerSet.matches(stringIndex, testString, matchResult)) < 0) { + return -1; + } else if (nextIndex > stringIndex) { + stringIndex = nextIndex; + while ((nextIndex = innerSet.matches(stringIndex, testString, + matchResult)) > stringIndex) { + stringIndex = nextIndex; + } + } + + return next.matches(stringIndex, testString, matchResult); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookAhead.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookAhead.java new file mode 100644 index 000000000..545572519 --- /dev/null +++ 
b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookAhead.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.ArrayList; + +/** + * Positive lookahead node. + * + * @author Nikolay A. 
Kuznetsov + */ +class PositiveLookAhead extends AtomicJointSet { + + public PositiveLookAhead(ArrayList children, FSet fSet) { + super(children, fSet); + } + + /** + * Returns stringIndex+shift, the next position to match + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int size = children.size(); + for (int i = 0; i < size; i++) { + AbstractSet e = (AbstractSet) children.get(i); + int shift = e.matches(stringIndex, testString, matchResult); + if (shift >= 0) { + // PosLookaheadFset always returns true, position remains the + // same + // next.match() from; + return next.matches(stringIndex, testString, matchResult); + } + } + + return -1; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return false; + } + + protected String getName() { + return "PosLookaheadJointSet"; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookBehind.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookBehind.java new file mode 100644 index 000000000..d3425bbfe --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookBehind.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.ArrayList; + +/** + * Positive lookbehind node. + * + * @author Nikolay A. Kuznetsov + */ +class PositiveLookBehind extends AtomicJointSet { + + public PositiveLookBehind(ArrayList children, FSet fSet) { + super(children, fSet); + } + + /** + * Returns stringIndex+shift, the next position to match + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int size = children.size(); + int leftBound = matchResult.hasTransparentBounds()? + 0 : matchResult.getLeftBound(); + + int shift = next.matches(stringIndex, testString, matchResult); + if (shift >= 0) { + //fSet will take this index to check if we at the right bound + // and return true if the current index equal to this one + matchResult.setConsumed(groupIndex, stringIndex); + for (int i = 0; i < size; i++) { + AbstractSet e = (AbstractSet) children.get(i); + // find limits could be calculated though e.getCharCount() + // fSet will return true only if string index at fSet equal + // to stringIndex + if (e.findBack(leftBound, stringIndex, testString, matchResult) >=0) { + matchResult.setConsumed(groupIndex, -1); + return shift; + } + } + } + + return -1; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return false; + } + + protected String getName() { + return "PosBehindJointSet"; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveAltQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveAltQuantifierSet.java new file mode 100644 index 000000000..920189678 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveAltQuantifierSet.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Possessive ? quantifier node. + * + * @author Nikolay A. Kuznetsov + */ +class PossessiveAltQuantifierSet extends AltQuantifierSet { + + public PossessiveAltQuantifierSet(LeafSet innerSet, AbstractSet next, + int type) { + super(innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int shift = 0; + + if (stringIndex + leaf.charCount() <= matchResult.getRightBound() + && (shift = leaf.accepts(stringIndex, testString)) >= 1) { + stringIndex += shift; + } + + return next.matches(stringIndex, testString, matchResult); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveCompositeQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveCompositeQuantifierSet.java new file mode 100644 index 000000000..bf3408373 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveCompositeQuantifierSet.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Possessive composite (i.e. {n, m}) quantifier node. + * + * @author Nikolay A. Kuznetsov + */ +class PossessiveCompositeQuantifierSet extends CompositeQuantifierSet { + + public PossessiveCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, + AbstractSet next, int type) { + super(quant, innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int min = quantifier.min(); + int max = quantifier.max(); + int i = 0; + + for (; i < min; i++) { + if (stringIndex + leaf.charCount() > matchResult.getRightBound()) { + matchResult.hitEnd = true; + return -1; + } + int shift = leaf.accepts(stringIndex, testString); + if (shift < 1) { + return -1; + } + stringIndex += shift; + } + + for (; i < max; i++) { + int shift; + if (stringIndex + leaf.charCount() > matchResult.getRightBound() + || (shift = leaf.accepts(stringIndex, testString)) < 1) { + break; + } + stringIndex += shift; + } + return next.matches(stringIndex, testString, matchResult); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveGroupQuantifierSet.java 
b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveGroupQuantifierSet.java new file mode 100644 index 000000000..14671d03a --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveGroupQuantifierSet.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Possessive quantifier set over groups. + * + * @author Nikolay A. 
Kuznetsov + */ +class PossessiveGroupQuantifierSet extends GroupQuantifierSet { + + public PossessiveGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, + int type) { + super(innerSet, next, type); + innerSet.setNext(FSet.posFSet); + + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int nextIndex; + while ((nextIndex = innerSet.matches(stringIndex, testString, + matchResult)) > 0) { + stringIndex = nextIndex; + } + + return next.matches(stringIndex, testString, matchResult); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveQuantifierSet.java new file mode 100644 index 000000000..cfdc1f2b9 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveQuantifierSet.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Possessive quantifier set over LeafSet's + * + * @author Nikolay A. 
Kuznetsov + */ +class PossessiveQuantifierSet extends LeafQuantifierSet { + + public PossessiveQuantifierSet(LeafSet innerSet, AbstractSet next, int type) { + super(innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int shift = 0; + while (stringIndex + leaf.charCount() <= matchResult.getRightBound() + && (shift = leaf.accepts(stringIndex, testString)) >= 1) { + stringIndex += shift; + } + + return next.matches(stringIndex, testString, matchResult); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PreviousMatch.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PreviousMatch.java new file mode 100644 index 000000000..90e0a502a --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PreviousMatch.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Node representing previous match (\G). + * + * @author Nikolay A. 
Kuznetsov + */ +class PreviousMatch extends AbstractSet { + + /** + * @see java.util.regex.AbstractSet#matches(int, java.lang.CharSequence, + * java.util.regex.MatchResultImpl) + */ + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (stringIndex == matchResult.getPreviousMatchEnd()) { + return next.matches(stringIndex, testString, matchResult); + } + return -1; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return false; + } + + protected String getName() { + return "PreviousMatch"; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Quantifier.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Quantifier.java new file mode 100644 index 000000000..f86de69dd --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Quantifier.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents RE quantifier; contains two fields responsible for min and max + * number of repetitions. 
Negative value for maximum number of repetition + * represents infinity(i.e. +,*) + * + * @author Nikolay A. Kuznetsov + */ +class Quantifier extends SpecialToken implements Cloneable { + + private int min; + + private int max; + + private int counter = 0; + + public Quantifier(int min) { + this.min = this.max = min; + } + + public Quantifier(int min, int max) { + this.min = min; + this.max = max; + } + + public void resetCounter() { + counter = 0; + } + + public int getCounter() { + return counter; + } + + public void setCounter(int counter) { + this.counter = counter; + } + + public int min() { + return min; + } + + public int max() { + return max; + } + + public String toString() { + return "{" //$NON-NLS-1$ + + min + + "," //$NON-NLS-1$ + + ((max == Integer.MAX_VALUE) ? "" : new Integer(max) //$NON-NLS-1$ + .toString()) + "}"; //$NON-NLS-1$ + } + + public int getType() { + return SpecialToken.TOK_QUANTIFIER; + } + + public Object clone() { + return new Quantifier(min, max); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/QuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/QuantifierSet.java new file mode 100644 index 000000000..39dcbce1e --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/QuantifierSet.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Base class for quantifiers. + * + * @author Nikolay A. Kuznetsov + */ +abstract class QuantifierSet extends AbstractSet { + + protected AbstractSet innerSet; + + public QuantifierSet(AbstractSet innerSet, AbstractSet next, int type) { + super(next); + this.innerSet = innerSet; + setType(type); + } + + /** + * Returns the innerSet. + */ + public AbstractSet getInnerSet() { + return innerSet; + } + + /** + * Sets an inner set. + * @param innerSet + * The innerSet to set. + */ + public void setInnerSet(AbstractSet innerSet) { + this.innerSet = innerSet; + } + + public boolean first(AbstractSet set) { + return innerSet.first(set) || next.first(set); + } + + public boolean hasConsumed(MatchResultImpl mr) { + return true; + } + + /** + * This method is used for traversing nodes after the + * first stage of compilation. 
+ */ + public void processSecondPass() { + this.isSecondPassVisited = true; + + if (next != null) { + + if (!next.isSecondPassVisited) { + + /* + * Add here code to do during the pass + */ + JointSet set = next.processBackRefReplacement(); + + if (set != null) { + next.isSecondPassVisited = true; + next =(AbstractSet) set; + } + + /* + * End code to do during the pass + */ + next.processSecondPass(); + } + } + + if (innerSet != null) { + + if (!innerSet.isSecondPassVisited) { + + /* + * Add here code to do during the pass + */ + JointSet set = innerSet.processBackRefReplacement(); + + if (set != null) { + innerSet.isSecondPassVisited = true; + innerSet =(AbstractSet) set; + } + + /* + * End code to do during the pass + */ + innerSet.processSecondPass(); + } else { + + /* + * We reach node through innerSet but it is already traversed. + * You can see this situation for GroupQuantifierSet.innerset + * if we compile smth like "(a)+ when + * GroupQuantifierSet == GroupQuantifierSet.innerset.fSet.next + */ + + /* + * Add here code to do during the pass + */ + if (innerSet instanceof SingleSet + && ((FSet) ((JointSet) innerSet).fSet) + .isBackReferenced) { + innerSet = innerSet.next; + } + + /* + * End code to do during the pass + */ + } + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RangeSet.java new file mode 100644 index 000000000..f17930efb --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RangeSet.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents node accepting single character from the given char class. + * + * @author Nikolay A. Kuznetsov + */ + +class RangeSet extends LeafSet { + + private AbstractCharClass chars; + + private boolean alt = false; + + public RangeSet(AbstractCharClass cs, AbstractSet next) { + super(next); + this.chars = cs.getInstance(); + this.alt = cs.alt; + } + + public RangeSet(AbstractCharClass cc) { + this.chars = cc.getInstance(); + this.alt = cc.alt; + } + + public int accepts(int strIndex, CharSequence testString) { + return chars.contains(testString.charAt(strIndex)) ? 1 : -1; + } + + protected String getName() { + return "range:" + (alt ? 
"^ " : " ") + chars.toString(); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + } + + public boolean first(AbstractSet set) { + if (set instanceof CharSet) { + return AbstractCharClass.intersects(chars, ((CharSet) set) + .getChar()); + } else if (set instanceof RangeSet) { + return AbstractCharClass.intersects(chars, ((RangeSet) set) + .chars); + } else if (set instanceof SupplRangeSet) { + return AbstractCharClass.intersects(chars, ((SupplRangeSet) set) + .getChars()); + } else if (set instanceof SupplCharSet) { + return false; + } + return true; + } + + protected AbstractCharClass getChars() { + return chars; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelAltGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelAltGroupQuantifierSet.java new file mode 100644 index 000000000..204eba0b4 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelAltGroupQuantifierSet.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Reluctant version of "?" quantifier set over group. + * @author Nikolay A. 
Kuznetsov + */ +class RelAltGroupQuantifierSet extends AltGroupQuantifierSet { + + public RelAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, + int type) { + super(innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + if (!innerSet.hasConsumed(matchResult)) + return next.matches(stringIndex, testString, matchResult); + + int nextIndex = next.matches(stringIndex, testString, matchResult); + + if (nextIndex < 0) { + return innerSet.matches(stringIndex, testString, matchResult); + } else { + return nextIndex; + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelCompositeGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelCompositeGroupQuantifierSet.java new file mode 100644 index 000000000..a697b91d0 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelCompositeGroupQuantifierSet.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Reluctant version of composite (i.e. {n,m}) quantifier node over + * group. 
+ * + * @author Nikolay A. Kuznetsov + */ +class RelCompositeGroupQuantifierSet extends CompositeGroupQuantifierSet { + + public RelCompositeGroupQuantifierSet(Quantifier quant, + AbstractSet innerSet, AbstractSet next, int type, int setCounter) { + super(quant, innerSet, next, type, setCounter); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int enterCounter = matchResult.getEnterCounter(setCounter); + + if (!innerSet.hasConsumed(matchResult)) + return next.matches(stringIndex, testString, matchResult); + + // can't go inner set; + if (enterCounter >= quantifier.max()) { + matchResult.setEnterCounter(setCounter, 0); + return next.matches(stringIndex, testString, matchResult); + } + + int nextIndex; + + if (enterCounter >= quantifier.min()) { + nextIndex = next.matches(stringIndex, testString, matchResult); + if (nextIndex < 0) { + matchResult.setEnterCounter(setCounter, ++enterCounter); + nextIndex = innerSet.matches(stringIndex, testString, + matchResult); + } else { + matchResult.setEnterCounter(setCounter, 0); + return nextIndex; + } + } else { + matchResult.setEnterCounter(setCounter, ++enterCounter); + nextIndex = innerSet.matches(stringIndex, testString, matchResult); + } + + return nextIndex; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantAltQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantAltQuantifierSet.java new file mode 100644 index 000000000..d30794de5 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantAltQuantifierSet.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * This class represents ?? quantifier over leaf sets. + * + * @author Nikolay A. Kuznetsov + */ +class ReluctantAltQuantifierSet extends AltQuantifierSet { + + public ReluctantAltQuantifierSet(LeafSet innerSet, AbstractSet next, + int type) { + super(innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int shift; + + if ((shift = next.matches(stringIndex, testString, matchResult)) >= 0) { + return shift; + } else { + return innerSet.matches(stringIndex, testString, matchResult); + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantCompositeQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantCompositeQuantifierSet.java new file mode 100644 index 000000000..5e26b466f --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantCompositeQuantifierSet.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Reluctant version of composite(i.e. {n,m}) quantifier set over leaf nodes. + * @author Nikolay A. Kuznetsov + */ +class ReluctantCompositeQuantifierSet extends CompositeQuantifierSet { + + public ReluctantCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, + AbstractSet next, int type) { + super(quant, innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int min = quantifier.min(); + int max = quantifier.max(); + int i = 0; + int shift = 0; + + for (; i < min; i++) { + + if (stringIndex + leaf.charCount() > matchResult.getRightBound()) { + matchResult.hitEnd = true; + return -1; + } + + shift = leaf.accepts(stringIndex, testString); + if (shift < 1) { + return -1; + } + stringIndex += shift; + } + + do { + shift = next.matches(stringIndex, testString, matchResult); + if (shift >= 0) { + return shift; + } + + if (stringIndex + leaf.charCount() <= matchResult.getRightBound()) { + shift = leaf.accepts(stringIndex, testString); + stringIndex += shift; + i++; + } + + } while (shift >= 1 && i <= max); + + return -1; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantGroupQuantifierSet.java 
b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantGroupQuantifierSet.java new file mode 100644 index 000000000..1d94564bb --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantGroupQuantifierSet.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Reluctant version of the group quantifier set. + * + * @author Nikolay A. 
Kuznetsov + */ +class ReluctantGroupQuantifierSet extends GroupQuantifierSet { + + public ReluctantGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, + int type) { + super(innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + + if (!innerSet.hasConsumed(matchResult)) + return next.matches(stringIndex, testString, matchResult); + + int res = next.matches(stringIndex, testString, matchResult); + if (res < 0) { + return innerSet.matches(stringIndex, testString, matchResult); + } else { + return res; + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantQuantifierSet.java new file mode 100644 index 000000000..44a789159 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantQuantifierSet.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * This class represents [+*]? constructs over LeafSets. + * + * @see java.util.regex.LeafSet + * @author Nikolay A. 
Kuznetsov + */ +class ReluctantQuantifierSet extends LeafQuantifierSet { + + public ReluctantQuantifierSet(LeafSet innerSet, AbstractSet next, int type) { + super(innerSet, next, type); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + int shift = 0; + + do { + shift = next.matches(stringIndex, testString, matchResult); + if (shift >= 0) { + return shift; + } + + if (stringIndex + leaf.charCount() <= matchResult.getRightBound()) { + shift = leaf.accepts(stringIndex, testString); + stringIndex += shift; + } + } while (shift >= 1); + + return -1; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SOLSet.java new file mode 100644 index 000000000..ed08dd4eb --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SOLSet.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents node accepting single character. + * + * @author Nikolay A. 
Kuznetsov + */ +final class SOLSet extends AbstractSet { + + public int matches(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + if (strIndex == 0 + || (matchResult.hasAnchoringBounds() && strIndex == matchResult + .getLeftBound())) { + return next.matches(strIndex, testString, matchResult); + } + return -1; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + return false; + } + + protected String getName() { + return ""; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SequenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SequenceSet.java new file mode 100644 index 000000000..2c17d8c08 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SequenceSet.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * This class represents nodes constructed with character sequences. For + * example, lets consider regular expression: ".*word.*". 
During regular + * expression compilation phase character sequence w-o-r-d, will be represented + * with single node for the entire word. + * + * During the match phase, Moyer-Moore algorithm will be used for fast + * searching. + * + * Please follow the next link for more details about mentioned algorithm: + * http://portal.acm.org/citation.cfm?id=359859 + * + * @author Nikolay A. Kuznetsov + */ +class SequenceSet extends LeafSet { + + private String string = null; + + private IntHash leftToRight; + + private IntHash rightToLeft; + + SequenceSet(StringBuffer substring) { + this.string = substring.toString(); + charCount = substring.length(); + + leftToRight = new IntHash(charCount); + rightToLeft = new IntHash(charCount); + for (int j = 0; j < charCount - 1; j++) { + leftToRight.put(string.charAt(j), charCount - j - 1); + rightToLeft + .put(string.charAt(charCount - j - 1), charCount - j - 1); + } + } + + public int accepts(int strIndex, CharSequence testString) { + return startsWith(testString, strIndex) ? 
charCount : -1; + } + + public int find(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + + int strLength = matchResult.getRightBound(); + + while (strIndex <= strLength) { + strIndex = indexOf(testString, strIndex, strLength); + + if (strIndex < 0) + return -1; + if (next.matches(strIndex + charCount, testString, matchResult) >= 0) + return strIndex; + + strIndex++; + } + + return -1; + } + + public int findBack(int strIndex, int lastIndex, CharSequence testString, + MatchResultImpl matchResult) { + + while (lastIndex >= strIndex) { + lastIndex = lastIndexOf(testString, strIndex, lastIndex); + + if (lastIndex < 0) + return -1; + if (next.matches(lastIndex + charCount, testString, matchResult) >= 0) + return lastIndex; + + lastIndex--; + } + + return -1; + } + + public String getName() { + return "sequence: " + string; //$NON-NLS-1$ + } + + public boolean first(AbstractSet set) { + if (set instanceof CharSet) { + return ((CharSet) set).getChar() == string.charAt(0); + } else if (set instanceof RangeSet) { + return ((RangeSet) set).accepts(0, string.substring(0, 1)) > 0; + } else if (set instanceof SupplRangeSet) { + return ((SupplRangeSet) set).contains(string.charAt(0)) + || ((string.length() > 1) && ((SupplRangeSet) set).contains(Character + .toCodePoint(string.charAt(0), string.charAt(1)))); + } else if ((set instanceof SupplCharSet)) { + return (string.length() > 1) + ? 
((SupplCharSet) set).getCodePoint() + == Character.toCodePoint(string.charAt(0), + string.charAt(1)) + : false; + } + + return true; + } + + protected int indexOf(CharSequence str, int from, int to) { + int last = string.charAt(charCount - 1); + int i = from; + + while (i <= to - charCount) { + char ch = str.charAt(i + charCount - 1); + if (ch == last && startsWith(str, i)) { + return i; + } + + i += leftToRight.get(ch); + } + return -1; + } + + protected int lastIndexOf(CharSequence str, int to, int from) { + int first = string.charAt(0); + int size = str.length(); + int delta; + int i = ((delta = size - from - charCount) > 0) ? from : from + delta; + + while (i >= to) { + char ch = str.charAt(i); + if (ch == first && startsWith(str, i)) { + return i; + } + + i -= rightToLeft.get(ch); + } + return -1; + } + + protected boolean startsWith(CharSequence str, int from) { + for (int i = 0; i < charCount; i++) { + if (str.charAt(i + from) != string.charAt(i)) + return false; + } + return true; + } + + static class IntHash { + int[] table, values; + + int mask; + + int size; // <-maximum shift + + public IntHash(int size) { + while (size >= mask) { + mask = (mask << 1) | 1; + } + mask = (mask << 1) | 1; + table = new int[mask + 1]; + values = new int[mask + 1]; + this.size = size; + } + + public void put(int key, int value) { + int i = 0; + int hashCode = key & mask; + + for (;;) { + if (table[hashCode] == 0 // empty + || table[hashCode] == key) {// rewrite + table[hashCode] = key; + values[hashCode] = value; + return; + } + i++; + i &= mask; + + hashCode += i; + hashCode &= mask; + } + } + + public int get(int key) { + + int hashCode = key & mask; + int i = 0; + int storedKey; + + for (;;) { + storedKey = table[hashCode]; + + if (storedKey == 0) { // empty + return size; + } + + if (storedKey == key) { + return values[hashCode]; + } + + i++; + i &= mask; + + hashCode += i; + hashCode &= mask; + } + } + } +} diff --git 
a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleDecompositions.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleDecompositions.java new file mode 100644 index 000000000..e4f9719b7 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleDecompositions.java @@ -0,0 +1,971 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * This class gives us a hashtable that contains information about + * symbols that are one symbol decompositions that is + * generated from + * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt. 
+ */ +class SingleDecompositions{ + + private static IntHash table = null; + + private SingleDecompositions() { + } + + public static IntHash getHashSingleDecompositions() { + if (table != null) { + return table; + } else { + table = new IntHash(1030); + table.put(0x6688, 0); + table.put(0x00B7, 0); + table.put(0x86A9, 0); + table.put(0x00B4, 0); + table.put(0x7409, 0); + table.put(0x559D, 0); + table.put(0x884C, 0); + table.put(0x523B, 0); + table.put(0x6DCB, 0); + table.put(0x8AFE, 0); + table.put(0x523A, 0); + table.put(0x7406, 0); + table.put(0x53F1, 0); + table.put(0x5599, 0); + table.put(0x6F6E, 0); + table.put(0x8AF8, 0); + table.put(0x3862, 0); + table.put(0x5B28, 0); + table.put(0x53EB, 0); + table.put(0x2300A, 0); + table.put(0x9929, 0); + table.put(0x9928, 0); + table.put(0x6674, 0); + table.put(0x8046, 0); + table.put(0x53E5, 0); + table.put(0x8AED, 0); + table.put(0x2054B, 0); + table.put(0x5229, 0); + table.put(0x64C4, 0); + table.put(0x278AE, 0); + table.put(0x53DF, 0); + table.put(0x5587, 0); + table.put(0x2335F, 0); + table.put(0x5584, 0); + table.put(0x92D8, 0); + table.put(0x92D7, 0); + table.put(0x5E74, 0); + table.put(0x774A, 0); + table.put(0x911B, 0); + table.put(0x81ED, 0); + table.put(0x78FB, 0); + table.put(0x5CC0, 0); + table.put(0x7740, 0); + table.put(0x81E8, 0); + table.put(0x9111, 0); + table.put(0x9D67, 0); + table.put(0x5217, 0); + table.put(0x2A20E, 0); + table.put(0x5B08, 0); + table.put(0x5E69, 0); + table.put(0x8AD6, 0); + table.put(0x267B5, 0); + table.put(0x412F, 0); + table.put(0x53CA, 0); + table.put(0x8AD2, 0); + table.put(0x881F, 0); + table.put(0x53C3, 0); + table.put(0x2940A, 0); + table.put(0x8ACB, 0); + table.put(0x256C5, 0); + table.put(0x4BCE, 0); + table.put(0x81D8, 0); + table.put(0x5207, 0); + table.put(0x267A7, 0); + table.put(0x3AE4, 0); + table.put(0x5203, 0); + table.put(0x5563, 0); + table.put(0x7295, 0); + table.put(0x20525, 0); + table.put(0x6649, 0); + table.put(0x23F8E, 0); + table.put(0x382F, 0); + 
table.put(0x295B6, 0); + table.put(0x67FA, 0); + table.put(0x8ABF, 0); + table.put(0x2051C, 0); + table.put(0x03CE, 0); + table.put(0x67F3, 0); + table.put(0x9EFE, 0); + table.put(0x03CD, 0); + table.put(0x03CC, 0); + table.put(0x5555, 0); + table.put(0x5553, 0); + table.put(0x771F, 0); + table.put(0x8801, 0); + table.put(0x771E, 0); + table.put(0x446B, 0); + table.put(0x7280, 0); + table.put(0x9EF9, 0); + table.put(0x8005, 0); + table.put(0x70D9, 0); + table.put(0x8363, 0); + table.put(0x8001, 0); + table.put(0x5FF9, 0); + table.put(0x6F23, 0); + table.put(0x6F22, 0); + table.put(0x8C55, 0); + table.put(0x5E3D, 0); + table.put(0x5FF5, 0); + table.put(0x8AAA, 0); + table.put(0x78CC, 0); + table.put(0x78CA, 0); + table.put(0x4D56, 0); + table.put(0x6190, 0); + table.put(0x8AA0, 0); + table.put(0x618E, 0); + table.put(0x03B9, 0); + table.put(0x56F9, 0); + table.put(0x273CA, 0); + table.put(0x70C8, 0); + table.put(0x8353, 0); + table.put(0x8C48, 0); + table.put(0x8352, 0); + table.put(0x03B0, 0); + table.put(0x25044, 0); + table.put(0x7701, 0); + table.put(0x03AF, 0); + table.put(0x03AE, 0); + table.put(0x6F0F, 0); + table.put(0x03AD, 0); + table.put(0x03AC, 0); + table.put(0x5E28, 0); + table.put(0x8F9E, 0); + table.put(0x3009, 0); + table.put(0x7262, 0); + table.put(0x3008, 0); + table.put(0x23F5E, 0); + table.put(0x8DF0, 0); + table.put(0x03A9, 0); + table.put(0x42A0, 0); + table.put(0x5099, 0); + table.put(0x262D9, 0); + table.put(0x617A, 0); + table.put(0x6613, 0); + table.put(0x84FC, 0); + table.put(0x8DEF, 0); + table.put(0x5FD7, 0); + table.put(0x26D6B, 0); + table.put(0x27CA8, 0); + table.put(0x9ECE, 0); + table.put(0x22B0C, 0); + table.put(0x3C4E, 0); + table.put(0x84F3, 0); + table.put(0x25C80, 0); + table.put(0x45F9, 0); + table.put(0x84F1, 0); + table.put(0x70AD, 0); + table.put(0x5FCD, 0); + table.put(0x7250, 0); + table.put(0x4EE4, 0); + table.put(0x8336, 0); + table.put(0x84EE, 0); + table.put(0x6168, 0); + table.put(0x9415, 0); + table.put(0x24735, 
0); + table.put(0x6160, 0); + table.put(0x9EBB, 0); + table.put(0x507A, 0); + table.put(0x5510, 0); + table.put(0x8688, 0); + table.put(0x8323, 0); + table.put(0x7CE8, 0); + table.put(0x8DCB, 0); + table.put(0x7CE7, 0); + table.put(0x7CE3, 0); + table.put(0x4ECC, 0); + table.put(0x831D, 0); + table.put(0x7235, 0); + table.put(0x45D7, 0); + table.put(0x95B7, 0); + table.put(0x5FAD, 0); + table.put(0x614E, 0); + table.put(0x614C, 0); + table.put(0x7B20, 0); + table.put(0x6942, 0); + table.put(0x4EC0, 0); + table.put(0x585E, 0); + table.put(0x5FA9, 0); + table.put(0x8DBC, 0); + table.put(0x585A, 0); + table.put(0x722B, 0); + table.put(0x6148, 0); + table.put(0x8F62, 0); + table.put(0x23A8D, 0); + table.put(0x7CD6, 0); + table.put(0x9756, 0); + table.put(0x95AD, 0); + table.put(0x62FE, 0); + table.put(0x6144, 0); + table.put(0x62FC, 0); + table.put(0x7CD2, 0); + table.put(0x7228, 0); + table.put(0x73DE, 0); + table.put(0x25AA7, 0); + table.put(0x440B, 0); + table.put(0x8669, 0); + table.put(0x34DF, 0); + table.put(0x649D, 0); + table.put(0x8667, 0); + table.put(0x3C18, 0); + table.put(0x649A, 0); + table.put(0x9748, 0); + table.put(0x721B, 0); + table.put(0x98FC, 0); + table.put(0x4EAE, 0); + table.put(0x6D85, 0); + table.put(0x865C, 0); + table.put(0x5840, 0); + table.put(0x20804, 0); + table.put(0x90FD, 0); + table.put(0x7210, 0); + table.put(0x7CBE, 0); + table.put(0x7570, 0); + table.put(0x24263, 0); + table.put(0x0060, 0); + table.put(0x8650, 0); + table.put(0x29B30, 0); + table.put(0x98EF, 0); + table.put(0x90F1, 0); + table.put(0x6D78, 0); + table.put(0x9732, 0); + table.put(0x6D77, 0); + table.put(0x51F5, 0); + table.put(0x6A13, 0); + table.put(0x2A392, 0); + table.put(0x59EC, 0); + table.put(0x5832, 0); + table.put(0x7565, 0); + table.put(0x5831, 0); + table.put(0x5C8D, 0); + table.put(0x8F3B, 0); + table.put(0x647E, 0); + table.put(0x2569A, 0); + table.put(0x62D4, 0); + table.put(0x62D3, 0); + table.put(0x28BFA, 0); + table.put(0x6914, 0); + table.put(0x98E2, 
0); + table.put(0x34BB, 0); + table.put(0x8F38, 0); + table.put(0x6D6A, 0); + table.put(0x73B2, 0); + table.put(0x34B9, 0); + table.put(0x62CF, 0); + table.put(0x6477, 0); + table.put(0x226D4, 0); + table.put(0x25F86, 0); + table.put(0x6D69, 0); + table.put(0x9723, 0); + table.put(0x863F, 0); + table.put(0x004B, 0); + table.put(0x7559, 0); + table.put(0x502B, 0); + table.put(0x6A02, 0); + table.put(0x6BBB, 0); + table.put(0x6BBA, 0); + table.put(0x62C9, 0); + table.put(0x8F2A, 0); + table.put(0x7E45, 0); + table.put(0x7E41, 0); + table.put(0x7FFA, 0); + table.put(0x51DE, 0); + table.put(0x59D8, 0); + table.put(0x8996, 0); + table.put(0x51DC, 0); + table.put(0x8F26, 0); + table.put(0x73A5, 0); + table.put(0x6108, 0); + table.put(0x6469, 0); + table.put(0x927C, 0); + table.put(0x9B6F, 0); + table.put(0x537F, 0); + table.put(0x6BAE, 0); + table.put(0x003B, 0); + table.put(0x862D, 0); + table.put(0x537D, 0); + table.put(0x2921A, 0); + table.put(0x898B, 0); + table.put(0x87E1, 0); + table.put(0x5C6E, 0); + table.put(0x90CE, 0); + table.put(0x9276, 0); + table.put(0x7E37, 0); + table.put(0x62B1, 0); + table.put(0x8986, 0); + table.put(0x5375, 0); + table.put(0x51CC, 0); + table.put(0x5373, 0); + table.put(0x580D, 0); + table.put(0x0390, 0); + table.put(0x753E, 0); + table.put(0x5C65, 0); + table.put(0x5C64, 0); + table.put(0x76F4, 0); + table.put(0x21D0B, 0); + table.put(0x40E3, 0); + table.put(0x753B, 0); + table.put(0x5C62, 0); + table.put(0x5C60, 0); + table.put(0x51C9, 0); + table.put(0x038F, 0); + table.put(0x038E, 0); + table.put(0x6452, 0); + table.put(0x038C, 0); + table.put(0x038A, 0); + table.put(0x6D41, 0); + table.put(0x5AB5, 0); + table.put(0x28D77, 0); + table.put(0x0389, 0); + table.put(0x7891, 0); + table.put(0x0388, 0); + table.put(0x0386, 0); + table.put(0x0385, 0); + table.put(0x265A8, 0); + table.put(0x6D3E, 0); + table.put(0x7099, 0); + table.put(0x5002, 0); + table.put(0x8612, 0); + table.put(0x76E7, 0); + table.put(0x788C, 0); + table.put(0x51B7, 
0); + table.put(0x2284A, 0); + table.put(0x51B5, 0); + table.put(0x6D34, 0); + table.put(0x535A, 0); + table.put(0x3F1B, 0); + table.put(0x7524, 0); + table.put(0x76DB, 0); + table.put(0x22844, 0); + table.put(0x8606, 0); + table.put(0x9CFD, 0); + table.put(0x51AC, 0); + table.put(0x8964, 0); + table.put(0x87BA, 0); + table.put(0x5351, 0); + table.put(0x5DFD, 0); + table.put(0x6797, 0); + table.put(0x9E9F, 0); + table.put(0x51A4, 0); + table.put(0x6EDB, 0); + table.put(0x5C3F, 0); + table.put(0x707D, 0); + table.put(0x7E09, 0); + table.put(0x76CA, 0); + table.put(0x5F9A, 0); + table.put(0x7FBD, 0); + table.put(0x5349, 0); + table.put(0x9E97, 0); + table.put(0x65E3, 0); + table.put(0x7E02, 0); + table.put(0x65E2, 0); + table.put(0x6D1E, 0); + table.put(0x7077, 0); + table.put(0x3A6C, 0); + table.put(0x6D1B, 0); + table.put(0x6ED1, 0); + table.put(0x93F9, 0); + table.put(0x7070, 0); + table.put(0x786B, 0); + table.put(0x6785, 0); + table.put(0x6422, 0); + table.put(0x533F, 0); + table.put(0x6D16, 0); + table.put(0x54F6, 0); + table.put(0x6ECB, 0); + table.put(0x7506, 0); + table.put(0x9238, 0); + table.put(0x5DE2, 0); + table.put(0x5F8B, 0); + table.put(0x9234, 0); + table.put(0x5DE1, 0); + table.put(0x641C, 0); + table.put(0x677B, 0); + table.put(0x6EC7, 0); + table.put(0x243AB, 0); + table.put(0x2346D, 0); + table.put(0x8941, 0); + table.put(0x4CF8, 0); + table.put(0x5C22, 0); + table.put(0x958B, 0); + table.put(0x9E7F, 0); + table.put(0x6EBA, 0); + table.put(0x65C5, 0); + table.put(0x4CED, 0); + table.put(0x4B33, 0); + table.put(0x813E, 0); + table.put(0x232B8, 0); + table.put(0x9B12, 0); + table.put(0x784E, 0); + table.put(0x25626, 0); + table.put(0x285ED, 0); + table.put(0x704A, 0); + table.put(0x5F6B, 0); + table.put(0x2063A, 0); + table.put(0x2597C, 0); + table.put(0x4E86, 0); + table.put(0x675E, 0); + table.put(0x82E6, 0); + table.put(0x5317, 0); + table.put(0x82E5, 0); + table.put(0x4E82, 0); + table.put(0x5674, 0); + table.put(0x5F69, 0); + 
table.put(0x5C06, 0); + table.put(0x7AEE, 0); + table.put(0x5DBA, 0); + table.put(0x5F62, 0); + table.put(0x6756, 0); + table.put(0x7C92, 0); + table.put(0x6753, 0); + table.put(0x3D96, 0); + table.put(0x393A, 0); + table.put(0x8D77, 0); + table.put(0x4227, 0); + table.put(0x5DB2, 0); + table.put(0x79EB, 0); + table.put(0x4CCE, 0); + table.put(0x674E, 0); + table.put(0x349E, 0); + table.put(0x5668, 0); + table.put(0x5306, 0); + table.put(0x7039, 0); + table.put(0x3A2E, 0); + table.put(0x5305, 0); + table.put(0x54BD, 0); + table.put(0x43D9, 0); + table.put(0x8910, 0); + table.put(0x43D5, 0); + table.put(0x5F53, 0); + table.put(0x285D2, 0); + table.put(0x6B9F, 0); + table.put(0x738B, 0); + table.put(0x26B3C, 0); + table.put(0x7C7B, 0); + table.put(0x26247, 0); + table.put(0x0313, 0); + table.put(0x7387, 0); + table.put(0x5651, 0); + table.put(0x23D1E, 0); + table.put(0x456B, 0); + table.put(0x7ACB, 0); + table.put(0x23ED1, 0); + table.put(0x82BD, 0); + table.put(0x391C, 0); + table.put(0x8103, 0); + table.put(0x24FB8, 0); + table.put(0x701E, 0); + table.put(0x54A2, 0); + table.put(0x701B, 0); + table.put(0x737A, 0); + table.put(0x4202, 0); + table.put(0x71D0, 0); + table.put(0x79CA, 0); + table.put(0x4561, 0); + table.put(0x82B3, 0); + table.put(0x0301, 0); + table.put(0x82B1, 0); + table.put(0x0300, 0); + table.put(0x71CE, 0); + table.put(0x7375, 0); + table.put(0x455D, 0); + table.put(0x96F7, 0); + table.put(0x96F6, 0); + table.put(0x8457, 0); + table.put(0x22331, 0); + table.put(0x60E1, 0); + table.put(0x7C60, 0); + table.put(0x6721, 0); + table.put(0x57F4, 0); + table.put(0x26CD5, 0); + table.put(0x24FA1, 0); + table.put(0x6B79, 0); + table.put(0x6B77, 0); + table.put(0x7AB1, 0); + table.put(0x26228, 0); + table.put(0x671B, 0); + table.put(0x4E41, 0); + table.put(0x6B72, 0); + table.put(0x60D8, 0); + table.put(0x9094, 0); + table.put(0x5F22, 0); + table.put(0x6717, 0); + table.put(0x27667, 0); + table.put(0x8449, 0); + table.put(0x96E3, 0); + table.put(0x4E3D, 
0); + table.put(0x96E2, 0); + table.put(0x5197, 0); + table.put(0x9E1E, 0); + table.put(0x5195, 0); + table.put(0x5192, 0); + table.put(0x85FA, 0); + table.put(0x79AE, 0); + table.put(0x908F, 0); + table.put(0x4E39, 0); + table.put(0x25133, 0); + table.put(0x4E38, 0); + table.put(0x4E32, 0); + table.put(0x843D, 0); + table.put(0x518D, 0); + table.put(0x60C7, 0); + table.put(0x291DF, 0); + table.put(0x6700, 0); + table.put(0x5180, 0); + table.put(0x24C92, 0); + table.put(0x7C3E, 0); + table.put(0x907C, 0); + table.put(0x4E26, 0); + table.put(0x7DF4, 0); + table.put(0x7F9A, 0); + table.put(0x6B54, 0); + table.put(0x21B18, 0); + table.put(0x57CE, 0); + table.put(0x2003, 0); + table.put(0x2002, 0); + table.put(0x4096, 0); + table.put(0x5973, 0); + table.put(0x8ED4, 0); + table.put(0x2872E, 0); + table.put(0x8D1B, 0); + table.put(0x5F04, 0); + table.put(0x452B, 0); + table.put(0x625D, 0); + table.put(0x7F95, 0); + table.put(0x9072, 0); + table.put(0x20A2C, 0); + table.put(0x96C3, 0); + table.put(0x5177, 0); + table.put(0x8786, 0); + table.put(0x25119, 0); + table.put(0x3D33, 0); + table.put(0x5A66, 0); + table.put(0x2A600, 0); + table.put(0x5A62, 0); + table.put(0x8ECA, 0); + table.put(0x68A8, 0); + table.put(0x27F2F, 0); + table.put(0x516D, 0); + table.put(0x5606, 0); + table.put(0x5962, 0); + table.put(0x96B8, 0); + table.put(0x985E, 0); + table.put(0x7F85, 0); + table.put(0x5169, 0); + table.put(0x8779, 0); + table.put(0x4E0D, 0); + table.put(0x5168, 0); + table.put(0x5167, 0); + table.put(0x85CD, 0); + table.put(0x5164, 0); + table.put(0x8D08, 0); + table.put(0x6E9C, 0); + table.put(0x20122, 0); + table.put(0x732A, 0); + table.put(0x4FBF, 0); + table.put(0x7F7A, 0); + table.put(0x25BAB, 0); + table.put(0x4FBB, 0); + table.put(0x24814, 0); + table.put(0x876B, 0); + table.put(0x5954, 0); + table.put(0x46C7, 0); + table.put(0x5951, 0); + table.put(0x7F79, 0); + table.put(0x96A3, 0); + table.put(0x7F72, 0); + table.put(0x6CE5, 0); + table.put(0x5154, 0); + 
table.put(0x3781, 0); + table.put(0x5BFF, 0); + table.put(0x46BE, 0); + table.put(0x6599, 0); + table.put(0x7DC7, 0); + table.put(0x99FE, 0); + table.put(0x6234, 0); + table.put(0x4FAE, 0); + table.put(0x5948, 0); + table.put(0x514D, 0); + table.put(0x5944, 0); + table.put(0x6B21, 0); + table.put(0x2219F, 0); + table.put(0x5BF3, 0); + table.put(0x74CA, 0); + table.put(0x7DBE, 0); + table.put(0x622E, 0); + table.put(0x63E4, 0); + table.put(0x983B, 0); + table.put(0x99F1, 0); + table.put(0x5145, 0); + table.put(0x52FA, 0); + table.put(0x5BEE, 0); + table.put(0x5140, 0); + table.put(0x3EB8, 0); + table.put(0x549E, 0); + table.put(0x52F5, 0); + table.put(0x6CCD, 0); + table.put(0x7669, 0); + table.put(0x6CCC, 0); + table.put(0x5BE7, 0); + table.put(0x26523, 0); + table.put(0x9038, 0); + table.put(0x27966, 0); + table.put(0x621B, 0); + table.put(0x6E6E, 0); + table.put(0x5A27, 0); + table.put(0x3EAC, 0); + table.put(0x88FA, 0); + table.put(0x9829, 0); + table.put(0x6578, 0); + table.put(0x22183, 0); + table.put(0x88F8, 0); + table.put(0x6CBF, 0); + table.put(0x7DA0, 0); + table.put(0x6210, 0); + table.put(0x6B04, 0); + table.put(0x52E4, 0); + table.put(0x5BD8, 0); + table.put(0x216EA, 0); + table.put(0x4046, 0); + table.put(0x5922, 0); + table.put(0x5D7C, 0); + table.put(0x5A1B, 0); + table.put(0x9023, 0); + table.put(0x63C5, 0); + table.put(0x63C4, 0); + table.put(0x656C, 0); + table.put(0x829D, 0); + table.put(0x52DE, 0); + table.put(0x24C36, 0); + table.put(0x591A, 0); + table.put(0x9818, 0); + table.put(0x8B8A, 0); + table.put(0x7F3E, 0); + table.put(0x26FB1, 0); + table.put(0x91D1, 0); + table.put(0x6200, 0); + table.put(0x4039, 0); + table.put(0x238A7, 0); + table.put(0x8291, 0); + table.put(0x52D2, 0); + table.put(0x88E1, 0); + table.put(0x5D6E, 0); + table.put(0x91CF, 0); + table.put(0x5BC3, 0); + table.put(0x2541D, 0); + table.put(0x49E6, 0); + table.put(0x8B80, 0); + table.put(0x7642, 0); + table.put(0x5D6B, 0); + table.put(0x91CC, 0); + table.put(0x980B, 0); 
+ table.put(0x8728, 0); + table.put(0x99C2, 0); + table.put(0x88DE, 0); + table.put(0x828B, 0); + table.put(0x3B9D, 0); + table.put(0x6556, 0); + table.put(0x52C9, 0); + table.put(0x52C7, 0); + table.put(0x88D7, 0); + table.put(0x2A105, 0); + table.put(0x6FFE, 0); + table.put(0x5906, 0); + table.put(0x719C, 0); + table.put(0x63A9, 0); + table.put(0x654F, 0); + table.put(0x9C57, 0); + table.put(0x5468, 0); + table.put(0x88CF, 0); + table.put(0x63A0, 0); + table.put(0x798F, 0); + table.put(0x798E, 0); + table.put(0x798D, 0); + table.put(0x91B4, 0); + table.put(0x8279, 0); + table.put(0x5D50, 0); + table.put(0x7A81, 0); + table.put(0x870E, 0); + table.put(0x38E3, 0); + table.put(0x4018, 0); + table.put(0x52B3, 0); + table.put(0x6FEB, 0); + table.put(0x88C2, 0); + table.put(0x66F8, 0); + table.put(0x99A7, 0); + table.put(0x66F4, 0); + table.put(0x826F, 0); + table.put(0x6E2F, 0); + table.put(0x9DFA, 0); + table.put(0x22BF1, 0); + table.put(0x9C40, 0); + table.put(0x761F, 0); + table.put(0x797F, 0); + table.put(0x761D, 0); + table.put(0x5EFE, 0); + table.put(0x5D43, 0); + table.put(0x934A, 0); + table.put(0x4AB2, 0); + table.put(0x9F9C, 0); + table.put(0x8B58, 0); + table.put(0x52A3, 0); + table.put(0x4008, 0); + table.put(0x6094, 0); + table.put(0x7610, 0); + table.put(0x688E, 0); + table.put(0x216A8, 0); + table.put(0x5448, 0); + table.put(0x80B2, 0); + table.put(0x233D5, 0); + table.put(0x6E1A, 0); + table.put(0x5442, 0); + table.put(0x5EEC, 0); + table.put(0x02B9, 0); + table.put(0x6885, 0); + table.put(0x38C7, 0); + table.put(0x9F8E, 0); + table.put(0x80AD, 0); + table.put(0x9F8D, 0); + table.put(0x6881, 0); + table.put(0x4359, 0); + table.put(0x716E, 0); + table.put(0x7965, 0); + table.put(0x23CBC, 0); + table.put(0x5438, 0); + table.put(0x6081, 0); + table.put(0x6FC6, 0); + table.put(0x261DA, 0); + table.put(0x233C3, 0); + table.put(0x795E, 0); + table.put(0x795D, 0); + table.put(0x250F3, 0); + table.put(0x250F2, 0); + table.put(0x8B39, 0); + table.put(0x5ED9, 
0); + table.put(0x578B, 0); + table.put(0x55E2, 0); + table.put(0x7956, 0); + table.put(0x7A4F, 0); + table.put(0x9686, 0); + table.put(0x5ED3, 0); + table.put(0x5ED2, 0); + table.put(0x66C6, 0); + table.put(0x7950, 0); + table.put(0x7A4A, 0); + table.put(0x5D19, 0); + table.put(0x83F1, 0); + table.put(0x77A7, 0); + table.put(0x5ECA, 0); + table.put(0x967C, 0); + table.put(0x83EF, 0); + table.put(0x3B49, 0); + table.put(0x7A40, 0); + table.put(0x4F8B, 0); + table.put(0x541D, 0); + table.put(0x7949, 0); + table.put(0x5EC9, 0); + table.put(0x7948, 0); + table.put(0x9678, 0); + table.put(0x4334, 0); + table.put(0x9675, 0); + table.put(0x4F86, 0); + table.put(0x66B4, 0); + table.put(0x83E7, 0); + table.put(0x7149, 0); + table.put(0x4F80, 0); + table.put(0x7145, 0); + table.put(0x8CD3, 0); + table.put(0x793E, 0); + table.put(0x72FC, 0); + table.put(0x793C, 0); + table.put(0x9DB4, 0); + table.put(0x540F, 0); + table.put(0x6852, 0); + table.put(0x83DC, 0); + table.put(0x55C2, 0); + table.put(0x5EB6, 0); + table.put(0x55C0, 0); + table.put(0x5EB3, 0); + table.put(0x9304, 0); + table.put(0x5EB0, 0); + table.put(0x219C8, 0); + table.put(0x5406, 0); + table.put(0x8CC8, 0); + table.put(0x7498, 0); + table.put(0x208DE, 0); + table.put(0x8CC2, 0); + table.put(0x8CC1, 0); + table.put(0x792A, 0); + table.put(0x83CC, 0); + table.put(0x83CA, 0); + table.put(0x55B3, 0); + table.put(0x5EA6, 0); + table.put(0x8B01, 0); + table.put(0x83C9, 0); + table.put(0x7A1C, 0); + table.put(0x9F43, 0); + table.put(0x7489, 0); + table.put(0x4F60, 0); + table.put(0x27ED3, 0); + table.put(0x55AB, 0); + table.put(0x7485, 0); + table.put(0x26C36, 0); + table.put(0x61F6, 0); + table.put(0x964D, 0); + table.put(0x964B, 0); + table.put(0x61F2, 0); + table.put(0x3B19, 0); + table.put(0x8204, 0); + table.put(0x83BD, 0); + table.put(0x69EA, 0); + table.put(0x2633E, 0); + table.put(0x8564, 0); + table.put(0x9F3B, 0); + table.put(0x8201, 0); + table.put(0x7BC9, 0); + table.put(0x8CAB, 0); + table.put(0x97FF, 
0); + table.put(0x7BC6, 0); + table.put(0x24608, 0); + table.put(0x4301, 0); + table.put(0x20B63, 0); + table.put(0x6C88, 0); + table.put(0x7BC0, 0); + table.put(0x6ADB, 0); + table.put(0x7471, 0); + table.put(0x7D63, 0); + table.put(0x3B08, 0); + table.put(0x29145, 0); + table.put(0x83AD, 0); + table.put(0x58F7, 0); + table.put(0x6AD3, 0); + table.put(0x25249, 0); + table.put(0x58F2, 0); + table.put(0x72C0, 0); + table.put(0x61DE, 0); + table.put(0x681F, 0); + table.put(0x7D5B, 0); + table.put(0x6383, 0); + table.put(0x7469, 0); + table.put(0x58EE, 0); + table.put(0x529B, 0); + table.put(0x2A291, 0); + table.put(0x9199, 0); + table.put(0x264DA, 0); + table.put(0x962E, 0); + table.put(0x6817, 0); + table.put(0x601C, 0); + table.put(0x637B, 0); + table.put(0x29496, 0); + table.put(0x97E0, 0); + table.put(0x21DE6, 0); + table.put(0x21DE4, 0); + table.put(0x5B85, 0); + table.put(0x745C, 0); + table.put(0x97DB, 0); + table.put(0x6012, 0); + table.put(0x50E7, 0); + table.put(0x6C67, 0); + table.put(0x9F16, 0); + table.put(0x58DF, 0); + table.put(0x72AF, 0); + table.put(0x5289, 0); + table.put(0x9F0F, 0); + table.put(0x58D8, 0); + table.put(0x214E4, 0); + table.put(0x25CD0, 0); + table.put(0x50DA, 0); + table.put(0x3FFC, 0); + table.put(0x6368, 0); + table.put(0x4635, 0); + table.put(0x4995, 0); + table.put(0x2A0CE, 0); + table.put(0x9F05, 0); + table.put(0x5717, 0); + table.put(0x9A6A, 0); + table.put(0x5716, 0); + table.put(0x7447, 0); + table.put(0x86E2, 0); + table.put(0x236A3, 0); + table.put(0x23AFA, 0); + table.put(0x6AA8, 0); + table.put(0x61B2, 0); + table.put(0x5277, 0); + table.put(0x50CF, 0); + table.put(0x36FC, 0); + table.put(0x6C4E, 0); + table.put(0x808B, 0); + table.put(0x5272, 0); + table.put(0x7D2F, 0); + table.put(0x61AF, 0); + table.put(0x916A, 0); + table.put(0x69A3, 0); + table.put(0x6350, 0); + table.put(0x4A76, 0); + table.put(0x61A4, 0); + table.put(0x6DF9, 0); + table.put(0x807E, 0); + table.put(0x7D22, 0); + table.put(0x36EE, 0); + 
table.put(0x8516, 0); + table.put(0x58B3, 0); + table.put(0x2339C, 0); + table.put(0x75E2, 0); + table.put(0x778B, 0); + table.put(0x4A6E, 0); + table.put(0x8070, 0); + table.put(0x8FB6, 0); + table.put(0x6DEA, 0); + table.put(0x58AC, 0); + table.put(0x23393, 0); + table.put(0x7422, 0); + table.put(0x8FB0, 0); + table.put(0x633D, 0); + table.put(0x806F, 0); + table.put(0x669C, 0); + table.put(0x7D10, 0); + table.put(0x58A8, 0); + table.put(0x8863, 0); + table.put(0x8860, 0); + table.put(0x26F2C, 0); + table.put(0x00C5, 0); + table.put(0x6691, 0); + table.put(0x387C, 0); + table.put(0x8060, 0); + table.put(0x6DDA, 0); + table.put(0x5B3E, 0); + table.put(0x3515, 0); + table.put(0x270D2, 0); + table.put(0x5246, 0); + table.put(0x7D00, 0); + return table; + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleSet.java new file mode 100644 index 000000000..1f7806efa --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleSet.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. 
Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Group node over a subexpression without alternations; matching is delegated to a single child node ({@code kid}). + * @author Nikolay A. Kuznetsov + */ +class SingleSet extends JointSet { + + protected AbstractSet kid; + + public SingleSet(AbstractSet child, FSet fSet) { + this.kid = child; + this.fSet = fSet; + this.groupIndex = fSet.getGroupIndex(); + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { /* records stringIndex as the group start and restores the previous start if the child does not match */ + int start = matchResult.getStart(groupIndex); + matchResult.setStart(groupIndex, stringIndex); + int shift = kid.matches(stringIndex, testString, matchResult); + if (shift >= 0) { + return shift; + } + matchResult.setStart(groupIndex, start); + return -1; + } + + public int find(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { /* on success the index returned by the child becomes the group start */ + int res = kid.find(stringIndex, testString, matchResult); + if (res >= 0) + matchResult.setStart(groupIndex, res); + return res; + } + + public int findBack(int stringIndex, int lastIndex, + CharSequence testString, MatchResultImpl matchResult) { /* on success the index returned by the child becomes the group start */ + int res = kid.findBack(stringIndex, lastIndex, testString, matchResult); + if (res >= 0) + matchResult.setStart(groupIndex, res); + return res; + } + + public boolean first(AbstractSet set) { + return kid.first(set); + } + + /** + * This method is used for replacing back-referenced + * sets. + */ + public JointSet processBackRefReplacement() { + BackReferencedSingleSet set = new BackReferencedSingleSet(this); + + /* + * We store a reference to the created BackReferencedSingleSet + * in the next field. This is needed to process the replacement + * of sets correctly since sometimes we cannot renew all references to + * the detachable set at the current point of the traversal. See + * QuantifierSet and AbstractSet processSecondPass() methods for + * more details. + */ + next = set; + return set; + } + + /** + * This method is used for traversing nodes after the + * first stage of compilation.
+ */ + public void processSecondPass() { + this.isSecondPassVisited = true; + + if (fSet != null && !fSet.isSecondPassVisited) { + + /* + * Add here code to do during the pass + */ + + /* + * End code to do during the pass + */ + fSet.processSecondPass(); + } + + if (kid != null && !kid.isSecondPassVisited) { + + /* + * Add here code to do during the pass + */ + JointSet set = kid.processBackRefReplacement(); /* a non-null result replaces the child below */ + + if (set != null) { + kid.isSecondPassVisited = true; + kid = (AbstractSet) set; + } + + /* + * End code to do during the pass + */ + + kid.processSecondPass(); + } + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SpecialToken.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SpecialToken.java new file mode 100644 index 000000000..f8732e854 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SpecialToken.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * This is the base class for special tokens like character classes + * and quantifiers. + * + * @author Nikolay A.
Kuznetsov + */ +abstract class SpecialToken { + + public static final int TOK_CHARCLASS = 1 << 0; + + public static final int TOK_QUANTIFIER = 1 << 1; + + /** + * Returns the type of the token; may return one of the following values: + * TOK_CHARCLASS - token representing a character class; + * TOK_QUANTIFIER - token representing a quantifier; + * + * @return the token type. + */ + public abstract int getType(); +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplCharSet.java new file mode 100644 index 000000000..a3daddf1c --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplCharSet.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * + * Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. + * + * COPYRIGHT AND PERMISSION NOTICE + * + * Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under + * the Terms of Use in http://www.unicode.org/copyright.html.
Permission is + * hereby granted, free of charge, to any person obtaining a copy of the + * Unicode data files and any associated documentation (the "Data Files") + * or Unicode software and any associated documentation (the "Software") + * to deal in the Data Files or Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, distribute, + * and/or sell copies of the Data Files or Software, and to permit persons + * to whom the Data Files or Software are furnished to do so, provided that + * (a) the above copyright notice(s) and this permission notice appear with + * all copies of the Data Files or Software, (b) both the above copyright + * notice(s) and this permission notice appear in associated documentation, + * and (c) there is clear notice in each modified Data File or in the Software + * as well as in the documentation associated with the Data File(s) or Software + * that the data or software has been modified. + + * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS + * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THE DATA FILES OR SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in these Data Files or Software without prior written + * authorization of the copyright holder. + * + * 2. 
Additional terms from the Database: + * + * Copyright © 1995-1999 Unicode, Inc. All Rights reserved. + * + * Disclaimer + * + * The Unicode Character Database is provided as is by Unicode, Inc. + * No claims are made as to fitness for any particular purpose. No warranties + * of any kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been purchased + * on magnetic or optical media from Unicode, Inc., the sole remedy for any claim + * will be exchange of defective media within 90 days of receipt. This disclaimer + * is applicable for all other data files accompanying the Unicode Character Database, + * some of which have been compiled by the Unicode Consortium, and some of which + * have been supplied by other sources. + * + * Limitations on Rights to Redistribute This Data + * + * Recipient is granted the right to make copies in any form for internal + * distribution and to freely use the information supplied in the creation of + * products supporting the UnicodeTM Standard. The files in + * the Unicode Character Database can be redistributed to third parties or other + * organizations (whether for profit or not) as long as this notice and the disclaimer + * notice are retained. Information can be extracted from these files and used + * in documentation or programs, as long as there is an accompanying notice + * indicating the source. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * Represents node accepting single supplementary codepoint. 
+ */ +class SupplCharSet extends LeafSet { + + /* + * UTF-16 encoding (high and low surrogate) of this supplementary code point + */ + private char high = 0; + + private char low = 0; + + //int value of this supplementary codepoint + private int ch; + + public SupplCharSet(int ch) { + charCount = 2; + this.ch = ch; + char [] chUTF16 = Character.toChars(ch); + high = chUTF16[0]; + + /* + * we assume that SupplCharSet is + * built over supplementary code points only + */ + low = chUTF16[1]; + } + + public int accepts(int strIndex, CharSequence testString) { /* returns 2 when the two chars starting at strIndex equal high and low, otherwise -1 */ + char high = testString.charAt(strIndex++); + char low = testString.charAt(strIndex); + return ((this.high == high) && (this.low == low)) ? 2 : -1; + } + + public int find(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + + if (testString instanceof String) { + String testStr = (String) testString; + int strLength = matchResult.getRightBound(); + + while (strIndex < strLength) { + strIndex = testStr.indexOf(high, strIndex); + if (strIndex < 0) + return -1; + + strIndex++; + if (strIndex < strLength) { + char ch = testStr.charAt(strIndex); + + if ((low == ch) + && (next.matches(strIndex + 1, + testString, matchResult) >= 0)) { + return --strIndex; + } + strIndex++; /* NOTE(review): the scan resumes after the char that followed the high-surrogate hit, so that char is never itself tested as 'high' - confirm this is acceptable for input with unpaired surrogates */ + } + } + return -1; + } + + return super.find(strIndex, testString, matchResult); + } + + public int findBack(int strIndex, int lastIndex, CharSequence testString, + MatchResultImpl matchResult) { + + if (testString instanceof String) { + String testStr = (String) testString; + + while (lastIndex >= strIndex) { + lastIndex = testStr.lastIndexOf(low, lastIndex); + lastIndex--; + if (lastIndex < 0 || lastIndex < strIndex) { + return -1; + } + + if ((high == testStr.charAt(lastIndex)) + && next.matches(lastIndex + 2, + testString, matchResult) >= 0) { + return lastIndex; + } + + lastIndex--; + } + return -1; + } + + return super.findBack(strIndex, lastIndex, testString, matchResult); + } + + protected String getName() { + return "" + high + low; + } + +
protected int getCodePoint() { + return ch; + } + + public boolean first(AbstractSet set) { + if (set instanceof SupplCharSet) { + return ((SupplCharSet) set).getCodePoint() == ch; + } else if (set instanceof SupplRangeSet) { + return ((SupplRangeSet) set) + .contains(ch); + } else if (set instanceof CharSet) { + return false; + } else if (set instanceof RangeSet) { + return false; + } + + return true; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplRangeSet.java new file mode 100644 index 000000000..3a2b73f57 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplRangeSet.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * + * Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. + * + * COPYRIGHT AND PERMISSION NOTICE + * + * Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under + * the Terms of Use in http://www.unicode.org/copyright.html.
Permission is + * hereby granted, free of charge, to any person obtaining a copy of the + * Unicode data files and any associated documentation (the "Data Files") + * or Unicode software and any associated documentation (the "Software") + * to deal in the Data Files or Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, distribute, + * and/or sell copies of the Data Files or Software, and to permit persons + * to whom the Data Files or Software are furnished to do so, provided that + * (a) the above copyright notice(s) and this permission notice appear with + * all copies of the Data Files or Software, (b) both the above copyright + * notice(s) and this permission notice appear in associated documentation, + * and (c) there is clear notice in each modified Data File or in the Software + * as well as in the documentation associated with the Data File(s) or Software + * that the data or software has been modified. + + * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS + * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THE DATA FILES OR SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in these Data Files or Software without prior written + * authorization of the copyright holder. + * + * 2. 
Additional terms from the Database: + * + * Copyright © 1995-1999 Unicode, Inc. All Rights reserved. + * + * Disclaimer + * + * The Unicode Character Database is provided as is by Unicode, Inc. + * No claims are made as to fitness for any particular purpose. No warranties + * of any kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been purchased + * on magnetic or optical media from Unicode, Inc., the sole remedy for any claim + * will be exchange of defective media within 90 days of receipt. This disclaimer + * is applicable for all other data files accompanying the Unicode Character Database, + * some of which have been compiled by the Unicode Consortium, and some of which + * have been supplied by other sources. + * + * Limitations on Rights to Redistribute This Data + * + * Recipient is granted the right to make copies in any form for internal + * distribution and to freely use the information supplied in the creation of + * products supporting the UnicodeTM Standard. The files in + * the Unicode Character Database can be redistributed to third parties or other + * organizations (whether for profit or not) as long as this notice and the disclaimer + * notice are retained. Information can be extracted from these files and used + * in documentation or programs, as long as there is an accompanying notice + * indicating the source. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * Represents a node accepting a single character from the given char class. + * This character can be supplementary (2 chars needed to represent it) or from + * the Basic Multilingual Plane (1 char needed to represent it).
+ */ +class SupplRangeSet extends JointSet { + + protected AbstractCharClass chars; + + protected boolean alt = false; /* copied from the source char class; getName() prints "^ " when it is set */ + + public SupplRangeSet(AbstractCharClass cs, AbstractSet next) { + this.chars = cs.getInstance(); + this.alt = cs.alt; + this.next = next; + } + + public SupplRangeSet(AbstractCharClass cc) { + this.chars = cc.getInstance(); + this.alt = cc.alt; + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { /* first tries the char at stringIndex on its own; if that does not succeed, tries it together with the following char as a surrogate pair */ + int strLength = matchResult.getRightBound(); + int offset = -1; + + if (stringIndex < strLength) { + char high = testString.charAt(stringIndex++); + + if (contains(high) && + (offset = next.matches(stringIndex, testString, matchResult)) > 0) { /* only a strictly positive result of next is accepted here; the surrogate-pair path below returns the result of next unfiltered */ + return offset; + } + + if (stringIndex < strLength) { + char low = testString.charAt(stringIndex++); + + if (Character.isSurrogatePair(high, low) + && contains(Character.toCodePoint(high, low))) { + return next.matches(stringIndex, testString, matchResult); + } + } + } + + return -1; + } + + protected String getName() { + return "range:" + (alt ?
"^ " : " ") + chars.toString(); + } + + public boolean contains(int ch) { + return chars.contains(ch); + } + + public boolean first(AbstractSet set) { + if (set instanceof SupplCharSet) { + return AbstractCharClass.intersects(chars, ((SupplCharSet) set) + .getCodePoint()); + } else if (set instanceof CharSet) { + return AbstractCharClass.intersects(chars, ((CharSet) set) + .getChar()); + } else if (set instanceof SupplRangeSet) { + return AbstractCharClass.intersects(chars, ((SupplRangeSet) set) + .chars); + } else if (set instanceof RangeSet) { + return AbstractCharClass.intersects(chars, ((RangeSet) set) + .getChars()); + } + + return true; + } + + protected AbstractCharClass getChars() { + return chars; + } + + public AbstractSet getNext() { + return next; + } + + public void setNext(AbstractSet next) { + this.next = next; + } + + public boolean hasConsumed(MatchResultImpl mr) { + return true; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TMatchResult.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TMatchResult.java new file mode 100644 index 000000000..73ffa4d01 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TMatchResult.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/**
 * Read-only view of the outcome of matching a {@link TPattern} against an
 * input. The outcome is organised in groups: group 0 stands for the whole
 * regular expression and every further group for one pair of parentheses in
 * it. For each group the start index, the end index and the matched text are
 * available.
 *
 * @see TMatcher
 * @see TMatcher#toMatchResult()
 *
 * @author Nikolay A. Kuznetsov
 */
public interface TMatchResult {

    /**
     * Returns the index of the first character of the text matched by the
     * whole regular expression.
     *
     * @return the character index.
     */
    int start();

    /**
     * Returns the index of the first character of the text matched by the
     * given group.
     *
     * @param group
     *            the group index; 0 denotes the whole pattern.
     *
     * @return the character index.
     */
    int start(int group);

    /**
     * Returns the index of the first character after the text matched by
     * the whole regular expression.
     *
     * @return the character index.
     */
    int end();

    /**
     * Returns the index of the first character after the text matched by
     * the given group.
     *
     * @param group
     *            the group index; 0 denotes the whole pattern.
     *
     * @return the character index.
     */
    int end(int group);

    /**
     * Returns the text matched by the whole regular expression.
     *
     * @return the text.
     */
    String group();

    /**
     * Returns the text matched by the given group.
     *
     * @param group
     *            the group index; 0 denotes the whole pattern.
     *
     * @return the text matched by the group.
     */
    String group(int group);

    /**
     * Returns the number of groups in the result, which is always equal to
     * the number of groups in the original regular expression.
     *
     * @return the number of groups.
     */
    int groupCount();
}

+ * + *

+ * Pattern p = Pattern.compile("[A-Za-z]+");
+ *
+ * Matcher m = p.matcher("Hello, Android!");
+ * while (m.find()) {
+ *     System.out.println(m.group()); // prints "Hello" and "Android"
+ * }
+ * 
+ * + *

+ * + * The {@code Matcher} has a state that results from the previous operations. + * For example, it knows whether the most recent attempt to find the + * {@code Pattern} was successful and at which position the next attempt would + * resume the search. Depending on the application's needs, it may become + * necessary to explicitly {@link #reset()} this state from time to time. + */ +public final class TMatcher implements TMatchResult { + + static int MODE_FIND = 1 << 0; + + static int MODE_MATCH = 1 << 1; + + private TPattern pat = null; + + private AbstractSet start = null; + + private CharSequence string = null; + + private MatchResultImpl matchResult = null; + + // bounds + private int leftBound = -1; + + private int rightBound = -1; + + // replacements + private int appendPos = 0; + + private String replacement = null; + + private String processedRepl = null; + + private ArrayList replacementParts = null; + + /** + * Appends a literal part of the input plus a replacement for the current + * match to a given {@link StringBuffer}. The literal part is exactly the + * part of the input between the previous match and the current match. The + * method can be used in conjunction with {@link #find()} and + * {@link #appendTail(StringBuffer)} to walk through the input and replace + * all occurrences of the {@code Pattern} with something else. + * + * @param buffer + * the {@code StringBuffer} to append to. + * @param replacement + * the replacement text. + * @return the {@code Matcher} itself. + * @throws IllegalStateException + * if no successful match has been made. 
+ */ + public TMatcher appendReplacement(StringBuffer buffer, String replacement) { + processedRepl = processReplacement(replacement); + buffer.append(string.subSequence(appendPos, start())); + buffer.append(processedRepl); + appendPos = end(); + return this; + } + + /** + * Parses replacement string and creates pattern + */ + private String processReplacement(String replacement) { + if (this.replacement != null + && this.replacement.equals(replacement)) { + if (replacementParts == null) { + return processedRepl; + } else { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < replacementParts.size(); i++) { + sb.append(replacementParts.get(i)); + } + + return sb.toString(); + } + } else { + this.replacement = replacement; + char[] repl = replacement.toCharArray(); + StringBuilder res = new StringBuilder(); + replacementParts = null; + + int index = 0; + int replacementPos = 0; + boolean nextBackSlashed = false; + + while (index < repl.length) { + + if (repl[index] == '\\' && !nextBackSlashed) { + nextBackSlashed = true; + index++; + } + + if (nextBackSlashed) { + res.append(repl[index]); + nextBackSlashed = false; + } else { + if (repl[index] == '$') { + if (replacementParts == null) { + replacementParts = new ArrayList(); + } + try { + final int gr = Integer.parseInt(new String( + repl, ++index, 1)); + + if (replacementPos != res.length()) { + replacementParts.add(res.subSequence( + replacementPos, res.length())); + replacementPos = res.length(); + } + + replacementParts.add(new Object() { //$NON-LOCK-1$ + private final int grN = gr; + + public String toString() { + return group(grN); + } + }); + String group = group(gr); + replacementPos += group.length(); + res.append(group); + + } catch (IndexOutOfBoundsException iob) { + throw iob; + } catch (Exception e) { + throw new IllegalArgumentException(""); //$NON-NLS-1$ + } + } else { + res.append(repl[index]); + } + } + + index++; + } + + if (replacementParts != null && replacementPos != res.length()) { + 
replacementParts.add(res.subSequence(replacementPos, res + .length())); + } + return res.toString(); + } + } + + /** + * Provides a new input and resets the {@code Matcher}. This results in the + * region being set to the whole input. Results of a previous find get lost. + * The next attempt to find an occurrence of the {@link TPattern} in the + * string will start at the beginning of the input. + * + * @param input + * the new input sequence. + * + * @return the {@code Matcher} itself. + */ + public TMatcher reset(CharSequence input) { + if (input == null) { + throw new NullPointerException(""); + } + this.string = input; + return reset(); + } + + /** + * Resets the {@code Matcher}. This results in the region being set to the + * whole input. Results of a previous find get lost. The next attempt to + * find an occurrence of the {@link TPattern} in the string will start at the + * beginning of the input. + * + * @return the {@code Matcher} itself. + */ + public TMatcher reset() { + this.leftBound = 0; + this.rightBound = string.length(); + matchResult.reset(string, leftBound, rightBound); + appendPos = 0; + replacement = null; + matchResult.previousMatch = -1; + return this; + } + + /** + * Resets this matcher and sets a region. Only characters inside the region + * are considered for a match. + * + * @param start + * the first character of the region. + * @param end + * the first character after the end of the region. + * @return the {@code Matcher} itself. + */ + public TMatcher region(int start, int end) { + + if (start > end || start < 0 || end < 0 + || start > string.length() || end > string.length()) { + throw new IndexOutOfBoundsException(start + ", " + end); + } + + this.leftBound = start; + this.rightBound = end; + matchResult.reset(null, start, end); + appendPos = 0; + replacement = null; + + return this; + } + + /** + * Appends the (unmatched) remainder of the input to the given + * {@link StringBuffer}. 
The method can be used in conjunction with + * {@link #find()} and {@link #appendReplacement(StringBuffer, String)} to + * walk through the input and replace all matches of the {@code Pattern} + * with something else. + * + * @param buffer + * the {@code StringBuffer} to append to. + * @return the {@code StringBuffer}. + * @throws IllegalStateException + * if no successful match has been made. + */ + public StringBuffer appendTail(StringBuffer buffer) { + return buffer.append(string.subSequence(appendPos, string.length())); + } + + /** + * Replaces the first occurrence of this matcher's pattern in the input with + * a given string. + * + * @param replacement + * the replacement text. + * @return the modified input string. + */ + public String replaceFirst(String replacement) { + reset(); + if (find()) { + StringBuffer sb = new StringBuffer(); + appendReplacement(sb, replacement); + return appendTail(sb).toString(); + } + + return string.toString(); + + } + + /** + * Replaces all occurrences of this matcher's pattern in the input with a + * given string. + * + * @param replacement + * the replacement text. + * @return the modified input string. + */ + public String replaceAll(String replacement) { + StringBuffer sb = new StringBuffer(); + reset(); + while (find()) { + appendReplacement(sb, replacement); + } + + return appendTail(sb).toString(); + } + + /** + * Returns the {@link TPattern} instance used inside this matcher. + * + * @return the {@code Pattern} instance. + */ + public TPattern pattern() { + return pat; + } + + /** + * Returns the text that matched a given group of the regular expression. + * + * @param group + * the group, ranging from 0 to groupCount() - 1, with 0 + * representing the whole pattern. + * @return the text that matched the group. + * @throws IllegalStateException + * if no successful match has been made. 
+ */ + public String group(int group) { + return matchResult.group(group); + } + + /** + * Returns the text that matched the whole regular expression. + * + * @return the text. + * @throws IllegalStateException + * if no successful match has been made. + */ + public String group() { + return group(0); + } + + /** + * Returns the next occurrence of the {@link TPattern} in the input. The + * method starts the search from the given character in the input. + * + * @param start + * The index in the input at which the find operation is to + * begin. If this is less than the start of the region, it is + * automatically adjusted to that value. If it is beyond the end + * of the region, the method will fail. + * @return true if (and only if) a match has been found. + */ + public boolean find(int start) { + int stringLength = string.length(); + if (start < 0 || start > stringLength) { + throw new IndexOutOfBoundsException(String.valueOf(start)); + } + + start = findAt(start); + if (start >= 0 && matchResult.isValid()) { + matchResult.finalizeMatch(); + return true; + } + matchResult.startIndex = -1; + return false; + } + + private int findAt(int startIndex) { + matchResult.reset(); + matchResult.setMode(TMatcher.MODE_FIND); + matchResult.setStartIndex(startIndex); + int foundIndex = start.find(startIndex, string, matchResult); + if (foundIndex == -1) { + matchResult.hitEnd = true; + } + return foundIndex; + } + + /** + * Returns the next occurrence of the {@link TPattern} in the input. If a + * previous match was successful, the method continues the search from the + * first character following that match in the input. Otherwise it searches + * either from the region start (if one has been set), or from position 0. + * + * @return true if (and only if) a match has been found. 
+ */ + public boolean find() { + int length = string.length(); + if (!hasTransparentBounds()) + length = rightBound; + if (matchResult.startIndex >= 0 + && matchResult.mode() == TMatcher.MODE_FIND) { + matchResult.startIndex = matchResult.end(); + if (matchResult.end() == matchResult.start()) { + matchResult.startIndex++; + } + + return matchResult.startIndex <= length ? find(matchResult.startIndex) + : false; + } else { + return find(leftBound); + } + } + + /** + * Returns the index of the first character of the text that matched a given + * group. + * + * @param group + * the group, ranging from 0 to groupCount() - 1, with 0 + * representing the whole pattern. + * @return the character index. + * @throws IllegalStateException + * if no successful match has been made. + */ + public int start(int group) { + return matchResult.start(group); + } + + /** + * Returns the index of the first character following the text that matched + * a given group. + * + * @param group + * the group, ranging from 0 to groupCount() - 1, with 0 + * representing the whole pattern. + * @return the character index. + * @throws IllegalStateException + * if no successful match has been made. + */ + public int end(int group) { + return matchResult.end(group); + } + + /** + * Tries to match the {@link TPattern} against the entire region (or the + * entire input, if no region has been set). + * + * @return true if (and only if) the {@code Pattern} matches the entire + * region. + */ + public boolean matches() { + return lookingAt(leftBound, TMatcher.MODE_MATCH); + } + + /** + * Returns a replacement string for the given one that has all backslashes + * and dollar signs escaped. + * + * @param s + * the input string. + * @return the input string, with all backslashes and dollar signs having + * been escaped. 
+ */ + public static String quoteReplacement(String s) { + // first check whether we have smth to quote + if (s.indexOf('\\') < 0 && s.indexOf('$') < 0) + return s; + StringBuilder res = new StringBuilder(s.length() * 2); + char ch; + int len = s.length(); + + for (int i = 0; i < len; i++) { + + switch (ch = s.charAt(i)) { + case '$': + res.append('\\'); + res.append('$'); + break; + case '\\': + res.append('\\'); + res.append('\\'); + break; + default: + res.append(ch); + } + } + + return res.toString(); + } + + /** + * Runs match starting from set specified against input + * sequence starting at index specified; Result of the match + * will be stored into matchResult instance; + */ + private boolean runMatch(AbstractSet set, int index, + MatchResultImpl matchResult) { + + if (set.matches(index, string, matchResult) >= 0) { + matchResult.finalizeMatch(); + return true; + } + + return false; + } + + /** + * Tries to match the {@link TPattern}, starting from the beginning of the + * region (or the beginning of the input, if no region has been set). + * Doesn't require the {@code Pattern} to match against the whole region. + * + * @return true if (and only if) the {@code Pattern} matches. + */ + public boolean lookingAt() { + return lookingAt(leftBound, TMatcher.MODE_FIND); + } + + private boolean lookingAt(int startIndex, int mode) { + matchResult.reset(); + matchResult.setMode(mode); + matchResult.setStartIndex(startIndex); + return runMatch(start, startIndex, matchResult); + } + + /** + * Returns the index of the first character of the text that matched the + * whole regular expression. + * + * @return the character index. + * @throws IllegalStateException + * if no successful match has been made. + */ + public int start() { + return start(0); + } + + /** + * Returns the number of groups in the results, which is always equal to + * the number of groups in the original regular expression. + * + * @return the number of groups. 
+ */ + public int groupCount() { + return matchResult.groupCount(); + } + + /** + * Returns the index of the first character following the text that matched + * the whole regular expression. + * + * @return the character index. + * @throws IllegalStateException + * if no successful match has been made. + */ + public int end() { + return end(0); + } + + /** + * Converts the current match into a separate {@link TMatchResult} instance + * that is independent from this matcher. The new object is unaffected when + * the state of this matcher changes. + * + * @return the new {@code MatchResult}. + * @throws IllegalStateException + * if no successful match has been made. + */ + public TMatchResult toMatchResult() { + return this.matchResult.cloneImpl(); + } + + /** + * Determines whether this matcher has anchoring bounds enabled or not. When + * anchoring bounds are enabled, the start and end of the input match the + * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled + * by default. + * + * @param value + * the new value for anchoring bounds. + * @return the {@code Matcher} itself. + */ + public TMatcher useAnchoringBounds(boolean value) { + matchResult.useAnchoringBounds(value); + return this; + } + + /** + * Indicates whether this matcher has anchoring bounds enabled. When + * anchoring bounds are enabled, the start and end of the input match the + * '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled + * by default. + * + * @return true if (and only if) the {@code Matcher} uses anchoring bounds. + */ + public boolean hasAnchoringBounds() { + return matchResult.hasAnchoringBounds(); + } + + /** + * Determines whether this matcher has transparent bounds enabled or not. + * When transparent bounds are enabled, the parts of the input outside the + * region are subject to lookahead and lookbehind, otherwise they are not. + * Transparent bounds are disabled by default. + * + * @param value + * the new value for transparent bounds. 
+ * @return the {@code Matcher} itself. + */ + public TMatcher useTransparentBounds(boolean value) { + matchResult.useTransparentBounds(value); + return this; + } + + /** + * Indicates whether this matcher has transparent bounds enabled. When + * transparent bounds are enabled, the parts of the input outside the region + * are subject to lookahead and lookbehind, otherwise they are not. + * Transparent bounds are disabled by default. + * + * @return true if (and only if) the {@code Matcher} uses anchoring bounds. + */ + public boolean hasTransparentBounds() { + return matchResult.hasTransparentBounds(); + } + + /** + * Returns this matcher's region start, that is, the first character that is + * considered for a match. + * + * @return the start of the region. + */ + public int regionStart() { + return matchResult.getLeftBound(); + } + + /** + * Returns this matcher's region end, that is, the first character that is + * not considered for a match. + * + * @return the end of the region. + */ + public int regionEnd() { + return matchResult.getRightBound(); + } + + /** + * Indicates whether more input might change a successful match into an + * unsuccessful one. + * + * @return true if (and only if) more input might change a successful match + * into an unsuccessful one. + */ + public boolean requireEnd() { + return matchResult.requireEnd; + } + + /** + * Indicates whether the last match hit the end of the input. + * + * @return true if (and only if) the last match hit the end of the input. + */ + public boolean hitEnd() { + return matchResult.hitEnd; + } + + /** + * Sets a new pattern for the {@code Matcher}. Results of a previous find + * get lost. The next attempt to find an occurrence of the {@link TPattern} + * in the string will start at the beginning of the input. + * + * @param pattern + * the new {@code Pattern}. + * + * @return the {@code Matcher} itself. 
+ */ + public TMatcher usePattern(TPattern pattern) { + if (pattern == null) { + throw new IllegalArgumentException(""); + } + int startIndex = matchResult.getPreviousMatchEnd(); + int mode = matchResult.mode(); + this.pat = pattern; + this.start = pattern.start; + matchResult = new MatchResultImpl(this.string, leftBound, rightBound, + pattern.groupCount(), pattern.compCount(), pattern.consCount()); + matchResult.setStartIndex(startIndex); + matchResult.setMode(mode); + return this; + } + + TMatcher(TPattern pat, CharSequence cs) { + this.pat = pat; + this.start = pat.start; + this.string = cs; + this.leftBound = 0; + this.rightBound = string.length(); + matchResult = new MatchResultImpl(cs, leftBound, rightBound, pat + .groupCount(), pat.compCount(), pat.consCount()); + } + + @Override + public String toString() { + String lastMatch = ""; + try { + lastMatch = Integer.toString(start()); + } catch (IllegalStateException e) { + } + return getClass().getCanonicalName() + "[pattern=" + pat + " region=" + + matchResult.getLeftBound() + "," + + matchResult.getRightBound() + " lastmatch=" + lastMatch + "]"; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPattern.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPattern.java new file mode 100644 index 000000000..2bb0bf0ae --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPattern.java @@ -0,0 +1,1429 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.io.Serializable; + +import java.util.ArrayList; + + +/** + * Represents a pattern used for matching, searching, or replacing strings. + * {@code Pattern}s are specified in terms of regular expressions and compiled + * using an instance of this class. They are then used in conjunction with a + * {@link TMatcher} to perform the actual search. + *

+ * A typical use case looks like this: + *

+ *

+ * Pattern p = Pattern.compile("Hello, A[a-z]*!");
+ *
+ * Matcher m = p.matcher("Hello, Android!");
+ * boolean b1 = m.matches(); // true
+ *
+ * m.reset("Hello, Robot!");
+ * boolean b2 = m.matches(); // false
+ * 
+ *

+ * The above code could also be written in a more compact fashion, though this + * variant is less efficient, since {@code Pattern} and {@code Matcher} objects + * are created on the fly instead of being reused: + *

+ *     boolean b1 = Pattern.matches("Hello, A[a-z]*!", "Hello, Android!"); // true
+ *     boolean b2 = Pattern.matches("Hello, A[a-z]*!", "Hello, Robot!");   // false
+ * 
+ * + * @see TMatcher + */ +public final class TPattern implements Serializable { + + private static final long serialVersionUID = 5073258162644648461L; + + static final boolean _DEBUG_ = false; + + /** + * This constant specifies that a pattern matches Unix line endings ('\n') + * only against the '.', '^', and '$' meta characters. + */ + public static final int UNIX_LINES = 1 << 0; + + /** + * This constant specifies that a {@code Pattern} is matched + * case-insensitively. That is, the patterns "a+" and "A+" would both match + * the string "aAaAaA". + */ + public static final int CASE_INSENSITIVE = 1 << 1; + + /** + * This constant specifies that a {@code Pattern} may contain whitespace or + * comments. Otherwise comments and whitespace are taken as literal + * characters. + */ + public static final int COMMENTS = 1 << 2; + + /** + * This constant specifies that the meta characters '^' and '$' match only + * the beginning and end end of an input line, respectively. Normally, they + * match the beginning and the end of the complete input. + */ + public static final int MULTILINE = 1 << 3; + + /** + * This constant specifies that the whole {@code Pattern} is to be taken + * literally, that is, all meta characters lose their meanings. + */ + public static final int LITERAL = 1 << 4; + + /** + * This constant specifies that the '.' meta character matches arbitrary + * characters, including line endings, which is normally not the case. + */ + public static final int DOTALL = 1 << 5; + + /** + * This constant specifies that a {@code Pattern} is matched + * case-insensitively with regard to all Unicode characters. It is used in + * conjunction with the {@link #CASE_INSENSITIVE} constant to extend its + * meaning to all Unicode characters. + */ + public static final int UNICODE_CASE = 1 << 6; + + /** + * This constant specifies that a character in a {@code Pattern} and a + * character in the input string only match if they are canonically + * equivalent. 
+ */ + public static final int CANON_EQ = 1 << 7; + + static final int BACK_REF_NUMBER = 10; + + /** + * Bit mask that includes all defined match flags + */ + static final int flagsBitMask = TPattern.UNIX_LINES | + TPattern.CASE_INSENSITIVE | + TPattern.COMMENTS | + TPattern.MULTILINE | + TPattern.LITERAL | + TPattern.DOTALL | + TPattern.UNICODE_CASE | + TPattern.CANON_EQ; + + /** + * Current pattern to be compiled; + */ + private transient Lexer lexemes = null; + + /** + * Pattern compile flags; + */ + private int flags = 0; + + private String pattern = null; + + /* + * All backreferences that may be used in pattern. + */ + transient private FSet backRefs [] = new FSet [BACK_REF_NUMBER]; + + /* + * Is true if backreferenced sets replacement is needed + */ + transient private boolean needsBackRefReplacement = false; + + transient private int globalGroupIndex = -1; + + transient private int compCount = -1; + + transient private int consCount = -1; + + transient AbstractSet start = null; + + /** + * Returns a {@link TMatcher} for the {@code Pattern} and a given input. The + * {@code Matcher} can be used to match the {@code Pattern} against the + * whole input, find occurrences of the {@code Pattern} in the input, or + * replace parts of the input. + * + * @param input + * the input to process. + * + * @return the resulting {@code Matcher}. + */ + public TMatcher matcher(CharSequence input) { + return new TMatcher(this, input); + } + + /** + * Splits the given input sequence around occurrences of the {@code Pattern}. + * The function first determines all occurrences of the {@code Pattern} + * inside the input sequence. It then builds an array of the + * "remaining" strings before, in-between, and after these + * occurrences. An additional parameter determines the maximal number of + * entries in the resulting array and the handling of trailing empty + * strings. + * + * @param inputSeq + * the input sequence. 
+ * @param limit + * Determines the maximal number of entries in the resulting + * array. + *
    + *
  • For n > 0, it is guaranteed that the resulting array + * contains at most n entries. + *
  • For n < 0, the length of the resulting array is + * exactly the number of occurrences of the {@code Pattern} +1. + * All entries are included. + *
  • For n == 0, the length of the resulting array is at most + * the number of occurrences of the {@code Pattern} +1. Empty + * strings at the end of the array are not included. + *
+ * + * @return the resulting array. + */ + public String[] split(CharSequence inputSeq, int limit) { + ArrayList res = new ArrayList(); + TMatcher mat = matcher(inputSeq); + int index = 0; + int curPos = 0; + + if (inputSeq.length() == 0) { + return new String [] {""}; //$NON-NLS-1$ + } else { + while (mat.find() && (index + 1 < limit || limit <= 0)) { + res.add(inputSeq.subSequence(curPos, mat.start()).toString()); + curPos = mat.end(); + index++; + } + + res.add(inputSeq.subSequence(curPos, inputSeq.length()).toString()); + index++; + + /* + * discard trailing empty strings + */ + if (limit == 0) { + while (--index >= 0 && res.get(index).toString().length() == 0) { + res.remove(index); + } + } + } + return (String[]) res.toArray(new String[index >= 0 ? index : 0]); + } + + /** + * Splits a given input around occurrences of a regular expression. This is + * a convenience method that is equivalent to calling the method + * {@link #split(java.lang.CharSequence, int)} with a limit of 0. + * + * @param input + * the input sequence. + * + * @return the resulting array. + */ + public String[] split(CharSequence input) { + return split(input, 0); + } + + /** + * Returns the regular expression that was compiled into this + * {@code Pattern}. + * + * @return the regular expression. + */ + public String pattern() { + return lexemes.toString(); + } + + @Override + public String toString() { + return this.pattern(); + } + + /** + * Returns the flags that have been set for this {@code Pattern}. + * + * @return the flags that have been set. A combination of the constants + * defined in this class. + * + * @see #CANON_EQ + * @see #CASE_INSENSITIVE + * @see #COMMENTS + * @see #DOTALL + * @see #LITERAL + * @see #MULTILINE + * @see #UNICODE_CASE + * @see #UNIX_LINES + */ + public int flags() { + return this.flags; + } + + /** + * Compiles a regular expression, creating a new {@code Pattern} instance in + * the process. 
Allows to set some flags that modify the behavior of the + * {@code Pattern}. + * + * @param pattern + * the regular expression. + * @param flags + * the flags to set. Basically, any combination of the constants + * defined in this class is valid. + * + * @return the new {@code Pattern} instance. + * + * @throws TPatternSyntaxException + * if the regular expression is syntactically incorrect. + * + * @see #CANON_EQ + * @see #CASE_INSENSITIVE + * @see #COMMENTS + * @see #DOTALL + * @see #LITERAL + * @see #MULTILINE + * @see #UNICODE_CASE + * @see #UNIX_LINES + */ + public static TPattern compile(String pattern, int flags) + throws TPatternSyntaxException { + + if ((flags != 0) && + ((flags | flagsBitMask) != flagsBitMask)) { + + throw new IllegalArgumentException(""); + } + + AbstractSet.counter = 1; + + return new TPattern().compileImpl(pattern, flags); + } + + /** + * + * @param pattern - + * Regular expression to be compiled + * @param flags - + * The bit mask including CASE_INSENSITIVE, MULTILINE, DOTALL, + * UNICODE_CASE, and CANON_EQ + * + * @return Compiled pattern + */ + private TPattern compileImpl(String pattern, int flags) + throws TPatternSyntaxException { + this.lexemes = new Lexer(pattern, flags); + this.flags = flags; + this.pattern = pattern; + + start = processExpression(-1, this.flags, null); + if (!lexemes.isEmpty()) { + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + } + finalizeCompile(); + return this; + } + + /** + * A->(a|)+ + */ + private AbstractSet processAlternations(AbstractSet last) { + CharClass auxRange = new CharClass(hasFlag(TPattern.CASE_INSENSITIVE), + hasFlag(TPattern.UNICODE_CASE)); + while (!lexemes.isEmpty() + && lexemes.isLetter() + && (lexemes.lookAhead() == 0 + || lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR || lexemes + .lookAhead() == Lexer.CHAR_RIGHT_PARENTHESIS)) { + auxRange.add(lexemes.next()); + if (lexemes.peek() == Lexer.CHAR_VERTICAL_BAR) + lexemes.next(); + } + AbstractSet 
rangeSet = processRangeSet(auxRange); + rangeSet.setNext(last); + + return rangeSet; + } + + /** + * E->AE; E->S|E; E->S; A->(a|)+ E->S(|S)* + */ + private AbstractSet processExpression(int ch, int newFlags, + AbstractSet last) { + ArrayList children = new ArrayList(); + AbstractSet child; + int saveFlags = flags; + FSet fSet; + boolean saveChangedFlags = false; + + if (newFlags != flags) { + flags = newFlags; + } + + switch (ch) { + case Lexer.CHAR_NONCAP_GROUP: + fSet = new NonCapFSet(++consCount); + break; + + case Lexer.CHAR_POS_LOOKAHEAD: + /* falls through */ + + case Lexer.CHAR_NEG_LOOKAHEAD: + fSet = new AheadFSet(); + break; + + case Lexer.CHAR_POS_LOOKBEHIND: + /* falls through */ + + case Lexer.CHAR_NEG_LOOKBEHIND: + fSet = new BehindFSet(++consCount); + break; + + case Lexer.CHAR_ATOMIC_GROUP: + fSet = new AtomicFSet(++consCount); + break; + + default: + globalGroupIndex++; + if (last == null) { + + // expr = new StartSet(); + fSet = new FinalSet(); + saveChangedFlags = true; + } else { + + // expr = new JointSet(globalGroupIndex); + fSet = new FSet(globalGroupIndex); + } + if (globalGroupIndex > -1 && globalGroupIndex < 10) { + backRefs[globalGroupIndex] = fSet; + } + break; + } + + do { + if (lexemes.isLetter() + && lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR) { + child = processAlternations(fSet); + } else if (lexemes.peek() == Lexer.CHAR_VERTICAL_BAR){ + child = new EmptySet(fSet); + lexemes.next(); + } else { + child = processSubExpression(fSet); + if (lexemes.peek() == Lexer.CHAR_VERTICAL_BAR) { + lexemes.next(); + } + } + if (child != null) { + + //expr.addChild(child); + children.add(child); + } + } while (!(lexemes.isEmpty() + || (lexemes.peek() == Lexer.CHAR_RIGHT_PARENTHESIS))); + + if (lexemes.back() == Lexer.CHAR_VERTICAL_BAR) { + children.add(new EmptySet(fSet)); + } + + if (flags != saveFlags && !saveChangedFlags) { + flags = saveFlags; + lexemes.restoreFlags(flags); + } + + switch (ch) { + case Lexer.CHAR_NONCAP_GROUP: + return new 
NonCapJointSet(children, fSet); + + case Lexer.CHAR_POS_LOOKAHEAD: + return new PositiveLookAhead(children, fSet); + + case Lexer.CHAR_NEG_LOOKAHEAD: + return new NegativeLookAhead(children, fSet); + + case Lexer.CHAR_POS_LOOKBEHIND: + return new PositiveLookBehind(children, fSet); + + case Lexer.CHAR_NEG_LOOKBEHIND: + return new NegativeLookBehind(children, fSet); + + case Lexer.CHAR_ATOMIC_GROUP: + return new AtomicJointSet(children, fSet); + + default: + switch (children.size()) { + case 0: + return new EmptySet(fSet); + + case 1: + return new SingleSet((AbstractSet) children.get(0), fSet); + + default: + return new JointSet(children, fSet); + } + } + } + + + /** + * T->a+ + */ + private AbstractSet processSequence(AbstractSet last) { + StringBuffer substring = new StringBuffer(); + + while (!lexemes.isEmpty() + && lexemes.isLetter() + && !lexemes.isHighSurrogate() + && !lexemes.isLowSurrogate() + && ((!lexemes.isNextSpecial() && lexemes.lookAhead() == 0) // end + // of + // pattern + || (!lexemes.isNextSpecial() && Lexer.isLetter(lexemes + .lookAhead())) + || lexemes.lookAhead() == Lexer.CHAR_RIGHT_PARENTHESIS + || (lexemes.lookAhead() & 0x8000ffff) == Lexer.CHAR_LEFT_PARENTHESIS + || lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR || lexemes + .lookAhead() == Lexer.CHAR_DOLLAR)) { + int ch = lexemes.next(); + + if (Character.isSupplementaryCodePoint(ch)) { + substring.append(Character.toChars(ch)); + } else { + substring.append((char) ch); + } + } + if (!hasFlag(TPattern.CASE_INSENSITIVE)) { + return new SequenceSet(substring); + } else if (!hasFlag(TPattern.UNICODE_CASE)) { + return new CISequenceSet(substring); + } else { + return new UCISequenceSet(substring); + } + } + + /** + * D->a + */ + private AbstractSet processDecomposedChar(AbstractSet last) { + int [] codePoints = new int [Lexer.MAX_DECOMPOSITION_LENGTH]; + char [] codePointsHangul; + int readCodePoints = 0; + int curSymb = -1; + int curSymbIndex = -1; + + if (!lexemes.isEmpty() && 
lexemes.isLetter()) { + curSymb = lexemes.next(); + codePoints [readCodePoints] = curSymb; + curSymbIndex = curSymb - Lexer.LBase; + } + + /* + * We process decomposed Hangul syllable LV or LVT or process jamo L. + * See http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf + * "3.12 Conjoining Jamo Behavior" + */ + if ((curSymbIndex >= 0) && (curSymbIndex < Lexer.LCount)) { + codePointsHangul = new char [Lexer + .MAX_HANGUL_DECOMPOSITION_LENGTH]; + codePointsHangul[readCodePoints++] = (char) curSymb; + + curSymb = lexemes.peek(); + curSymbIndex = curSymb - Lexer.VBase; + if ((curSymbIndex >= 0) && (curSymbIndex < Lexer.VCount)) { + codePointsHangul [readCodePoints++] = (char) curSymb; + lexemes.next(); + curSymb = lexemes.peek(); + curSymbIndex = curSymb - Lexer.TBase; + if ((curSymbIndex >= 0) && (curSymbIndex < Lexer.TCount)) { + codePointsHangul [readCodePoints++] = (char) curSymb; + lexemes.next(); + + //LVT syllable + return new HangulDecomposedCharSet(codePointsHangul, 3); + } else { + + //LV syllable + return new HangulDecomposedCharSet(codePointsHangul, 2); + } + } else { + + //L jamo + if (!hasFlag(TPattern.CASE_INSENSITIVE)) { + return new CharSet(codePointsHangul[0]); + } else if (!hasFlag(TPattern.UNICODE_CASE)) { + return new CICharSet(codePointsHangul[0]); + } else { + return new UCICharSet(codePointsHangul[0]); + } + } + + /* + * We process single codepoint or decomposed codepoint. + * We collect decomposed codepoint and obtain + * one DecomposedCharSet. + */ + } else { + readCodePoints++; + + while((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH) + && !lexemes.isEmpty() && lexemes.isLetter() + && !Lexer.isDecomposedCharBoundary(lexemes.peek())) { + codePoints [readCodePoints++] = lexemes.next(); + } + + /* + * We have read an ordinary symbol. 
+ */ + if (readCodePoints == 1 + && !Lexer.hasSingleCodepointDecomposition(codePoints[0])) { + return processCharSet(codePoints[0]); + } else { + if (!hasFlag(TPattern.CASE_INSENSITIVE)) { + return new DecomposedCharSet(codePoints, readCodePoints); + } else if (!hasFlag(TPattern.UNICODE_CASE)) { + return new CIDecomposedCharSet(codePoints, readCodePoints); + } else { + return new UCIDecomposedCharSet(codePoints, readCodePoints); + } + } + } + } + + /** + * S->BS; S->QS; S->Q; B->a+ + */ + private AbstractSet processSubExpression(AbstractSet last) { + AbstractSet cur; + if (lexemes.isLetter() && !lexemes.isNextSpecial() + && Lexer.isLetter(lexemes.lookAhead())) { + if (hasFlag(TPattern.CANON_EQ)) { + cur = processDecomposedChar(last); + if (!lexemes.isEmpty() + + /* && !pattern.isQuantifier() */ + && (lexemes.peek() != Lexer.CHAR_RIGHT_PARENTHESIS + || last instanceof FinalSet) + && lexemes.peek() != Lexer.CHAR_VERTICAL_BAR + && !lexemes.isLetter()) { + cur = processQuantifier(last, cur); + } + } else if (lexemes.isHighSurrogate() || lexemes.isLowSurrogate()) { + AbstractSet term = processTerminal(last); + cur = processQuantifier(last, term); + } else { + cur = processSequence(last); + } + } else if (lexemes.peek() == Lexer.CHAR_RIGHT_PARENTHESIS) { + if (last instanceof FinalSet) { + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + } else { + cur = new EmptySet(last); + } + } else { + AbstractSet term = processTerminal(last); + cur = processQuantifier(last, term); + } + + if (!lexemes.isEmpty() + // && !pattern.isQuantifier() + && (lexemes.peek() != Lexer.CHAR_RIGHT_PARENTHESIS + || last instanceof FinalSet) + && lexemes.peek() != Lexer.CHAR_VERTICAL_BAR) { + AbstractSet next = processSubExpression(last); + if (cur instanceof LeafQuantifierSet + // TODO create personal UnifiedQuantifierSet for composite + // quantifiers + // to take into account Quantifier counters + // //// + && !(cur instanceof CompositeQuantifierSet) + && !(cur 
instanceof GroupQuantifierSet) + && !(cur instanceof AltQuantifierSet) + && !next.first(((LeafQuantifierSet) cur).getInnerSet())) { + cur = new UnifiedQuantifierSet((LeafQuantifierSet) cur); + } + if (((char) next.getType()) == '+') { + cur.setNext(((LeafQuantifierSet) next).getInnerSet()); + } else { + cur.setNext(next); + } + } else if (cur != null) { + cur.setNext(last); + } else { + return null; + } + + if (((char) cur.getType()) == '+') { + return ((QuantifierSet) cur).getInnerSet(); + } else { + return cur; + } + } + + /** + * Q->T(*|+|?...) also do some optimizations. + * + */ + private AbstractSet processQuantifier(AbstractSet last, AbstractSet term) { + int quant = lexemes.peek(); + + if (term != null && !(term instanceof LeafSet)) { + switch (quant) { + case Lexer.QUANT_STAR: + case Lexer.QUANT_PLUS: { + QuantifierSet q; + + lexemes.next(); + if (term.getType() == AbstractSet.TYPE_DOTSET) { + if (!hasFlag(TPattern.DOTALL)) { + q = new DotQuantifierSet(term, last, quant, + AbstractLineTerminator.getInstance(flags)); + } else { + q = new DotAllQuantifierSet(term, last, quant); + } + } else { + q = new GroupQuantifierSet(term, last, quant); + } + term.setNext(q); + return q; + } + + case Lexer.QUANT_STAR_R: + case Lexer.QUANT_PLUS_R: { + lexemes.next(); + GroupQuantifierSet q = new ReluctantGroupQuantifierSet(term, + last, quant); + term.setNext(q); + return q; + } + + case Lexer.QUANT_PLUS_P: { + lexemes.next(); + // possessive plus will be handled by unique class + // and should not be postprocessed to point previous set + // to the inner one. 
+ // // + return new PosPlusGroupQuantifierSet(term, last, + Lexer.QUANT_STAR_P); + } + + case Lexer.QUANT_STAR_P: { + lexemes.next(); + return new PossessiveGroupQuantifierSet(term, last, quant); + } + + case Lexer.QUANT_ALT: { + lexemes.next(); + AltGroupQuantifierSet q = new AltGroupQuantifierSet(term, last, + Lexer.QUANT_ALT); + term.setNext(last); + return q; + } + + case Lexer.QUANT_ALT_P: { + lexemes.next(); + return new PosAltGroupQuantifierSet(term, last, Lexer.QUANT_ALT); + } + + case Lexer.QUANT_ALT_R: { + lexemes.next(); + RelAltGroupQuantifierSet q = new RelAltGroupQuantifierSet(term, + last, Lexer.QUANT_ALT); + term.setNext(last); + return q; + } + + case Lexer.QUANT_COMP: { + CompositeGroupQuantifierSet q = new CompositeGroupQuantifierSet( + (Quantifier) lexemes.nextSpecial(), term, last, + Lexer.QUANT_ALT, ++compCount); + term.setNext(q); + return q; + } + + case Lexer.QUANT_COMP_P: { + return new PosCompositeGroupQuantifierSet((Quantifier) lexemes + .nextSpecial(), term, last, Lexer.QUANT_ALT, + ++compCount); + } + + case Lexer.QUANT_COMP_R: { + RelCompositeGroupQuantifierSet q = new RelCompositeGroupQuantifierSet( + (Quantifier) lexemes.nextSpecial(), term, last, + Lexer.QUANT_ALT, ++compCount); + term.setNext(q); + return q; + } + + default: + return term; + } + } else { + LeafSet leaf = null; + if (term != null) + leaf = (LeafSet) term; + switch (quant) { + case Lexer.QUANT_STAR: + case Lexer.QUANT_PLUS: { + lexemes.next(); + LeafQuantifierSet q = new LeafQuantifierSet(leaf, + last, quant); + leaf.setNext(q); + return q; + } + + case Lexer.QUANT_STAR_R: + case Lexer.QUANT_PLUS_R: { + lexemes.next(); + ReluctantQuantifierSet q = new ReluctantQuantifierSet(leaf, + last, quant); + leaf.setNext(q); + return q; + } + + case Lexer.QUANT_PLUS_P: + case Lexer.QUANT_STAR_P: { + lexemes.next(); + PossessiveQuantifierSet q = new PossessiveQuantifierSet(leaf, + last, quant); + leaf.setNext(q); + return q; + } + + case Lexer.QUANT_ALT: { + lexemes.next(); + 
return new AltQuantifierSet(leaf, last, Lexer.QUANT_ALT); + } + + case Lexer.QUANT_ALT_P: { + lexemes.next(); + return new PossessiveAltQuantifierSet(leaf, last, + Lexer.QUANT_ALT_P); + } + + case Lexer.QUANT_ALT_R: { + lexemes.next(); + return new ReluctantAltQuantifierSet(leaf, last, + Lexer.QUANT_ALT_R); + } + + case Lexer.QUANT_COMP: { + return new CompositeQuantifierSet((Quantifier) lexemes + .nextSpecial(), leaf, last, Lexer.QUANT_COMP); + } + + case Lexer.QUANT_COMP_P: { + return new PossessiveCompositeQuantifierSet( + (Quantifier) lexemes.nextSpecial(), leaf, last, + Lexer.QUANT_COMP_P); + } + case Lexer.QUANT_COMP_R: { + return new ReluctantCompositeQuantifierSet((Quantifier) lexemes + .nextSpecial(), leaf, last, Lexer.QUANT_COMP_R); + } + + default: + return term; + } + } + } + + /** + * T-> letter|[range]|{char-class}|(E) + */ + private AbstractSet processTerminal(AbstractSet last) { + int ch; + AbstractSet term = null; + do { + ch = lexemes.peek(); + if ((ch & 0x8000ffff) == Lexer.CHAR_LEFT_PARENTHESIS) { + int newFlags; + lexemes.next(); + newFlags = (ch & 0x00ff0000) >> 16; + ch = ch & 0xff00ffff; + if (ch == Lexer.CHAR_FLAGS) { + flags = newFlags; + } else { + newFlags = (ch == Lexer.CHAR_NONCAP_GROUP) + ? 
newFlags + : flags; + term = processExpression(ch, newFlags, last); + if (lexemes.peek() != Lexer.CHAR_RIGHT_PARENTHESIS) { + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + } + lexemes.next(); + } + } else + switch (ch) { + case Lexer.CHAR_LEFT_SQUARE_BRACKET: { + lexemes.next(); + boolean negative = false; + if (lexemes.peek() == Lexer.CHAR_CARET) { + negative = true; + lexemes.next(); + } + + term = processRange(negative, last); + if (lexemes.peek() != Lexer.CHAR_RIGHT_SQUARE_BRACKET) + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + lexemes.setMode(Lexer.MODE_PATTERN); + lexemes.next(); + break; + } + + case Lexer.CHAR_DOT: { + lexemes.next(); + + if (!hasFlag(TPattern.DOTALL)) { + term = new DotSet(AbstractLineTerminator + .getInstance(flags)); + } else { + term = new DotAllSet(); + } + + break; + } + + case Lexer.CHAR_CARET: { + lexemes.next(); + consCount++; + if (!hasFlag(TPattern.MULTILINE)) { + term = new SOLSet(); + } else { + term = new MultiLineSOLSet(AbstractLineTerminator + .getInstance(flags)); + } + + break; + } + + case Lexer.CHAR_DOLLAR: { + lexemes.next(); + consCount++; + if (!hasFlag(TPattern.MULTILINE)) { + if (!hasFlag(TPattern.UNIX_LINES)) { + term = new EOLSet(consCount); + } else { + term = new UEOLSet(consCount); + } + } else { + if (!hasFlag(TPattern.UNIX_LINES)) { + term = new MultiLineEOLSet(consCount); + } else { + term = new UMultiLineEOLSet(consCount); + } + } + + break; + } + + case Lexer.CHAR_WORD_BOUND: { + lexemes.next(); + term = new WordBoundary(true); + break; + } + + case Lexer.CHAR_NONWORD_BOUND: { + lexemes.next(); + term = new WordBoundary(false); + break; + } + + case Lexer.CHAR_END_OF_INPUT: { + lexemes.next(); + term = new EOISet(); + break; + } + + case Lexer.CHAR_END_OF_LINE: { + lexemes.next(); + term = new EOLSet(++consCount); + break; + } + + case Lexer.CHAR_START_OF_INPUT: { + lexemes.next(); + term = new SOLSet(); + break; + } + + case 
Lexer.CHAR_PREVIOUS_MATCH: { + lexemes.next(); + term = new PreviousMatch(); + break; + } + + case 0x80000000 | '1': + case 0x80000000 | '2': + case 0x80000000 | '3': + case 0x80000000 | '4': + case 0x80000000 | '5': + case 0x80000000 | '6': + case 0x80000000 | '7': + case 0x80000000 | '8': + case 0x80000000 | '9': { + int number = (ch & 0x7FFFFFFF) - '0'; + if (globalGroupIndex >= number) { + lexemes.next(); + consCount++; + if (!hasFlag(TPattern.CASE_INSENSITIVE)) { + term = new BackReferenceSet(number, consCount); + } else if (!hasFlag(TPattern.UNICODE_CASE)) { + term = new CIBackReferenceSet(number, consCount); + } else { + term = new UCIBackReferenceSet(number, consCount); + } + (backRefs [number]).isBackReferenced = true; + needsBackRefReplacement = true; + break; + } else { + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + } + } + + case 0: { + AbstractCharClass cc = null; + if ((cc = (AbstractCharClass) lexemes.peekSpecial()) != null) { + term = processRangeSet(cc); + } else if (!lexemes.isEmpty()) { + + //ch == 0 + term = new CharSet((char) ch); + } else { + term = new EmptySet(last); + break; + } + lexemes.next(); + break; + } + + default: { + if (ch >= 0 && !lexemes.isSpecial()) { + term = processCharSet(ch); + lexemes.next(); + } else if (ch == Lexer.CHAR_VERTICAL_BAR) { + term = new EmptySet(last); + } else if (ch == Lexer.CHAR_RIGHT_PARENTHESIS) { + if (last instanceof FinalSet) { + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + } else { + term = new EmptySet(last); + } + } else { + throw new TPatternSyntaxException( + (lexemes.isSpecial() ? 
lexemes.peekSpecial() + .toString() : Character + .toString((char) ch)), lexemes + .toString(), lexemes.getIndex()); + } + } + } + } while (ch == Lexer.CHAR_FLAGS); + return term; + } + + private AbstractSet processRange(boolean negative, AbstractSet last) { + AbstractCharClass res = processRangeExpression(negative); + AbstractSet rangeSet = processRangeSet(res); + rangeSet.setNext(last); + + return rangeSet; + } + + /** + * process [...] ranges + */ + private CharClass processRangeExpression(boolean alt) { + CharClass res = new CharClass(alt, hasFlag(TPattern.CASE_INSENSITIVE), + hasFlag(TPattern.UNICODE_CASE)); + int buffer = -1; + boolean intersection = false; + boolean notClosed = false; + boolean firstInClass = true; + + while (!lexemes.isEmpty() + && (notClosed = (lexemes.peek()) != Lexer.CHAR_RIGHT_SQUARE_BRACKET + || firstInClass)) { + switch (lexemes.peek()) { + + case Lexer.CHAR_RIGHT_SQUARE_BRACKET: { + if (buffer >= 0) + res.add(buffer); + buffer = ']'; + lexemes.next(); + break; + } + case Lexer.CHAR_LEFT_SQUARE_BRACKET: { + if (buffer >= 0) { + res.add(buffer); + buffer = -1; + } + lexemes.next(); + boolean negative = false; + if (lexemes.peek() == Lexer.CHAR_CARET) { + lexemes.next(); + negative = true; + } + + if (intersection) + res.intersection(processRangeExpression(negative)); + else + res.union(processRangeExpression(negative)); + intersection = false; + lexemes.next(); + break; + } + + case Lexer.CHAR_AMPERSAND: { + if (buffer >= 0) + res.add(buffer); + buffer = lexemes.next(); + + /* + * if there is a start for subrange we will do an intersection + * otherwise treat '&' as a normal character + */ + if (lexemes.peek() == Lexer.CHAR_AMPERSAND) { + if (lexemes.lookAhead() + == Lexer.CHAR_LEFT_SQUARE_BRACKET) { + lexemes.next(); + intersection = true; + buffer = -1; + } else { + lexemes.next(); + if (firstInClass) { + + //skip "&&" at "[&&...]" or "[^&&...]" + res = processRangeExpression(false); + } else { + + //ignore "&&" at "[X&&]" ending 
where X != empty string + if (!(lexemes.peek() + == Lexer.CHAR_RIGHT_SQUARE_BRACKET)) { + res.intersection(processRangeExpression(false)); + } + } + + } + } else { + + //treat '&' as a normal character + buffer = '&'; + } + + break; + } + + case Lexer.CHAR_HYPHEN: { + if (firstInClass + || lexemes.lookAhead() == Lexer.CHAR_RIGHT_SQUARE_BRACKET + || lexemes.lookAhead() == Lexer.CHAR_LEFT_SQUARE_BRACKET + || buffer < 0) { + // treat hypen as normal character + if (buffer >= 0) + res.add(buffer); + buffer = '-'; + lexemes.next(); + // range + } else { + lexemes.next(); + int cur = lexemes.peek(); + + if (!lexemes.isSpecial() + && (cur >= 0 + || lexemes.lookAhead() == Lexer.CHAR_RIGHT_SQUARE_BRACKET + || lexemes.lookAhead() == Lexer.CHAR_LEFT_SQUARE_BRACKET || buffer < 0)) { + + try { + if (!Lexer.isLetter(cur)) { + cur = cur & 0xFFFF; + } + res.add(buffer, cur); + } catch (Exception e) { + throw new TPatternSyntaxException("", pattern(), lexemes.getIndex()); + } + lexemes.next(); + buffer = -1; + } else { + throw new TPatternSyntaxException("", pattern(), lexemes.getIndex()); + } + } + + break; + } + + case Lexer.CHAR_CARET: { + if (buffer >= 0) + res.add(buffer); + buffer = '^'; + lexemes.next(); + break; + } + + case 0: { + if (buffer >= 0) + res.add(buffer); + AbstractCharClass cs = (AbstractCharClass) lexemes + .peekSpecial(); + if (cs != null) { + res.add(cs); + buffer = -1; + } else { + buffer = 0; + } + + lexemes.next(); + break; + } + + default: { + if (buffer >= 0) + res.add(buffer); + buffer = lexemes.next(); + break; + } + } + + firstInClass = false; + } + if (notClosed) { + throw new TPatternSyntaxException("", pattern(), lexemes.getIndex() - 1); + } + if (buffer >= 0) + res.add(buffer); + return res; + } + + private AbstractSet processCharSet(int ch) { + boolean isSupplCodePoint = Character + .isSupplementaryCodePoint(ch); + + if (hasFlag(TPattern.CASE_INSENSITIVE)) { + + if ((ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z')) { + return new 
CICharSet((char) ch); + } else if (hasFlag(TPattern.UNICODE_CASE) + && ch > 128) { + if (isSupplCodePoint) { + return new UCISupplCharSet(ch); + } else if (Lexer.isLowSurrogate(ch)) { + + //we need no UCILowSurrogateCharSet + return new LowSurrogateCharSet((char) ch); + } else if (Lexer.isHighSurrogate(ch)) { + + //we need no UCIHighSurrogateCharSet + return new HighSurrogateCharSet((char) ch); + } else { + return new UCICharSet((char) ch); + } + } + } + + if (isSupplCodePoint) { + return new SupplCharSet(ch); + } else if (Lexer.isLowSurrogate(ch)) { + return new LowSurrogateCharSet((char) ch); + } else if (Lexer.isHighSurrogate(ch)) { + return new HighSurrogateCharSet((char) ch); + } else { + return new CharSet((char) ch); + } + } + + private AbstractSet processRangeSet(AbstractCharClass charClass) { + if (charClass.hasLowHighSurrogates()) { + AbstractCharClass surrogates = charClass.getSurrogates(); + LowHighSurrogateRangeSet lowHighSurrRangeSet + = new LowHighSurrogateRangeSet(surrogates); + + if (charClass.mayContainSupplCodepoints()) { + if (!charClass.hasUCI()) { + return new CompositeRangeSet( + new SupplRangeSet(charClass.getWithoutSurrogates()), + lowHighSurrRangeSet); + } else { + return new CompositeRangeSet( + new UCISupplRangeSet(charClass.getWithoutSurrogates()), + lowHighSurrRangeSet); + } + } + + if (!charClass.hasUCI()) { + return new CompositeRangeSet( + new RangeSet(charClass.getWithoutSurrogates()), + lowHighSurrRangeSet); + } else { + return new CompositeRangeSet( + new UCIRangeSet(charClass.getWithoutSurrogates()), + lowHighSurrRangeSet); + } + } + + if (charClass.mayContainSupplCodepoints()) { + if (!charClass.hasUCI()) { + return new SupplRangeSet(charClass); + } else { + return new UCISupplRangeSet(charClass); + } + } + + if (!charClass.hasUCI()) { + return new RangeSet(charClass); + } else { + return new UCIRangeSet(charClass); + } + } + + /** + * Compiles a regular expression, creating a new Pattern instance in the + * process. 
This is actually a convenience method that calls {@link
+     * #compile(String, int)} with a {@code flags} value of zero.
+     *
+     * @param pattern
+     *            the regular expression.
+     *
+     * @return the new {@code Pattern} instance.
+     *
+     * @throws TPatternSyntaxException
+     *             if the regular expression is syntactically incorrect.
+     */
+    public static TPattern compile(String pattern) {
+        return compile(pattern, 0);
+    }
+
+    /*
+     * Finishes compilation. When the parser recorded at least one back
+     * reference (needsBackRefReplacement is set), a second pass is run over
+     * the compiled nodes, beginning at the start node.
+     */
+    private void finalizeCompile() {
+
+        /*
+         * Processing second pass
+         */
+        if (needsBackRefReplacement) { //|| needsReason1 || needsReason2) {
+            start.processSecondPass();
+        }
+
+    }
+
+    /**
+     * Tries to match a given regular expression against a given input. This is
+     * actually nothing but a convenience method that compiles the regular
+     * expression into a {@code Pattern}, builds a {@link TMatcher} for it, and
+     * then does the match. If the same regular expression is used for multiple
+     * operations, it is recommended to compile it into a {@code Pattern}
+     * explicitly and request a reusable {@code Matcher}.
+     *
+     * @param regex
+     *            the regular expression.
+     * @param input
+     *            the input to process.
+     *
+     * @return true if and only if the {@code Pattern} matches the input.
+     *
+     * @see TPattern#compile(java.lang.String, int)
+     * @see TMatcher#matches()
+     */
+    public static boolean matches(String regex, CharSequence input) {
+        return TPattern.compile(regex).matcher(input).matches();
+    }
+
+    /**
+     * Quotes a given string using "\Q" and "\E", so that all other
+     * meta-characters lose their special meaning. If the string is used for a
+     * {@code Pattern} afterwards, it can only be matched literally.
+     *
+     * @param s
+     *            the string to quote.
+     *
+     * @return the quoted string.
+     */
+    public static String quote(String s) {
+        StringBuilder quoted = new StringBuilder();
+        quoted.append("\\Q"); //$NON-NLS-1$
+        int from = 0;
+        int hit = s.indexOf("\\E", from); //$NON-NLS-1$
+        while (hit >= 0) {
+            // Copy the text up to and including the "\E" (which closes the
+            // open quote), then emit an escaped backslash followed by 'E'
+            // and reopen the quote.
+            int afterHit = hit + 2;
+            quoted.append(s.substring(from, afterHit));
+            quoted.append("\\\\E\\Q"); //$NON-NLS-1$
+            from = afterHit;
+            hit = s.indexOf("\\E", from); //$NON-NLS-1$
+        }
+        quoted.append(s.substring(from));
+        quoted.append("\\E"); //$NON-NLS-1$
+        return quoted.toString();
+    }
+
+    /**
+     * Returns the number of groups found at compile time.
+     */
+    int groupCount() {
+        return globalGroupIndex;
+    }
+
+    /**
+     * Returns one more than the current value of the {@code compCount} field.
+     */
+    int compCount() {
+        return compCount + 1;
+    }
+
+    /**
+     * Returns one more than the current value of the {@code consCount} field.
+     */
+    int consCount() {
+        return consCount + 1;
+    }
+
+    /**
+     * Returns the opposite-case counterpart of an ASCII letter: 'a'..'z' map
+     * to 'A'..'Z' and vice versa. Any other character is returned unchanged.
+     */
+    static char getSupplement(char ch) {
+        if (ch >= 'a' && ch <= 'z') {
+            return (char) (ch - 32);
+        }
+        if (ch >= 'A' && ch <= 'Z') {
+            return (char) (ch + 32);
+        }
+        return ch;
+    }
+
+    /**
+     * @return true if every bit of {@code flag} is set in this pattern's flags
+     */
+    private boolean hasFlag(int flag) {
+        int masked = flags & flag;
+        return masked == flag;
+    }
+
+    /**
+     * Not public: instances are obtained through the static compile methods.
+     */
+    private TPattern() {
+    }
+
+    /**
+     * Serialization support: after the default field restore, resets the
+     * compile-time counters and compiles the restored pattern again.
+     */
+    private void readObject(java.io.ObjectInputStream s)
+            throws java.io.IOException, ClassNotFoundException {
+        s.defaultReadObject();
+
+        // Bring the parser state back to its pre-compilation values.
+        backRefs = new FSet[BACK_REF_NUMBER];
+        globalGroupIndex = -1;
+        compCount = -1;
+        consCount = -1;
+        AbstractSet.counter = 1;
+
+        compileImpl(pattern, flags);
+    }
+}
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPatternSyntaxException.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPatternSyntaxException.java
new file mode 100644
index 000000000..6a79fac7b
--- /dev/null
+++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPatternSyntaxException.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +import java.util.Arrays; + +/** + * Encapsulates a syntax error that occurred during the compilation of a + * {@link TPattern}. Might include a detailed description, the original regular + * expression, and the index at which the error occurred. + * + * @see TPattern#compile(String) + * @see TPattern#compile(java.lang.String,int) + * + * @author Nikolay A. Kuznetsov + */ +public class TPatternSyntaxException extends IllegalArgumentException { + + private static final long serialVersionUID = -3864639126226059218L; + + /** + * Holds the description of the syntax error, or null if the description is + * not known. + */ + private String desc; + + /** + * Holds the syntactically incorrect regular expression, or null if the + * regular expression is not known. + */ + private String pattern; + + /** + * Holds the index around which the error occured, or -1, in case it is + * unknown. + */ + private int index = -1; + + /** + * Creates a new PatternSyntaxException for a given message, pattern, and + * error index. + * + * @param description + * the description of the syntax error, or {@code null} if the + * description is not known. 
+     * @param pattern
+     *            the syntactically incorrect regular expression, or
+     *            {@code null} if the regular expression is not known.
+     * @param index
+     *            the character index around which the error occurred, or -1 if
+     *            the index is not known.
+     */
+    public TPatternSyntaxException(String description, String pattern, int index) {
+        this.desc = description;
+        this.pattern = pattern;
+        this.index = index;
+    }
+
+    /**
+     * Returns the syntactically incorrect regular expression.
+     *
+     * @return the regular expression.
+     *
+     */
+    public String getPattern() {
+        return pattern;
+    }
+
+    /**
+     * Returns a detailed error message for the exception. When the pattern is
+     * known the message is multi-line: the description followed by
+     * {@code " near index N"} (only if the index is not negative), then the
+     * pattern on its own line, then - if the index lies inside the pattern - a
+     * line with a {@code ^} placed under the offending character. When the
+     * pattern is {@code null} or empty only the description is returned.
+     *
+     * @return the error message.
+     */
+    @Override
+    public String getMessage() {
+        StringBuilder message = new StringBuilder();
+        message.append(desc);
+
+        // Without the offending expression there is nothing more to show.
+        if (pattern == null || pattern.length() == 0) {
+            return message.toString();
+        }
+
+        if (index >= 0) {
+            message.append(" near index ").append(index);
+        }
+        message.append('\n').append(pattern);
+
+        // Draw the caret only when it can point at a character of the pattern.
+        if (index >= 0 && index < pattern.length()) {
+            char[] filler = new char[index];
+            Arrays.fill(filler, ' ');
+            message.append('\n').append(new String(filler)).append('^');
+        }
+        return message.toString();
+    }
+
+    /**
+     * Returns the description of the syntax error, or {@code null} if the
+     * description is not known.
+     *
+     * @return the description.
+     */
+    public String getDescription() {
+        return desc;
+    }
+
+    /**
+     * Returns the character index around which the error occurred, or -1 if the
+     * index is not known.
+     *
+     * @return the index.
+ * + */ + public int getIndex() { + return index; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIBackReferenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIBackReferenceSet.java new file mode 100644 index 000000000..a79603576 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIBackReferenceSet.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Unicode case insensitive back reference (i.e. \1-9) node. + * + * @author Nikolay A. 
Kuznetsov
+ */
+class UCIBackReferenceSet extends CIBackReferenceSet {
+
+    // Group number this node refers to; reported by getName().
+    int groupIndex;
+
+    /**
+     * @param groupIndex
+     *            number of the referenced group.
+     * @param consCounter
+     *            counter index handed on to the superclass.
+     */
+    public UCIBackReferenceSet(int groupIndex, int consCounter) {
+        super(groupIndex, consCounter);
+        // The field declared in this class is the one getName() reads, so it
+        // has to be set here; otherwise the name always reports group 0.
+        this.groupIndex = groupIndex;
+    }
+
+    /**
+     * Compares the referenced group's text with the input at
+     * {@code stringIndex}, character by character, after folding both sides
+     * to upper and then to lower case. On success the consumed length is
+     * recorded and matching continues with the next node.
+     *
+     * @return -1 if the group text is unavailable, does not fit before the
+     *         right bound or differs from the input; otherwise the result of
+     *         the next node.
+     */
+    public int matches(int stringIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        String group = getString(matchResult);
+
+        if (group == null
+                || (stringIndex + group.length()) > matchResult.getRightBound()) {
+            return -1;
+        }
+
+        for (int i = 0; i < group.length(); i++) {
+            if (Character.toLowerCase(Character.toUpperCase(group.charAt(i))) != Character
+                    .toLowerCase(Character.toUpperCase(testString
+                            .charAt(stringIndex + i)))) {
+                return -1;
+            }
+        }
+        matchResult.setConsumed(consCounter, group.length());
+        return next.matches(stringIndex + group.length(), testString,
+                matchResult);
+    }
+
+    public String getName() {
+        return "UCI back reference: " + this.groupIndex; //$NON-NLS-1$
+    }
+}
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCICharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCICharSet.java
new file mode 100644
index 000000000..885006dda
--- /dev/null
+++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCICharSet.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents node accepting single character in unicode case + * insensitive manner. + * + * @author Nikolay A. Kuznetsov + */ +class UCICharSet extends LeafSet { + + private char ch; + + public UCICharSet(char ch) { + this.ch = Character.toLowerCase(Character.toUpperCase(ch)); + } + + public int accepts(int strIndex, CharSequence testString) { + return (this.ch == Character.toLowerCase(Character + .toUpperCase(testString.charAt(strIndex)))) ? 1 : -1; + } + + protected String getName() { + return "UCI " + ch; //$NON-NLS-1$ + } +} \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIDecomposedCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIDecomposedCharSet.java new file mode 100644 index 000000000..e95b6cd6b --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIDecomposedCharSet.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.teavm.classlib.java.util.regex;
+
+/**
+ * Represents the Unicode case insensitive
+ * canonical decomposition of a
+ * Unicode character. It is used when the
+ * CANON_EQ flag of the Pattern class
+ * is specified.
+ */
+class UCIDecomposedCharSet extends DecomposedCharSet{
+
+    /*
+     * Stub: only forwards its arguments to the DecomposedCharSet constructor.
+     */
+    public UCIDecomposedCharSet(int [] decomp, int decomposedCharLength) {
+        super(decomp, decomposedCharLength);
+    }
+}
\ No newline at end of file
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIRangeSet.java
new file mode 100644
index 000000000..638addb91
--- /dev/null
+++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIRangeSet.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @author Nikolay A. Kuznetsov
+ */
+package org.teavm.classlib.java.util.regex;
+
+/**
+ * Represents node accepting single character from the given char class. Note,
+ * this class contains normalized characters for unicode case, ASCII case is
+ * supported through adding both symbols to the range.
+ *
+ * @author Nikolay A.
Kuznetsov + */ +class UCIRangeSet extends LeafSet { + + private AbstractCharClass chars; + + private boolean alt = false; + + public UCIRangeSet(AbstractCharClass cs, AbstractSet next) { + super(next); + this.chars = cs.getInstance(); + this.alt = cs.alt; + } + + public UCIRangeSet(AbstractCharClass cc) { + this.chars = cc.getInstance(); + this.alt = cc.alt; + } + + public int accepts(int strIndex, CharSequence testString) { + return (chars.contains(Character.toLowerCase(Character + .toUpperCase(testString.charAt(strIndex))))) ? 1 : -1; + } + + protected String getName() { + return "UCI range:" + (alt ? "^ " : " ") + chars.toString(); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + } +} \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISequenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISequenceSet.java new file mode 100644 index 000000000..55e9e8f4e --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISequenceSet.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. 
Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Node accepting substrings in unicode case insensitive manner. + * + * @author Nikolay A. Kuznetsov + */ +class UCISequenceSet extends LeafSet { + + private String string = null; + + UCISequenceSet(StringBuffer substring) { + StringBuilder res = new StringBuilder(); + for (int i = 0; i < substring.length(); i++) { + res.append(Character.toLowerCase(Character.toUpperCase(substring + .charAt(i)))); + } + this.string = res.toString(); + this.charCount = res.length(); + } + + public int accepts(int strIndex, CharSequence testString) { + for (int i = 0; i < string.length(); i++) { + if (string.charAt(i) != Character.toLowerCase(Character + .toUpperCase(testString.charAt(strIndex + i)))) { + return -1; + } + } + + return string.length(); + + } + + public String getName() { + return "UCI sequence: " + string; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplCharSet.java new file mode 100644 index 000000000..f32ff886b --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplCharSet.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * + * Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. + * + * COPYRIGHT AND PERMISSION NOTICE + * + * Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under + * the Terms of Use in http://www.unicode.org/copyright.html. Permission is + * hereby granted, free of charge, to any person obtaining a copy of the + * Unicode data files and any associated documentation (the "Data Files") + * or Unicode software and any associated documentation (the "Software") + * to deal in the Data Files or Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, distribute, + * and/or sell copies of the Data Files or Software, and to permit persons + * to whom the Data Files or Software are furnished to do so, provided that + * (a) the above copyright notice(s) and this permission notice appear with + * all copies of the Data Files or Software, (b) both the above copyright + * notice(s) and this permission notice appear in associated documentation, + * and (c) there is clear notice in each modified Data File or in the Software + * as well as in the documentation associated with the Data File(s) or Software + * that the data or software has been modified. + + * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + * OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS + * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THE DATA FILES OR SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in these Data Files or Software without prior written + * authorization of the copyright holder. + * + * 2. Additional terms from the Database: + * + * Copyright © 1995-1999 Unicode, Inc. All Rights reserved. + * + * Disclaimer + * + * The Unicode Character Database is provided as is by Unicode, Inc. + * No claims are made as to fitness for any particular purpose. No warranties + * of any kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been purchased + * on magnetic or optical media from Unicode, Inc., the sole remedy for any claim + * will be exchange of defective media within 90 days of receipt. This disclaimer + * is applicable for all other data files accompanying the Unicode Character Database, + * some of which have been compiled by the Unicode Consortium, and some of which + * have been supplied by other sources. + * + * Limitations on Rights to Redistribute This Data + * + * Recipient is granted the right to make copies in any form for internal + * distribution and to freely use the information supplied in the creation of + * products supporting the UnicodeTM Standard. The files in + * the Unicode Character Database can be redistributed to third parties or other + * organizations (whether for profit or not) as long as this notice and the disclaimer + * notice are retained. 
Information can be extracted from these files and used + * in documentation or programs, as long as there is an accompanying notice + * indicating the source. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * Represents node accepting single supplementary + * codepoint in Unicode case insensitive manner. + */ +class UCISupplCharSet extends LeafSet { + + //int value of this supplementary codepoint + private int ch; + + public UCISupplCharSet(int ch) { + charCount = 2; + this.ch = Character.toLowerCase(Character.toUpperCase(ch)); + } + + public int accepts(int strIndex, CharSequence testString) { + char high = testString.charAt(strIndex++); + char low = testString.charAt(strIndex); + return (this.ch == Character.toLowerCase(Character + .toUpperCase(Character + .toCodePoint(high, low)))) ? 2 : -1; + } + + protected String getName() { + return "UCI " + new String(Character.toChars(ch)); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplRangeSet.java new file mode 100644 index 000000000..159019c31 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplRangeSet.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * + * Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode. + * + * COPYRIGHT AND PERMISSION NOTICE + * + * Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under + * the Terms of Use in http://www.unicode.org/copyright.html. Permission is + * hereby granted, free of charge, to any person obtaining a copy of the + * Unicode data files and any associated documentation (the "Data Files") + * or Unicode software and any associated documentation (the "Software") + * to deal in the Data Files or Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, distribute, + * and/or sell copies of the Data Files or Software, and to permit persons + * to whom the Data Files or Software are furnished to do so, provided that + * (a) the above copyright notice(s) and this permission notice appear with + * all copies of the Data Files or Software, (b) both the above copyright + * notice(s) and this permission notice appear in associated documentation, + * and (c) there is clear notice in each modified Data File or in the Software + * as well as in the documentation associated with the Data File(s) or Software + * that the data or software has been modified. + + * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + * OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS + * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THE DATA FILES OR SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in these Data Files or Software without prior written + * authorization of the copyright holder. + * + * 2. Additional terms from the Database: + * + * Copyright © 1995-1999 Unicode, Inc. All Rights reserved. + * + * Disclaimer + * + * The Unicode Character Database is provided as is by Unicode, Inc. + * No claims are made as to fitness for any particular purpose. No warranties + * of any kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been purchased + * on magnetic or optical media from Unicode, Inc., the sole remedy for any claim + * will be exchange of defective media within 90 days of receipt. This disclaimer + * is applicable for all other data files accompanying the Unicode Character Database, + * some of which have been compiled by the Unicode Consortium, and some of which + * have been supplied by other sources. + * + * Limitations on Rights to Redistribute This Data + * + * Recipient is granted the right to make copies in any form for internal + * distribution and to freely use the information supplied in the creation of + * products supporting the UnicodeTM Standard. The files in + * the Unicode Character Database can be redistributed to third parties or other + * organizations (whether for profit or not) as long as this notice and the disclaimer + * notice are retained. 
Information can be extracted from these files and used + * in documentation or programs, as long as there is an accompanying notice + * indicating the source. + */ + +package org.teavm.classlib.java.util.regex; + +/** + * Represents node accepting single character from the given char class + * in Unicode case insensitive manner. + * This character can be supplementary (2 chars to represent) or from + * basic multilingual pane (1 char to represent). + */ +class UCISupplRangeSet extends SupplRangeSet{ + + public UCISupplRangeSet(AbstractCharClass cs, AbstractSet next) { + super(cs, next); + } + + public UCISupplRangeSet(AbstractCharClass cc) { + super(cc); + } + + public boolean contains(int ch) { + return chars.contains(Character.toLowerCase(Character.toUpperCase(ch))); + } + + protected String getName() { + return "UCI range:" + (alt ? "^ " : " ") + chars.toString(); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UEOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UEOLSet.java new file mode 100644 index 000000000..aa61db378 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UEOLSet.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Unix line terminator, accepting only \n. + * + * @author Nikolay A. Kuznetsov + */ +final class UEOLSet extends AbstractSet { + + private int consCounter; + + public UEOLSet(int counter) { + this.consCounter = counter; + } + + public int matches(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + int rightBound = matchResult.hasAnchoringBounds() ? matchResult + .getRightBound() : testString.length(); + + if (strIndex >= rightBound) { + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } + // check final line terminator; + + if ((rightBound - strIndex) == 1 && testString.charAt(strIndex) == '\n') { + matchResult.setConsumed(consCounter, 1); + return next.matches(strIndex + 1, testString, matchResult); + } + + return -1; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + int cons; + boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); + matchResult.setConsumed(consCounter, -1); + return res; + } + + protected String getName() { + return ""; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UMultiLineEOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UMultiLineEOLSet.java new file mode 100644 index 000000000..6a77173a9 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UMultiLineEOLSet.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Unix style multiline end-of-line node. + * + * @author Nikolay A. Kuznetsov + */ +class UMultiLineEOLSet extends AbstractSet { + + private int consCounter; + + public UMultiLineEOLSet(int counter) { + this.consCounter = counter; + } + + public int matches(int strIndex, CharSequence testString, + MatchResultImpl matchResult) { + int strDif = matchResult.hasAnchoringBounds() ? 
matchResult + .getRightBound() + - strIndex : testString.length() - strIndex; + if (strDif <= 0) { + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } else if (testString.charAt(strIndex) == '\n') { + matchResult.setConsumed(consCounter, 1); + return next.matches(strIndex + 1, testString, matchResult); + } + return -1; + } + + public boolean hasConsumed(MatchResultImpl matchResult) { + int cons; + boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); + matchResult.setConsumed(consCounter, -1); + return res; + } + + protected String getName() { + return ""; //$NON-NLS-1$ + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategory.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategory.java new file mode 100644 index 000000000..40fce58ed --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategory.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Unicode category (i.e. Ll, Lu). + * + * @author Nikolay A. 
Kuznetsov + */ +class UnicodeCategory extends AbstractCharClass { + + protected int category; + + public UnicodeCategory(int category) { + this.category = category; + } + + public boolean contains(int ch) { + return alt ^ (category == Character.getType((char) ch)); + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategoryScope.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategoryScope.java new file mode 100644 index 000000000..514fcfe51 --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategoryScope.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Unicode category scope (i.e IsL, IsM, ...) + * @author Nikolay A. 
Kuznetsov + */ +class UnicodeCategoryScope extends UnicodeCategory { + + public UnicodeCategoryScope(int category) { + super(category); + } + + public boolean contains(int ch) { + return alt ^ ((category >> Character.getType((char) ch)) & 1) != 0; + } +} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnifiedQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnifiedQuantifierSet.java new file mode 100644 index 000000000..2450f50aa --- /dev/null +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnifiedQuantifierSet.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Greedy quantifier node for the case where there is no intersection with + * next node and normal quantifiers could be treated as greedy and possessive. + * + * @author Nikolay A. 
Kuznetsov
+ */
+class UnifiedQuantifierSet extends LeafQuantifierSet {
+
+    public UnifiedQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
+        super(innerSet, next, type);
+    }
+
+    public UnifiedQuantifierSet(LeafQuantifierSet quant) {
+        super((LeafSet) quant.getInnerSet(), quant.getNext(), quant.getType());
+        // Re-point the inner set's next link at this node.
+        innerSet.setNext(this);
+    }
+
+    public int matches(int stringIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        while (stringIndex + leaf.charCount() <= matchResult.getRightBound()
+                && leaf.accepts(stringIndex, testString) > 0)
+            stringIndex += leaf.charCount();
+        // The leaf stopped accepting (or the bound was hit): continue with next.
+        return next.matches(stringIndex, testString, matchResult);
+    }
+
+    public int find(int stringIndex, CharSequence testString,
+            MatchResultImpl matchResult) {
+        int startSearch = next.find(stringIndex, testString, matchResult);
+        if (startSearch < 0)
+            return -1;
+        int newSearch = startSearch - leaf.charCount(); // candidate one leaf earlier
+        while (newSearch >= stringIndex
+                && leaf.accepts(newSearch, testString) > 0) {
+            startSearch = newSearch;
+            newSearch -= leaf.charCount();
+        }
+        // Index reached by stepping back from next's position while the leaf accepted.
+        return startSearch;
+    }
+}
diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/WordBoundary.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/WordBoundary.java
new file mode 100644
index 000000000..bb29647f9
--- /dev/null
+++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/WordBoundary.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Nikolay A. Kuznetsov + */ +package org.teavm.classlib.java.util.regex; + +/** + * Represents word boundary, checks current character and previous one if + * different types returns true; + * + * @author Nikolay A. Kuznetsov + */ +class WordBoundary extends AbstractSet { + + boolean positive; + + public WordBoundary(boolean positive) { + this.positive = positive; + } + + public int matches(int stringIndex, CharSequence testString, + MatchResultImpl matchResult) { + boolean left; + boolean right; + + char ch1 = stringIndex >= matchResult.getRightBound() ? ' ' : testString + .charAt(stringIndex); + char ch2 = stringIndex == 0 ? ' ' : testString.charAt(stringIndex - 1); + + int leftBound = matchResult.hasTransparentBounds() ? 0 : matchResult + .getLeftBound(); + left = (ch1 == ' ') || isSpace(ch1, stringIndex, leftBound, testString); + right = (ch2 == ' ') + || isSpace(ch2, stringIndex - 1, leftBound, testString); + return ((left ^ right) ^ positive) ? -1 : next.matches(stringIndex, + testString, matchResult); + } + + /** + * Returns false, because word boundary does not consumes any characters and + * do not move string index. 
+ */ + public boolean hasConsumed(MatchResultImpl matchResult) { + // only checks boundary, do not consumes characters + return false; + } + + protected String getName() { + return "WordBoundary"; //$NON-NLS-1$ + } + + private boolean isSpace(char ch, int index, int leftBound, + CharSequence testString) { + if (Character.isLetterOrDigit(ch) || ch == '_') + return false; + if (Character.getType(ch) == Character.NON_SPACING_MARK) { + for (; --index >= leftBound;) { + ch = testString.charAt(index); + if (Character.isLetterOrDigit(ch)) + return false; + if (Character.getType(ch) != Character.NON_SPACING_MARK) + return true; + } + } + return true; + } +} diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/Matcher2Test.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/Matcher2Test.java new file mode 100644 index 000000000..ece7d0ba3 --- /dev/null +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/Matcher2Test.java @@ -0,0 +1,234 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.teavm.classlib.java.util.regex; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import junit.framework.TestCase; + +/** + * Tests Matcher methods + */ +@SuppressWarnings("nls") +public class Matcher2Test extends TestCase { + public void test_toString() { + Pattern p = Pattern.compile("foo"); + Matcher m = p.matcher("bar"); + assertNotNull(m.toString()); + } + + public void testErrorConditions() throws PatternSyntaxException { + // Test match cursors in absence of a match + Pattern p = Pattern.compile("foo"); + Matcher m = p.matcher("bar"); + assertFalse(m.matches()); + + try { + m.start(); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.end(); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.group(); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.start(1); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.end(1); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.group(1); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + // regression test for HARMONY-2418 + try { + m.usePattern(null); + fail("IllegalArgumentException expected"); + } catch (IllegalArgumentException e) { + // PASSED + } + } + + public void testErrorConditions2() throws PatternSyntaxException { + // Test match cursors in absence of a match + Pattern p = Pattern.compile("(foo[0-9])(bar[a-z])"); + Matcher m = p.matcher("foo1barzfoo2baryfoozbar5"); + + assertTrue(m.find()); + assertEquals(0, m.start()); + assertEquals(8, m.end()); + assertEquals(0, m.start(1)); + assertEquals(4, m.end(1)); + assertEquals(4, m.start(2)); + assertEquals(8, m.end(2)); + + try { + m.start(3); + fail("IndexOutOfBoundsException expected"); + } catch 
(IndexOutOfBoundsException e) { + } + + try { + m.end(3); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + try { + m.group(3); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + try { + m.start(-1); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + try { + m.end(-1); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + try { + m.group(-1); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + assertTrue(m.find()); + assertEquals(8, m.start()); + assertEquals(16, m.end()); + assertEquals(8, m.start(1)); + assertEquals(12, m.end(1)); + assertEquals(12, m.start(2)); + assertEquals(16, m.end(2)); + + try { + m.start(3); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + try { + m.end(3); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + try { + m.group(3); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + try { + m.start(-1); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + try { + m.end(-1); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + try { + m.group(-1); + fail("IndexOutOfBoundsException expected"); + } catch (IndexOutOfBoundsException e) { + } + + assertFalse(m.find()); + + try { + m.start(3); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.end(3); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.group(3); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.start(-1); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.end(-1); 
+ fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + + try { + m.group(-1); + fail("IllegalStateException expected"); + } catch (IllegalStateException e) { + } + } + + /* + * Regression test for HARMONY-997 + */ + public void testReplacementBackSlash() { + String str = "replace me"; + String replacedString = "me"; + String substitutionString = "\\"; + Pattern pat = Pattern.compile(replacedString); + Matcher mat = pat.matcher(str); + try { + mat.replaceAll(substitutionString); + fail("IndexOutOfBoundsException should be thrown"); + } catch (IndexOutOfBoundsException e) { + } + } +} diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/MatcherTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/MatcherTest.java new file mode 100644 index 000000000..12af6b879 --- /dev/null +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/MatcherTest.java @@ -0,0 +1,772 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.teavm.classlib.java.util.regex; + +import static org.junit.Assert.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.Test; + + +@SuppressWarnings("nls") +public class MatcherTest { + String[] testPatterns = { + "(a|b)*abb", + "(1*2*3*4*)*567", + "(a|b|c|d)*aab", + "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*", + "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*", + "(a|b)*(a|b)*A(a|b)*lice.*", + "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|" + + "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do" }; + + String[] groupPatterns = { "(a|b)*aabb", "((a)|b)*aabb", "((a|b)*)a(abb)", + "(((a)|(b))*)aabb", "(((a)|(b))*)aa(b)b", "(((a)|(b))*)a(a(b)b)" }; + + @Test + public void testRegionsIntInt() { + Pattern p = Pattern.compile("x*"); + Matcher m = p.matcher("axxxxxa"); + assertFalse(m.matches()); + + m.region(1, 6); + assertEquals(1, m.regionStart()); + assertEquals(6, m.regionEnd()); + assertTrue(m.matches()); + + try { + m.region(1, 0); + fail("expected an IOOBE"); + } catch(IndexOutOfBoundsException e) { + } + + try { + m.region(-1, 2); + fail("expected an IOOBE"); + } catch(IndexOutOfBoundsException e) { + } + + try { + m.region(10, 11); + fail("expected an IOOBE"); + } catch(IndexOutOfBoundsException e) { + } + + try { + m.region(1, 10); + fail("expected an IOOBE"); + } catch(IndexOutOfBoundsException e) { + } + } + + @Test + public void testAppendReplacement() { + Pattern pat = Pattern.compile("XX"); + Matcher m = pat.matcher("Today is XX-XX-XX ..."); + StringBuffer sb = new StringBuffer(); + + for (int i = 0; m.find(); i++) { + m.appendReplacement(sb, new Integer(i * 10 + i).toString()); + } + m.appendTail(sb); + assertEquals("Today is 0-11-22 ...", sb.toString()); + } + + @Test + public void testAppendReplacementRef() { + Pattern p = Pattern.compile("xx (rur|\\$)"); + Matcher m = p.matcher("xx $ equals to xx rur."); + StringBuffer sb = new 
StringBuffer(); + for (int i = 1; m.find(); i *= 30) { + String rep = new Integer(i).toString() + " $1"; + m.appendReplacement(sb, rep); + } + m.appendTail(sb); + assertEquals("1 $ equals to 30 rur.", sb.toString()); + } + + @Test + public void testReplaceAll() { + String input = "aabfooaabfooabfoob"; + String pattern = "a*b"; + Pattern pat = Pattern.compile(pattern); + Matcher mat = pat.matcher(input); + + assertEquals("-foo-foo-foo-", mat.replaceAll("-")); + } + + @Test + public void testResetCharSequence() { + Pattern p = Pattern.compile("abcd"); + Matcher m = p.matcher("abcd"); + assertTrue(m.matches()); + m.reset("efgh"); + assertFalse(m.matches()); + + try { + m.reset(null); + fail("expected a NPE"); + } catch (NullPointerException e) { + } + } + + @Test + public void testAppendSlashes() { + Pattern p = Pattern.compile("\\\\"); + Matcher m = p.matcher("one\\cat\\two\\cats\\in\\the\\yard"); + StringBuffer sb = new StringBuffer(); + while (m.find()) { + m.appendReplacement(sb, "\\\\"); + } + m.appendTail(sb); + assertEquals("one\\cat\\two\\cats\\in\\the\\yard", sb.toString()); + + } + + @Test + public void testReplaceFirst() { + String input = "zzzdogzzzdogzzz"; + String pattern = "dog"; + Pattern pat = Pattern.compile(pattern); + Matcher mat = pat.matcher(input); + + assertEquals("zzzcatzzzdogzzz", mat.replaceFirst("cat")); + } + + @Test + public void testPattern() { + for (String element : testPatterns) { + Pattern test = Pattern.compile(element); + assertEquals(test, test.matcher("aaa").pattern()); + } + + for (String element : testPatterns) { + assertEquals(element, Pattern.compile(element).matcher("aaa") + .pattern().toString()); + } + } + + /* + * Class under test for Matcher reset() + */ + public void testReset() { + } + + @Test + public void testGroupint() { + String positiveTestString = "ababababbaaabb"; + + // test IndexOutOfBoundsException + // // + for (int i = 0; i < groupPatterns.length; i++) { + Pattern test = Pattern.compile(groupPatterns[i]); + 
Matcher mat = test.matcher(positiveTestString); + mat.matches(); + try { + // groupPattern equals to number of groups + // of the specified pattern + // // + mat.group(i + 2); + fail("IndexOutBoundsException expected"); + mat.group(i + 100); + fail("IndexOutBoundsException expected"); + mat.group(-1); + fail("IndexOutBoundsException expected"); + mat.group(-100); + fail("IndexOutBoundsException expected"); + } catch (IndexOutOfBoundsException iobe) { + } + } + + String[][] groupResults = { { "a" }, { "a", "a" }, + { "ababababba", "a", "abb" }, { "ababababba", "a", "a", "b" }, + { "ababababba", "a", "a", "b", "b" }, + { "ababababba", "a", "a", "b", "abb", "b" }, }; + + for (int i = 0; i < groupPatterns.length; i++) { + Pattern test = Pattern.compile(groupPatterns[i]); + Matcher mat = test.matcher(positiveTestString); + mat.matches(); + for (int j = 0; j < groupResults[i].length; j++) { + assertEquals("i: " + i + " j: " + j, groupResults[i][j], mat + .group(j + 1)); + } + + } + + } + + @Test + public void testGroup() { + String positiveTestString = "ababababbaaabb"; + String negativeTestString = "gjhfgdsjfhgcbv"; + for (String element : groupPatterns) { + Pattern test = Pattern.compile(element); + Matcher mat = test.matcher(positiveTestString); + mat.matches(); + // test result + assertEquals(positiveTestString, mat.group()); + + // test equal to group(0) result + assertEquals(mat.group(0), mat.group()); + } + + for (String element : groupPatterns) { + Pattern test = Pattern.compile(element); + Matcher mat = test.matcher(negativeTestString); + mat.matches(); + try { + mat.group(); + fail("IllegalStateException expected for matches result"); + } catch (IllegalStateException ise) { + } + } + } + + @Test + public void testGroupPossessive() { + Pattern pat = Pattern.compile("((a)|(b))++c"); + Matcher mat = pat.matcher("aac"); + + mat.matches(); + assertEquals("a", mat.group(1)); + } + + /* + * Class under test for boolean find(int) + */ + public void testFindint() { + } 
+ + /* + * Class under test for int start(int) + */ + public void testStartint() { + } + + /* + * Class under test for int end(int) + */ + public void testEndint() { + } + + @Test + public void testMatchesMisc() { + String[][] posSeq = { + { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" }, + { "213567", "12324567", "1234567", "213213567", + "21312312312567", "444444567" }, + { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" }, + { "213234567", "3458", "0987654", "7689546432", "0398576", + "98432", "5" }, + { + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" }, + { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa", + "abbbAbbbliceaaa", "Alice" }, + { "a123", "bnxnvgds156", "for", "while", "if", "struct" } + + }; + + for (int i = 0; i < testPatterns.length; i++) { + Pattern pat = Pattern.compile(testPatterns[i]); + for (int j = 0; j < posSeq[i].length; j++) { + Matcher mat = pat.matcher(posSeq[i][j]); + assertTrue("Incorrect match: " + testPatterns[i] + " vs " + + posSeq[i][j], mat.matches()); + } + } + } + + @Test + public void testMatchesQuantifiers() { + String[] testPatternsSingles = { "a{5}", "a{2,4}", "a{3,}" }; + String[] testPatternsMultiple = { "((a)|(b)){1,2}abb", + "((a)|(b)){2,4}", "((a)|(b)){3,}" }; + + String[][] stringSingles = { { "aaaaa", "aaa" }, + { "aa", "a", "aaa", "aaaaaa", "aaaa", "aaaaa" }, + { "aaa", "a", "aaaa", "aa" }, }; + + String[][] stringMultiples = { { "ababb", "aba" }, + { "ab", "b", "bab", "ababa", "abba", "abababbb" }, + { "aba", "b", "abaa", "ba" }, }; + + for (int i = 0; i < testPatternsSingles.length; i++) { + Pattern pat = Pattern.compile(testPatternsSingles[i]); + for (int j = 0; j < stringSingles.length / 2; j++) { + assertTrue("Match expected, but failed: " + pat.pattern() + + " : " + stringSingles[i][j], pat.matcher( + stringSingles[i][j * 2]).matches()); + assertFalse("Match 
failure expected, but match succeed: " + + pat.pattern() + " : " + stringSingles[i][j * 2 + 1], + pat.matcher(stringSingles[i][j * 2 + 1]).matches()); + } + } + + for (int i = 0; i < testPatternsMultiple.length; i++) { + Pattern pat = Pattern.compile(testPatternsMultiple[i]); + for (int j = 0; j < stringMultiples.length / 2; j++) { + assertTrue("Match expected, but failed: " + pat.pattern() + + " : " + stringMultiples[i][j], pat.matcher( + stringMultiples[i][j * 2]).matches()); + assertFalse( + "Match failure expected, but match succeed: " + + pat.pattern() + " : " + + stringMultiples[i][j * 2 + 1], pat.matcher( + stringMultiples[i][j * 2 + 1]).matches()); + } + } + } + + @Test + public void testQuantVsGroup() { + String patternString = "(d{1,3})((a|c)*)(d{1,3})((a|c)*)(d{1,3})"; + String testString = "dacaacaacaaddaaacaacaaddd"; + + Pattern pat = Pattern.compile(patternString); + Matcher mat = pat.matcher(testString); + + mat.matches(); + assertEquals("dacaacaacaaddaaacaacaaddd", mat.group()); + assertEquals("d", mat.group(1)); + assertEquals("acaacaacaa", mat.group(2)); + assertEquals("dd", mat.group(4)); + assertEquals("aaacaacaa", mat.group(5)); + assertEquals("ddd", mat.group(7)); + } + + public void testLookingAt() { + } + + /* + * Class under test for boolean find() + */ + public void testFind() { + String testPattern = "(abb)"; + String testString = "cccabbabbabbabbabb"; + Pattern pat = Pattern.compile(testPattern); + Matcher mat = pat.matcher(testString); + int start = 3; + int end = 6; + while (mat.find()) { + assertEquals(start, mat.start(1)); + assertEquals(end, mat.end(1)); + + start = end; + end += 3; + } + + testPattern = "(\\d{1,3})"; + testString = "aaaa123456789045"; + + Pattern pat2 = Pattern.compile(testPattern); + Matcher mat2 = pat2.matcher(testString); + start = 4; + int length = 3; + while (mat2.find()) { + assertEquals(testString.substring(start, start + length), mat2 + .group(1)); + start += length; + } + } + + @Test + public void 
testSEOLsymbols() { + Pattern pat = Pattern.compile("^a\\(bb\\[$"); + Matcher mat = pat.matcher("a(bb["); + + assertTrue(mat.matches()); + } + + /* + * Class under test for int start() + */ + public void testStart() { + } + + @Test + public void testGroupCount() { + for (int i = 0; i < groupPatterns.length; i++) { + Pattern test = Pattern.compile(groupPatterns[i]); + Matcher mat = test.matcher("ababababbaaabb"); + mat.matches(); + assertEquals(i + 1, mat.groupCount()); + + } + } + + @Test + public void testRelactantQuantifiers() { + Pattern pat = Pattern.compile("(ab*)*b"); + Matcher mat = pat.matcher("abbbb"); + + if (mat.matches()) { + assertEquals("abbb", mat.group(1)); + } else { + fail("Match expected: (ab*)*b vs abbbb"); + } + } + + @Test + public void testEnhancedFind() { + String input = "foob"; + String pattern = "a*b"; + Pattern pat = Pattern.compile(pattern); + Matcher mat = pat.matcher(input); + + mat.find(); + assertEquals("b", mat.group()); + } + + @Test + public void testPosCompositeGroup() { + String[] posExamples = { "aabbcc", "aacc", "bbaabbcc" }; + String[] negExamples = { "aabb", "bb", "bbaabb" }; + Pattern posPat = Pattern.compile("(aa|bb){1,3}+cc"); + Pattern negPat = Pattern.compile("(aa|bb){1,3}+bb"); + + Matcher mat; + for (String element : posExamples) { + mat = posPat.matcher(element); + assertTrue(mat.matches()); + } + + for (String element : negExamples) { + mat = negPat.matcher(element); + assertFalse(mat.matches()); + } + + assertTrue(Pattern.matches("(aa|bb){1,3}+bb", "aabbaabb")); + + } + + @Test + public void testPosAltGroup() { + String[] posExamples = { "aacc", "bbcc", "cc" }; + String[] negExamples = { "bb", "aa" }; + Pattern posPat = Pattern.compile("(aa|bb)?+cc"); + Pattern negPat = Pattern.compile("(aa|bb)?+bb"); + + Matcher mat; + for (String element : posExamples) { + mat = posPat.matcher(element); + assertTrue(posPat.toString() + " vs: " + element, mat.matches()); + } + + for (String element : negExamples) { + mat = 
negPat.matcher(element); + assertFalse(mat.matches()); + } + + assertTrue(Pattern.matches("(aa|bb)?+bb", "aabb")); + } + + @Test + public void testRelCompGroup() { + + Matcher mat; + Pattern pat; + String res = ""; + for (int i = 0; i < 4; i++) { + pat = Pattern.compile("((aa|bb){" + i + ",3}?).*cc"); + mat = pat.matcher("aaaaaacc"); + assertTrue(pat.toString() + " vs: " + "aaaaaacc", mat.matches()); + assertEquals(res, mat.group(1)); + res += "aa"; + } + } + + @Test + public void testRelAltGroup() { + + Matcher mat; + Pattern pat; + + pat = Pattern.compile("((aa|bb)??).*cc"); + mat = pat.matcher("aacc"); + assertTrue(pat.toString() + " vs: " + "aacc", mat.matches()); + assertEquals("", mat.group(1)); + + pat = Pattern.compile("((aa|bb)??)cc"); + mat = pat.matcher("aacc"); + assertTrue(pat.toString() + " vs: " + "aacc", mat.matches()); + assertEquals("aa", mat.group(1)); + } + + @Test + public void testIgnoreCase() { + Pattern pat = Pattern.compile("(aa|bb)*", Pattern.CASE_INSENSITIVE); + Matcher mat = pat.matcher("aAbb"); + + assertTrue(mat.matches()); + + pat = Pattern.compile("(a|b|c|d|e)*", Pattern.CASE_INSENSITIVE); + mat = pat.matcher("aAebbAEaEdebbedEccEdebbedEaedaebEbdCCdbBDcdcdADa"); + assertTrue(mat.matches()); + + pat = Pattern.compile("[a-e]*", Pattern.CASE_INSENSITIVE); + mat = pat.matcher("aAebbAEaEdebbedEccEdebbedEaedaebEbdCCdbBDcdcdADa"); + assertTrue(mat.matches()); + + } + + @Test + public void testQuoteReplacement() { + assertEquals("\\\\aaCC\\$1", Matcher.quoteReplacement("\\aaCC$1")); + } + + @Test + public void testOverFlow() { + Pattern tp = Pattern.compile("(a*)*"); + Matcher tm = tp.matcher("aaa"); + assertTrue(tm.matches()); + assertEquals("", tm.group(1)); + + assertTrue(Pattern.matches("(1+)\\1+", "11")); + assertTrue(Pattern.matches("(1+)(2*)\\2+", "11")); + + Pattern pat = Pattern.compile("(1+)\\1*"); + Matcher mat = pat.matcher("11"); + + assertTrue(mat.matches()); + assertEquals("11", mat.group(1)); + + pat = 
Pattern.compile("((1+)|(2+))(\\2+)"); + mat = pat.matcher("11"); + + assertTrue(mat.matches()); + assertEquals("1", mat.group(2)); + assertEquals("1", mat.group(1)); + assertEquals("1", mat.group(4)); + assertNull(mat.group(3)); + + } + + @Test + public void testUnicode() { + + assertTrue(Pattern.matches("\\x61a", "aa")); + assertTrue(Pattern.matches("\\u0061a", "aa")); + assertTrue(Pattern.matches("\\0141a", "aa")); + assertTrue(Pattern.matches("\\0777", "?7")); + + } + + @Test + public void testUnicodeCategory() { + assertTrue(Pattern.matches("\\p{Ll}", "k")); // Unicode lower case + assertTrue(Pattern.matches("\\P{Ll}", "K")); // Unicode non-lower + // case + assertTrue(Pattern.matches("\\p{Lu}", "K")); // Unicode upper case + assertTrue(Pattern.matches("\\P{Lu}", "k")); // Unicode non-upper + // case + // combinations + assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Lu}]]", "k")); + assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Ll}]]", "K")); + assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Lu}]]", "K")); + assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Ll}]]", "k")); + + // category/character combinations + assertFalse(Pattern.matches("[\\p{L}&&[^a-z]]", "k")); + assertTrue(Pattern.matches("[\\p{L}&&[^a-z]]", "K")); + + assertTrue(Pattern.matches("[\\p{Lu}a-z]", "k")); + assertTrue(Pattern.matches("[a-z\\p{Lu}]", "k")); + + assertFalse(Pattern.matches("[\\p{Lu}a-d]", "k")); + assertTrue(Pattern.matches("[a-d\\p{Lu}]", "K")); + + // assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Lu}&&[^K]]]", "K")); + assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Lu}&&[^G]]]", "K")); + + } + + @Test + public void testSplitEmpty() { + + Pattern pat = Pattern.compile(""); + String[] s = pat.split("", -1); + + assertEquals(1, s.length); + assertEquals("", s[0]); + } + + @Test + public void testFindDollar() { + Matcher mat = Pattern.compile("a$").matcher("a\n"); + assertTrue(mat.find()); + assertEquals("a", mat.group()); + } + + @Test + public void testMatchesRegionChanged() { + // Regression for 
HARMONY-610 + String input = " word "; + Pattern pattern = Pattern.compile("\\w+"); + Matcher matcher = pattern.matcher(input); + matcher.region(1, 5); + assertTrue(matcher.matches()); + } + + @Test + public void testAllCodePoints() { + // Regression for HARMONY-3145 + int[] codePoint = new int[1]; + Pattern p = Pattern.compile("(\\p{all})+"); + boolean res = true; + int cnt = 0; + String s; + for (int i = 0; i < 0x110000; i++) { + codePoint[0] = i; + s = new String(codePoint, 0, 1); + if (!s.matches(p.toString())) { + cnt++; + res = false; + } + } + assertTrue(res); + assertEquals(0, cnt); + + p = Pattern.compile("(\\P{all})+"); + res = true; + cnt = 0; + + for (int i = 0; i < 0x110000; i++) { + codePoint[0] = i; + s = new String(codePoint, 0, 1); + if (!s.matches(p.toString())) { + cnt++; + res = false; + } + } + + assertFalse(res); + assertEquals(0x110000, cnt); + } + + @Test + public void testFindRegionChanged() { + // Regression for HARMONY-625 + Pattern pattern = Pattern.compile("(?s).*"); + Matcher matcher = pattern.matcher("abcde"); + matcher.find(); + assertEquals("abcde", matcher.group()); + + matcher = pattern.matcher("abcde"); + matcher.region(0, 2); + matcher.find(); + assertEquals("ab", matcher.group()); + + } + + @Test + public void testFindRegionChanged2() { + // Regression for HARMONY-713 + Pattern pattern = Pattern.compile("c"); + + String inputStr = "aabb.c"; + Matcher matcher = pattern.matcher(inputStr); + matcher.region(0, 3); + + assertFalse(matcher.find()); + } + + @Test + public void testPatternMatcher() throws Exception { + Pattern pattern = Pattern.compile("(?:\\d+)(?:pt)"); + assertTrue(pattern.matcher("14pt").matches()); + } + + @Test + public void test3360() { + String str = "!\"#%&'(),-./"; + Pattern p = Pattern.compile("\\s"); + Matcher m = p.matcher(str); + + assertFalse(m.find()); + } + + @Test + public void testGeneralPunctuationCategory() { + String[] s = { ",", "!", "\"", "#", "%", "&", "'", "(", ")", "-", ".", + "/" }; + String 
regexp = "\\p{P}"; + + for (int i = 0; i < s.length; i++) { + Pattern pattern = Pattern.compile(regexp); + Matcher matcher = pattern.matcher(s[i]); + assertTrue(matcher.find()); + } + } + + @Test + public void testHitEndAfterFind() { + hitEndTest(true, "#01.0", "r((ege)|(geg))x", "regexx", false); + hitEndTest(true, "#01.1", "r((ege)|(geg))x", "regex", false); + hitEndTest(true, "#01.2", "r((ege)|(geg))x", "rege", true); + hitEndTest(true, "#01.2", "r((ege)|(geg))x", "xregexx", false); + + hitEndTest(true, "#02.0", "regex", "rexreger", true); + hitEndTest(true, "#02.1", "regex", "raxregexr", false); + + String floatRegex = getHexFloatRegex(); + hitEndTest(true, "#03.0", floatRegex, Double.toHexString(-1.234d), true); + hitEndTest(true, "#03.1", floatRegex, "1 ABC" + + Double.toHexString(Double.NaN) + "buhuhu", false); + hitEndTest(true, "#03.2", floatRegex, Double.toHexString(-0.0) + "--", + false); + hitEndTest(true, "#03.3", floatRegex, "--" + + Double.toHexString(Double.MIN_VALUE) + "--", false); + + hitEndTest(true, "#04.0", "(\\d+) fish (\\d+) fish (\\w+) fish (\\d+)", + "1 fish 2 fish red fish 5", true); + hitEndTest(true, "#04.1", "(\\d+) fish (\\d+) fish (\\w+) fish (\\d+)", + "----1 fish 2 fish red fish 5----", false); + } + + @Test + public void testToString() { + String result = Pattern.compile("(\\d{1,3})").matcher( + "aaaa123456789045").toString(); + assertTrue("The result doesn't contain pattern info", result + .contains("(\\d{1,3})")); + } + + private void hitEndTest(boolean callFind, String testNo, String regex, + String input, boolean hit) { + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(input); + if (callFind) { + matcher.find(); + } else { + matcher.matches(); + } + boolean h = matcher.hitEnd(); + + assertTrue(testNo, h == hit); + } + + private String getHexFloatRegex() { + String hexDecimal = "(-|\\+)?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP](-|\\+)?[0-9]+)?"; + String notANumber = "((-|\\+)?Infinity)|([nN]a[nN])"; + 
return new StringBuilder("((").append(hexDecimal).append(")|(").append( + notANumber).append("))").toString(); + } +} diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ModeTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ModeTest.java new file mode 100644 index 000000000..b8b95395e --- /dev/null +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ModeTest.java @@ -0,0 +1,111 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.teavm.classlib.java.util.regex; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import junit.framework.TestCase; + +/** + * Tests Pattern compilation modes and modes triggered in pattern strings + */ +@SuppressWarnings("nls") +public class ModeTest extends TestCase { + public void testCase() throws PatternSyntaxException { + Pattern p; + Matcher m; + + p = Pattern.compile("([a-z]+)[0-9]+"); + m = p.matcher("cAT123#dog345"); + assertTrue(m.find()); + assertEquals("dog", m.group(1)); + assertFalse(m.find()); + + p = Pattern.compile("([a-z]+)[0-9]+", Pattern.CASE_INSENSITIVE); + m = p.matcher("cAt123#doG345"); + assertTrue(m.find()); + assertEquals("cAt", m.group(1)); + assertTrue(m.find()); + assertEquals("doG", m.group(1)); + assertFalse(m.find()); + + p = Pattern.compile("(?i)([a-z]+)[0-9]+"); + m = p.matcher("cAt123#doG345"); + assertTrue(m.find()); + assertEquals("cAt", m.group(1)); + assertTrue(m.find()); + assertEquals("doG", m.group(1)); + assertFalse(m.find()); + } + + public void testMultiline() throws PatternSyntaxException { + Pattern p; + Matcher m; + + p = Pattern.compile("^foo"); + m = p.matcher("foobar"); + assertTrue(m.find()); + assertTrue(m.start() == 0 && m.end() == 3); + assertFalse(m.find()); + + m = p.matcher("barfoo"); + assertFalse(m.find()); + + p = Pattern.compile("foo$"); + m = p.matcher("foobar"); + assertFalse(m.find()); + + m = p.matcher("barfoo"); + assertTrue(m.find()); + assertTrue(m.start() == 3 && m.end() == 6); + assertFalse(m.find()); + + p = Pattern.compile("^foo([0-9]*)", Pattern.MULTILINE); + m = p.matcher("foo1bar\nfoo2foo3\nbarfoo4"); + assertTrue(m.find()); + assertEquals("1", m.group(1)); + assertTrue(m.find()); + assertEquals("2", m.group(1)); + assertFalse(m.find()); + + p = Pattern.compile("foo([0-9]*)$", Pattern.MULTILINE); + m = p.matcher("foo1bar\nfoo2foo3\nbarfoo4"); + assertTrue(m.find()); + assertEquals("3", m.group(1)); + 
assertTrue(m.find()); + assertEquals("4", m.group(1)); + assertFalse(m.find()); + + p = Pattern.compile("(?m)^foo([0-9]*)"); + m = p.matcher("foo1bar\nfoo2foo3\nbarfoo4"); + assertTrue(m.find()); + assertEquals("1", m.group(1)); + assertTrue(m.find()); + assertEquals("2", m.group(1)); + assertFalse(m.find()); + + p = Pattern.compile("(?m)foo([0-9]*)$"); + m = p.matcher("foo1bar\nfoo2foo3\nbarfoo4"); + assertTrue(m.find()); + assertEquals("3", m.group(1)); + assertTrue(m.find()); + assertEquals("4", m.group(1)); + assertFalse(m.find()); + } +} diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/Pattern2Test.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/Pattern2Test.java new file mode 100644 index 000000000..dfacec463 --- /dev/null +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/Pattern2Test.java @@ -0,0 +1,1412 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.teavm.classlib.java.util.regex; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import junit.framework.TestCase; + +/** + * Tests simple Pattern compilation and Matcher methods + */ +@SuppressWarnings("nls") +public class Pattern2Test extends TestCase { + public void testSimpleMatch() throws PatternSyntaxException { + Pattern p = Pattern.compile("foo.*"); + + Matcher m1 = p.matcher("foo123"); + assertTrue(m1.matches()); + assertTrue(m1.find(0)); + assertTrue(m1.lookingAt()); + + Matcher m2 = p.matcher("fox"); + assertFalse(m2.matches()); + assertFalse(m2.find(0)); + assertFalse(m2.lookingAt()); + + assertTrue(Pattern.matches("foo.*", "foo123")); + assertFalse(Pattern.matches("foo.*", "fox")); + + assertFalse(Pattern.matches("bar", "foobar")); + + assertTrue(Pattern.matches("", "")); + } + + public void testCursors() { + Pattern p; + Matcher m; + + try { + p = Pattern.compile("foo"); + + m = p.matcher("foobar"); + assertTrue(m.find()); + assertEquals(0, m.start()); + assertEquals(3, m.end()); + assertFalse(m.find()); + + // Note: also testing reset here + m.reset(); + assertTrue(m.find()); + assertEquals(0, m.start()); + assertEquals(3, m.end()); + assertFalse(m.find()); + + m.reset("barfoobar"); + assertTrue(m.find()); + assertEquals(3, m.start()); + assertEquals(6, m.end()); + assertFalse(m.find()); + + m.reset("barfoo"); + assertTrue(m.find()); + assertEquals(3, m.start()); + assertEquals(6, m.end()); + assertFalse(m.find()); + + m.reset("foobarfoobarfoo"); + assertTrue(m.find()); + assertEquals(0, m.start()); + assertEquals(3, m.end()); + assertTrue(m.find()); + assertEquals(6, m.start()); + assertEquals(9, m.end()); + assertTrue(m.find()); + assertEquals(12, m.start()); + assertEquals(15, m.end()); + assertFalse(m.find()); + assertTrue(m.find(0)); + assertEquals(0, m.start()); + assertEquals(3, m.end()); + assertTrue(m.find(4)); + assertEquals(6, m.start()); + 
assertEquals(9, m.end()); + } catch (PatternSyntaxException e) { + System.out.println(e.getMessage()); + fail(); + } + } + + public void testGroups() throws PatternSyntaxException { + Pattern p; + Matcher m; + + p = Pattern.compile("(p[0-9]*)#?(q[0-9]*)"); + + m = p.matcher("p1#q3p2q42p5p71p63#q888"); + assertTrue(m.find()); + assertEquals(0, m.start()); + assertEquals(5, m.end()); + assertEquals(2, m.groupCount()); + assertEquals(0, m.start(0)); + assertEquals(5, m.end(0)); + assertEquals(0, m.start(1)); + assertEquals(2, m.end(1)); + assertEquals(3, m.start(2)); + assertEquals(5, m.end(2)); + assertEquals("p1#q3", m.group()); + assertEquals("p1#q3", m.group(0)); + assertEquals("p1", m.group(1)); + assertEquals("q3", m.group(2)); + + assertTrue(m.find()); + assertEquals(5, m.start()); + assertEquals(10, m.end()); + assertEquals(2, m.groupCount()); + assertEquals(10, m.end(0)); + assertEquals(5, m.start(1)); + assertEquals(7, m.end(1)); + assertEquals(7, m.start(2)); + assertEquals(10, m.end(2)); + assertEquals("p2q42", m.group()); + assertEquals("p2q42", m.group(0)); + assertEquals("p2", m.group(1)); + assertEquals("q42", m.group(2)); + + assertTrue(m.find()); + assertEquals(15, m.start()); + assertEquals(23, m.end()); + assertEquals(2, m.groupCount()); + assertEquals(15, m.start(0)); + assertEquals(23, m.end(0)); + assertEquals(15, m.start(1)); + assertEquals(18, m.end(1)); + assertEquals(19, m.start(2)); + assertEquals(23, m.end(2)); + assertEquals("p63#q888", m.group()); + assertEquals("p63#q888", m.group(0)); + assertEquals("p63", m.group(1)); + assertEquals("q888", m.group(2)); + assertFalse(m.find()); + } + + public void testReplace() throws PatternSyntaxException { + Pattern p; + Matcher m; + + // Note: examples from book, + // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171 + p = Pattern.compile("a*b"); + + m = p.matcher("aabfooaabfooabfoob"); + assertTrue(m.replaceAll("-").equals("-foo-foo-foo-")); + 
assertTrue(m.replaceFirst("-").equals("-fooaabfooabfoob")); + + /* + * p = Pattern.compile ("\\p{Blank}"); + * + * m = p.matcher ("fee fie foe fum"); assertTrue + * (m.replaceFirst("-").equals ("fee-fie foe fum")); assertTrue + * (m.replaceAll("-").equals ("fee-fie-foe-fum")); + */ + + p = Pattern.compile("([bB])yte"); + + m = p.matcher("Byte for byte"); + assertTrue(m.replaceFirst("$1ite").equals("Bite for byte")); + assertTrue(m.replaceAll("$1ite").equals("Bite for bite")); + + p = Pattern.compile("\\d\\d\\d\\d([- ])"); + + m = p.matcher("card #1234-5678-1234"); + assertTrue(m.replaceFirst("xxxx$1").equals("card #xxxx-5678-1234")); + assertTrue(m.replaceAll("xxxx$1").equals("card #xxxx-xxxx-1234")); + + p = Pattern.compile("(up|left)( *)(right|down)"); + + m = p.matcher("left right, up down"); + assertTrue(m.replaceFirst("$3$2$1").equals("right left, up down")); + assertTrue(m.replaceAll("$3$2$1").equals("right left, down up")); + + p = Pattern.compile("([CcPp][hl]e[ea]se)"); + + m = p.matcher("I want cheese. Please."); + assertTrue(m.replaceFirst(" $1 ").equals( + "I want cheese . Please.")); + assertTrue(m.replaceAll(" $1 ").equals( + "I want cheese . 
Please .")); + } + + public void testEscapes() throws PatternSyntaxException { + Pattern p; + Matcher m; + + // Test \\ sequence + p = Pattern.compile("([a-z]+)\\\\([a-z]+);"); + m = p.matcher("fred\\ginger;abbott\\costello;jekell\\hyde;"); + assertTrue(m.find()); + assertEquals("fred", m.group(1)); + assertEquals("ginger", m.group(2)); + assertTrue(m.find()); + assertEquals("abbott", m.group(1)); + assertEquals("costello", m.group(2)); + assertTrue(m.find()); + assertEquals("jekell", m.group(1)); + assertEquals("hyde", m.group(2)); + assertFalse(m.find()); + + // Test \n, \t, \r, \f, \e, \a sequences + p = Pattern.compile("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)"); + m = p.matcher("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh"); + assertTrue(m.find()); + assertEquals("aa", m.group(1)); + assertEquals("bb", m.group(2)); + assertTrue(m.find()); + assertEquals("cc", m.group(1)); + assertEquals("dd", m.group(2)); + assertTrue(m.find()); + assertEquals("ee", m.group(1)); + assertEquals("ff", m.group(2)); + assertTrue(m.find()); + assertEquals("gg", m.group(1)); + assertEquals("hh", m.group(2)); + assertFalse(m.find()); + + // Test \\u and \\x sequences +p = Pattern.compile("([0-9]+)[\\u0020:\\x21];"); + m = p.matcher("11:;22 ;33-;44!;"); + assertTrue(m.find()); + assertEquals("11", m.group(1)); + assertTrue(m.find()); + assertEquals("22", m.group(1)); + assertTrue(m.find()); + assertEquals("44", m.group(1)); + assertFalse(m.find()); + + // Test invalid unicode sequences + try { + p = Pattern.compile("\\u"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\u;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\u002"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\u002;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { 
+ } + + // Test invalid hex sequences + try { + p = Pattern.compile("\\x"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\x;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\xa"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\xa;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + // Test \0 (octal) sequences (1, 2 and 3 digit) + p = Pattern.compile("([0-9]+)[\\07\\040\\0160];"); + m = p.matcher("11\u0007;22:;33 ;44p;"); + assertTrue(m.find()); + assertEquals("11", m.group(1)); + assertTrue(m.find()); + assertEquals("33", m.group(1)); + assertTrue(m.find()); + assertEquals("44", m.group(1)); + assertFalse(m.find()); + + // Test invalid octal sequences + try { + p = Pattern.compile("\\08"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + // originally contributed test did not check the result + // TODO: check what RI does here + // try { + // p = Pattern.compile("\\0477"); + // fail("PatternSyntaxException expected"); + // } catch (PatternSyntaxException e) { + // } + + try { + p = Pattern.compile("\\0"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\0;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + // Test \c (control character) sequence + p = Pattern.compile("([0-9]+)[\\cA\\cB\\cC\\cD];"); + m = p.matcher("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;"); + assertTrue(m.find()); + assertEquals("11", m.group(1)); + assertTrue(m.find()); + assertEquals("33", m.group(1)); + assertTrue(m.find()); + assertEquals("55", m.group(1)); + assertTrue(m.find()); + assertEquals("66", m.group(1)); + assertFalse(m.find()); + + // More thorough control escape 
test + // Ensure that each escape matches exactly the corresponding + // character + // code and no others (well, from 0-255 at least) + int i, j; + for (i = 0; i < 26; i++) { + p = Pattern.compile("\\c" + Character.toString((char) ('A' + i))); + int match_char = -1; + for (j = 0; j < 255; j++) { + m = p.matcher(Character.toString((char) j)); + if (m.matches()) { + assertEquals(-1, match_char); + match_char = j; + } + } + assertTrue(match_char == i + 1); + } + + // Test invalid control escapes + try { + p = Pattern.compile("\\c"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + // originally contributed test did not check the result + // TODO: check what RI does here + // try { + // p = Pattern.compile("\\c;"); + // fail("PatternSyntaxException expected"); + // } catch (PatternSyntaxException e) { + // } + // + // try { + // p = Pattern.compile("\\ca;"); + // fail("PatternSyntaxException expected"); + // } catch (PatternSyntaxException e) { + // } + // + // try { + // p = Pattern.compile("\\c4;"); + // fail("PatternSyntaxException expected"); + // } catch (PatternSyntaxException e) { + // } + } + + public void testCharacterClasses() throws PatternSyntaxException { + Pattern p; + Matcher m; + + // Test one character range + p = Pattern.compile("[p].*[l]"); + m = p.matcher("paul"); + assertTrue(m.matches()); + m = p.matcher("pool"); + assertTrue(m.matches()); + m = p.matcher("pong"); + assertFalse(m.matches()); + m = p.matcher("pl"); + assertTrue(m.matches()); + + // Test two character range + p = Pattern.compile("[pm].*[lp]"); + m = p.matcher("prop"); + assertTrue(m.matches()); + m = p.matcher("mall"); + assertTrue(m.matches()); + m = p.matcher("pong"); + assertFalse(m.matches()); + m = p.matcher("pill"); + assertTrue(m.matches()); + + // Test range including [ and ] + p = Pattern.compile("[<\\[].*[\\]>]"); + m = p.matcher(""); + assertTrue(m.matches()); + m = p.matcher("[bar]"); + assertTrue(m.matches()); + m = 
p.matcher("{foobar]"); + assertFalse(m.matches()); + m = p.matcher(""); + m = p.matcher(""); + assertTrue(m.matches()); + m = p.matcher(""); + assertFalse(m.matches()); + m = p + .matcher("xyz zzz"); + assertTrue(m.find()); + assertTrue(m.find()); + assertFalse(m.find()); + + // Test \S (not whitespace) + p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>"); + m = p.matcher(""); + assertTrue(m.matches()); + m = p.matcher(""); + assertTrue(m.matches()); + m = p.matcher(""); + assertFalse(m.matches()); + m = p.matcher(""); + assertTrue(m.matches()); + p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>"); + m = p.matcher(""); + assertTrue(m.matches()); + m = p.matcher(""); + assertTrue(m.matches()); + m = p.matcher(""); + assertFalse(m.matches()); + m = p.matcher(""); + assertFalse(m.matches()); + m = p.matcher(""); + assertFalse(m.matches()); + m = p.matcher(""); + assertTrue(m.matches()); + + // Test \w (ascii word) + p = Pattern.compile("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;"); + m = p.matcher(""); + * m = p.matcher(""); assertTrue(m.matches()); m = p.matcher(""); + * assertTrue(m.matches()); m = p.matcher(""); + * assertFalse(m.matches()); + */ + p = Pattern.compile("\\p{Lower}+"); + m = p.matcher("abcdefghijklmnopqrstuvwxyz"); + assertTrue(m.matches()); + + // Invalid uses of \p{Lower} + try { + p = Pattern.compile("\\p"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\p;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\p{"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\p{;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\p{Lower"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = 
Pattern.compile("\\p{Lower;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + // Test \p{Upper} + /* + * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>"); + * m = p.matcher(""); assertTrue(m.matches()); m = p.matcher(""); + * assertTrue(m.matches()); m = p.matcher(""); + * assertFalse(m.matches()); + */ + p = Pattern.compile("\\p{Upper}+"); + m = p.matcher("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + assertTrue(m.matches()); + + // Invalid uses of \p{Upper} + try { + p = Pattern.compile("\\p{Upper"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\p{Upper;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + // Test \p{ASCII} + /* + * FIXME: Requires complex range processing p = Pattern.compile("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>"); + * m = p.matcher(""); assertTrue(m.matches()); m = + * p.matcher(""); assertTrue(m.matches()); m = + * p.matcher("<\u00846#:E E>"); assertFalse(m.matches()) + */ + int i; + p = Pattern.compile("\\p{ASCII}"); + for (i = 0; i < 0x80; i++) { + m = p.matcher(Character.toString((char) i)); + assertTrue(m.matches()); + } + for (; i < 0xff; i++) { + m = p.matcher(Character.toString((char) i)); + assertFalse(m.matches()); + } + + // Invalid uses of \p{ASCII} + try { + p = Pattern.compile("\\p{ASCII"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + try { + p = Pattern.compile("\\p{ASCII;"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + } + + // Test \p{Alpha} + // TODO + + // Test \p{Digit} + // TODO + + // Test \p{XDigit} + // TODO + + // Test \p{Alnum} + // TODO + + // Test \p{Punct} + // TODO + + // Test \p{Graph} + // TODO + + // Test \p{Print} + // TODO + + // Test \p{Blank} + // TODO + + // Test \p{Space} + // TODO + + // 
Test \p{Cntrl} + // TODO + } + + public void testUnicodeCategories() throws PatternSyntaxException { + // Test Unicode categories using \p and \P + // One letter codes: L, M, N, P, S, Z, C + // Two letter codes: Lu, Nd, Sc, Sm, ... + // See java.lang.Character and Unicode standard for complete list + // TODO + // Test \p{L} + // TODO + + // Test \p{N} + // TODO + + // ... etc + + // Test two letter codes: + // From unicode.org: + // Lu + // Ll + // Lt + // Lm + // Lo + // Mn + // Mc + // Me + // Nd + // Nl + // No + // Pc + // Pd + // Ps + // Pe + // Pi + // Pf + // Po + // Sm + // Sc + // Sk + // So + // Zs + // Zl + // Zp + // Cc + // Cf + // Cs + // Co + // Cn + } + + public void testUnicodeBlocks() throws PatternSyntaxException { + Pattern p; + Matcher m; + int i, j; + + // Test Unicode blocks using \p and \P + // FIXME: + // Note that LatinExtended-B and ArabicPresentations-B are unrecognized + // by the reference JDK. + for (i = 0; i < UBlocks.length; i++) { + /* + * p = Pattern.compile("\\p{"+UBlocks[i].name+"}"); + * + * if (UBlocks[i].low > 0) { m = + * p.matcher(Character.toString((char)(UBlocks[i].low-1))); + * assertFalse(m.matches()); } for (j=UBlocks[i].low; j <= + * UBlocks[i].high; j++) { m = + * p.matcher(Character.toString((char)j)); assertTrue(m.matches()); } + * if (UBlocks[i].high < 0xFFFF) { m = + * p.matcher(Character.toString((char)(UBlocks[i].high+1))); + * assertFalse(m.matches()); } + * + * p = Pattern.compile("\\P{"+UBlocks[i].name+"}"); + * + * if (UBlocks[i].low > 0) { m = + * p.matcher(Character.toString((char)(UBlocks[i].low-1))); + * assertTrue(m.matches()); } for (j=UBlocks[i].low; j < + * UBlocks[i].high; j++) { m = + * p.matcher(Character.toString((char)j)); assertFalse(m.matches()); } + * if (UBlocks[i].high < 0xFFFF) { m = + * p.matcher(Character.toString((char)(UBlocks[i].high+1))); + * assertTrue(m.matches()); } + */ + + p = Pattern.compile("\\p{In" + UBlocks[i].name + "}"); + + if (UBlocks[i].low > 0) { + m = 
p.matcher(Character.toString((char) (UBlocks[i].low - 1))); + assertFalse(m.matches()); + } + for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) { + m = p.matcher(Character.toString((char) j)); + // TODO investigate, why this fails and uncomment + //assertTrue(m.matches()); + } + if (UBlocks[i].high < 0xFFFF) { + m = p.matcher(Character.toString((char) (UBlocks[i].high + 1))); + // TODO investigate, why this fails and uncomment + //assertFalse(m.matches()); + } + + p = Pattern.compile("\\P{In" + UBlocks[i].name + "}"); + + if (UBlocks[i].low > 0) { + m = p.matcher(Character.toString((char) (UBlocks[i].low - 1))); + assertTrue(m.matches()); + } + for (j = UBlocks[i].low; j < UBlocks[i].high; j++) { + m = p.matcher(Character.toString((char) j)); + assertFalse(m.matches()); + } + if (UBlocks[i].high < 0xFFFF) { + m = p.matcher(Character.toString((char) (UBlocks[i].high + 1))); + // TODO investigate, why this fails and uncomment + //assertTrue(m.matches()); + } + } + } + + public void testCapturingGroups() throws PatternSyntaxException { + // Test simple capturing groups + // TODO + + // Test grouping without capture (?:...) + // TODO + + // Test combination of grouping and capture + // TODO + + // Test \ sequence with capturing and non-capturing groups + // TODO + + // Test \ with out of range + // TODO + } + + public void testRepeats() { + // Test ? + // TODO + + // Test * + // TODO + + // Test + + // TODO + + // Test {}, including 0, 1 and more + // TODO + + // Test {,}, including 0, 1 and more + // TODO + + // Test {,}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?) 
+ // TODO + } + + public void testAnchors() throws PatternSyntaxException { + // Test ^, default and MULTILINE + // TODO + + // Test $, default and MULTILINE + // TODO + + // Test \b (word boundary) + // TODO + + // Test \B (not a word boundary) + // TODO + + // Test \A (beginning of string) + // TODO + + // Test \Z (end of string) + // TODO + + // Test \z (end of string) + // TODO + + // Test \G + // TODO + + // Test positive lookahead using (?=...) + // TODO + + // Test negative lookahead using (?!...) + // TODO + + // Test positive lookbehind using (?<=...) + // TODO + + // Test negative lookbehind using (?...) + // TODO + + // Test (?onflags-offflags) + // Valid flags are i,m,d,s,u,x + // TODO + + // Test (?onflags-offflags:...) + // TODO + + // Test \Q, \E + p = Pattern.compile("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+"); + m = p.matcher("abc;[a-z]+;\\Q(foo.*);411"); + assertTrue(m.matches()); + m = p.matcher("abc;def;foo42;555"); + assertFalse(m.matches()); + m = p.matcher("abc;\\Qdef;\\Qfoo99;\\E123"); + assertFalse(m.matches()); + + p = Pattern.compile("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+"); + m = p.matcher("abc;foo5-(...);123"); + assertTrue(m.matches()); + assertEquals("foo5-(...)", m.group(1)); + m = p.matcher("abc;foo9-(xxx);789"); + assertFalse(m.matches()); + + p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+"); + m = p.matcher("abc;bar0-def$-;123"); + assertTrue(m.matches()); + + // FIXME: + // This should work the same as the pattern above but fails with the + // the reference JDK + p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+"); + m = p.matcher("abc;bar0-def$-;123"); + // assertTrue(m.matches()); + + // FIXME: + // This should work too .. 
it looks as if just about anything that + // has more + // than one character between \Q and \E is broken in the the reference + // JDK + p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+"); + m = p.matcher("abc;bar0-def[99]-]0x[;123"); + // assertTrue(m.matches()); + + // This is the same as above but with explicit escapes .. and this + // does work + // on the the reference JDK + p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+"); + m = p.matcher("abc;bar0-def[99]-]0x[;123"); + assertTrue(m.matches()); + + // Test # + // TODO + } + + public void testCompile1() throws PatternSyntaxException { + Pattern pattern = Pattern + .compile("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*"); + String name = "iso-8859-1"; + assertTrue(pattern.matcher(name).matches()); + } + + public void testCompile2() throws PatternSyntaxException { + String findString = "\\Qimport\\E"; + + Pattern pattern = Pattern.compile(findString, 0); + Matcher matcher = pattern.matcher(new String( + "import a.A;\n\n import b.B;\nclass C {}")); + + assertTrue(matcher.find(0)); + } + + public void testCompile3() throws PatternSyntaxException { + Pattern p; + Matcher m; + p = Pattern.compile("a$"); + m = p.matcher("a\n"); + assertTrue(m.find()); + assertEquals("a", m.group()); + assertFalse(m.find()); + + p = Pattern.compile("(a$)"); + m = p.matcher("a\n"); + assertTrue(m.find()); + assertEquals("a", m.group()); + assertEquals("a", m.group(1)); + assertFalse(m.find()); + + p = Pattern.compile("^.*$", Pattern.MULTILINE); + + m = p.matcher("a\n"); + assertTrue(m.find()); + // System.out.println("["+m.group()+"]"); + assertEquals("a", m.group()); + assertFalse(m.find()); + + m = p.matcher("a\nb\n"); + assertTrue(m.find()); + // System.out.println("["+m.group()+"]"); + assertEquals("a", m.group()); + assertTrue(m.find()); + // System.out.println("["+m.group()+"]"); + assertEquals("b", m.group()); + assertFalse(m.find()); + + m = p.matcher("a\nb"); + assertTrue(m.find()); + // 
System.out.println("["+m.group()+"]"); + assertEquals("a", m.group()); + assertTrue(m.find()); + assertEquals("b", m.group()); + assertFalse(m.find()); + + m = p.matcher("\naa\r\nbb\rcc\n\n"); + assertTrue(m.find()); + // System.out.println("["+m.group()+"]"); + assertTrue(m.group().equals("")); + assertTrue(m.find()); + // System.out.println("["+m.group()+"]"); + assertEquals("aa", m.group()); + assertTrue(m.find()); + // System.out.println("["+m.group()+"]"); + assertEquals("bb", m.group()); + assertTrue(m.find()); + // System.out.println("["+m.group()+"]"); + assertEquals("cc", m.group()); + assertTrue(m.find()); + // System.out.println("["+m.group()+"]"); + assertTrue(m.group().equals("")); + assertFalse(m.find()); + + m = p.matcher("a"); + assertTrue(m.find()); + assertEquals("a", m.group()); + assertFalse(m.find()); + + m = p.matcher(""); + // FIXME: This matches the reference behaviour but is + // inconsistent with matching "a" - ie. the end of the + // target string should match against $ always but this + // appears to work with the null string only when not in + // multiline mode (see below) + assertFalse(m.find()); + + p = Pattern.compile("^.*$"); + m = p.matcher(""); + assertTrue(m.find()); + assertTrue(m.group().equals("")); + assertFalse(m.find()); + } + + public void testCompile4() throws PatternSyntaxException { + String findString = "\\Qpublic\\E"; + StringBuffer text = new StringBuffer(" public class Class {\n" + + " public class Class {"); + + Pattern pattern = Pattern.compile(findString, 0); + Matcher matcher = pattern.matcher(text); + + boolean found = matcher.find(); + assertTrue(found); + assertEquals(4, matcher.start()); + if (found) { + // modify text + text.delete(0, text.length()); + text.append("Text have been changed."); + matcher.reset(text); + } + + found = matcher.find(); + assertFalse(found); + } + + public void testCompile5() throws PatternSyntaxException { + Pattern p = Pattern.compile("^[0-9]"); + String s[] = p.split("12", -1); 
+ assertEquals("", s[0]); + assertEquals("2", s[1]); + assertEquals(2, s.length); + } + + // public void testCompile6() { + // String regex = "[\\p{L}[\\p{Mn}[\\p{Pc}[\\p{Nd}[\\p{Nl}[\\p{Sc}]]]]]]+"; + // String regex = "[\\p{L}\\p{Mn}\\p{Pc}\\p{Nd}\\p{Nl}\\p{Sc}]+"; + // try { + // Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE); + // assertTrue(true); + // } catch (PatternSyntaxException e) { + // System.out.println(e.getMessage()); + // assertTrue(false); + // } + // } + + private static class UBInfo { + public UBInfo(int low, int high, String name) { + this.name = name; + this.low = low; + this.high = high; + } + + public String name; + + public int low, high; + } + + // A table representing the unicode categories + // private static UBInfo[] UCategories = { + // Lu + // Ll + // Lt + // Lm + // Lo + // Mn + // Mc + // Me + // Nd + // Nl + // No + // Pc + // Pd + // Ps + // Pe + // Pi + // Pf + // Po + // Sm + // Sc + // Sk + // So + // Zs + // Zl + // Zp + // Cc + // Cf + // Cs + // Co + // Cn + // }; + + // A table representing the unicode character blocks + private static UBInfo[] UBlocks = { + /* 0000; 007F; Basic Latin */ + new UBInfo(0x0000, 0x007F, "BasicLatin"), // Character.UnicodeBlock.BASIC_LATIN + /* 0080; 00FF; Latin-1 Supplement */ + new UBInfo(0x0080, 0x00FF, "Latin-1Supplement"), // Character.UnicodeBlock.LATIN_1_SUPPLEMENT + /* 0100; 017F; Latin Extended-A */ + new UBInfo(0x0100, 0x017F, "LatinExtended-A"), // Character.UnicodeBlock.LATIN_EXTENDED_A + /* 0180; 024F; Latin Extended-B */ + // new UBInfo (0x0180,0x024F,"InLatinExtended-B"), // + // Character.UnicodeBlock.LATIN_EXTENDED_B + /* 0250; 02AF; IPA Extensions */ + new UBInfo(0x0250, 0x02AF, "IPAExtensions"), // Character.UnicodeBlock.IPA_EXTENSIONS + /* 02B0; 02FF; Spacing Modifier Letters */ + new UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"), // Character.UnicodeBlock.SPACING_MODIFIER_LETTERS + /* 0300; 036F; Combining Diacritical Marks */ + new UBInfo(0x0300, 0x036F, 
"CombiningDiacriticalMarks"), // Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS + /* 0370; 03FF; Greek */ + new UBInfo(0x0370, 0x03FF, "Greek"), // Character.UnicodeBlock.GREEK + /* 0400; 04FF; Cyrillic */ + new UBInfo(0x0400, 0x04FF, "Cyrillic"), // Character.UnicodeBlock.CYRILLIC + /* 0530; 058F; Armenian */ + new UBInfo(0x0530, 0x058F, "Armenian"), // Character.UnicodeBlock.ARMENIAN + /* 0590; 05FF; Hebrew */ + new UBInfo(0x0590, 0x05FF, "Hebrew"), // Character.UnicodeBlock.HEBREW + /* 0600; 06FF; Arabic */ + new UBInfo(0x0600, 0x06FF, "Arabic"), // Character.UnicodeBlock.ARABIC + /* 0700; 074F; Syriac */ + new UBInfo(0x0700, 0x074F, "Syriac"), // Character.UnicodeBlock.SYRIAC + /* 0780; 07BF; Thaana */ + new UBInfo(0x0780, 0x07BF, "Thaana"), // Character.UnicodeBlock.THAANA + /* 0900; 097F; Devanagari */ + new UBInfo(0x0900, 0x097F, "Devanagari"), // Character.UnicodeBlock.DEVANAGARI + /* 0980; 09FF; Bengali */ + new UBInfo(0x0980, 0x09FF, "Bengali"), // Character.UnicodeBlock.BENGALI + /* 0A00; 0A7F; Gurmukhi */ + new UBInfo(0x0A00, 0x0A7F, "Gurmukhi"), // Character.UnicodeBlock.GURMUKHI + /* 0A80; 0AFF; Gujarati */ + new UBInfo(0x0A80, 0x0AFF, "Gujarati"), // Character.UnicodeBlock.GUJARATI + /* 0B00; 0B7F; Oriya */ + new UBInfo(0x0B00, 0x0B7F, "Oriya"), // Character.UnicodeBlock.ORIYA + /* 0B80; 0BFF; Tamil */ + new UBInfo(0x0B80, 0x0BFF, "Tamil"), // Character.UnicodeBlock.TAMIL + /* 0C00; 0C7F; Telugu */ + new UBInfo(0x0C00, 0x0C7F, "Telugu"), // Character.UnicodeBlock.TELUGU + /* 0C80; 0CFF; Kannada */ + new UBInfo(0x0C80, 0x0CFF, "Kannada"), // Character.UnicodeBlock.KANNADA + /* 0D00; 0D7F; Malayalam */ + new UBInfo(0x0D00, 0x0D7F, "Malayalam"), // Character.UnicodeBlock.MALAYALAM + /* 0D80; 0DFF; Sinhala */ + new UBInfo(0x0D80, 0x0DFF, "Sinhala"), // Character.UnicodeBlock.SINHALA + /* 0E00; 0E7F; Thai */ + new UBInfo(0x0E00, 0x0E7F, "Thai"), // Character.UnicodeBlock.THAI + /* 0E80; 0EFF; Lao */ + new UBInfo(0x0E80, 0x0EFF, "Lao"), // 
Character.UnicodeBlock.LAO + /* 0F00; 0FFF; Tibetan */ + new UBInfo(0x0F00, 0x0FFF, "Tibetan"), // Character.UnicodeBlock.TIBETAN + /* 1000; 109F; Myanmar */ + new UBInfo(0x1000, 0x109F, "Myanmar"), // Character.UnicodeBlock.MYANMAR + /* 10A0; 10FF; Georgian */ + new UBInfo(0x10A0, 0x10FF, "Georgian"), // Character.UnicodeBlock.GEORGIAN + /* 1100; 11FF; Hangul Jamo */ + new UBInfo(0x1100, 0x11FF, "HangulJamo"), // Character.UnicodeBlock.HANGUL_JAMO + /* 1200; 137F; Ethiopic */ + new UBInfo(0x1200, 0x137F, "Ethiopic"), // Character.UnicodeBlock.ETHIOPIC + /* 13A0; 13FF; Cherokee */ + new UBInfo(0x13A0, 0x13FF, "Cherokee"), // Character.UnicodeBlock.CHEROKEE + /* 1400; 167F; Unified Canadian Aboriginal Syllabics */ + new UBInfo(0x1400, 0x167F, "UnifiedCanadianAboriginalSyllabics"), // Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS + /* 1680; 169F; Ogham */ + new UBInfo(0x1680, 0x169F, "Ogham"), // Character.UnicodeBlock.OGHAM + /* 16A0; 16FF; Runic */ + new UBInfo(0x16A0, 0x16FF, "Runic"), // Character.UnicodeBlock.RUNIC + /* 1780; 17FF; Khmer */ + new UBInfo(0x1780, 0x17FF, "Khmer"), // Character.UnicodeBlock.KHMER + /* 1800; 18AF; Mongolian */ + new UBInfo(0x1800, 0x18AF, "Mongolian"), // Character.UnicodeBlock.MONGOLIAN + /* 1E00; 1EFF; Latin Extended Additional */ + new UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"), // Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL + /* 1F00; 1FFF; Greek Extended */ + new UBInfo(0x1F00, 0x1FFF, "GreekExtended"), // Character.UnicodeBlock.GREEK_EXTENDED + /* 2000; 206F; General Punctuation */ + new UBInfo(0x2000, 0x206F, "GeneralPunctuation"), // Character.UnicodeBlock.GENERAL_PUNCTUATION + /* 2070; 209F; Superscripts and Subscripts */ + new UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"), // Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS + /* 20A0; 20CF; Currency Symbols */ + new UBInfo(0x20A0, 0x20CF, "CurrencySymbols"), // Character.UnicodeBlock.CURRENCY_SYMBOLS + /* 20D0; 20FF; Combining Marks for 
Symbols */ + new UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"), // Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS + /* 2100; 214F; Letterlike Symbols */ + new UBInfo(0x2100, 0x214F, "LetterlikeSymbols"), // Character.UnicodeBlock.LETTERLIKE_SYMBOLS + /* 2150; 218F; Number Forms */ + new UBInfo(0x2150, 0x218F, "NumberForms"), // Character.UnicodeBlock.NUMBER_FORMS + /* 2190; 21FF; Arrows */ + new UBInfo(0x2190, 0x21FF, "Arrows"), // Character.UnicodeBlock.ARROWS + /* 2200; 22FF; Mathematical Operators */ + new UBInfo(0x2200, 0x22FF, "MathematicalOperators"), // Character.UnicodeBlock.MATHEMATICAL_OPERATORS + /* 2300; 23FF; Miscellaneous Technical */ + new UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"), // Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL + /* 2400; 243F; Control Pictures */ + new UBInfo(0x2400, 0x243F, "ControlPictures"), // Character.UnicodeBlock.CONTROL_PICTURES + /* 2440; 245F; Optical Character Recognition */ + new UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"), // Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION + /* 2460; 24FF; Enclosed Alphanumerics */ + new UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"), // Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS + /* 2500; 257F; Box Drawing */ + new UBInfo(0x2500, 0x257F, "BoxDrawing"), // Character.UnicodeBlock.BOX_DRAWING + /* 2580; 259F; Block Elements */ + new UBInfo(0x2580, 0x259F, "BlockElements"), // Character.UnicodeBlock.BLOCK_ELEMENTS + /* 25A0; 25FF; Geometric Shapes */ + new UBInfo(0x25A0, 0x25FF, "GeometricShapes"), // Character.UnicodeBlock.GEOMETRIC_SHAPES + /* 2600; 26FF; Miscellaneous Symbols */ + new UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"), // Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS + /* 2700; 27BF; Dingbats */ + new UBInfo(0x2700, 0x27BF, "Dingbats"), // Character.UnicodeBlock.DINGBATS + /* 2800; 28FF; Braille Patterns */ + new UBInfo(0x2800, 0x28FF, "BraillePatterns"), // Character.UnicodeBlock.BRAILLE_PATTERNS + /* 2E80; 2EFF; CJK Radicals 
Supplement */ + new UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"), // Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT + /* 2F00; 2FDF; Kangxi Radicals */ + new UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"), // Character.UnicodeBlock.KANGXI_RADICALS + /* 2FF0; 2FFF; Ideographic Description Characters */ + new UBInfo(0x2FF0, 0x2FFF, "IdeographicDescriptionCharacters"), // Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS + /* 3000; 303F; CJK Symbols and Punctuation */ + new UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"), // Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION + /* 3040; 309F; Hiragana */ + new UBInfo(0x3040, 0x309F, "Hiragana"), // Character.UnicodeBlock.HIRAGANA + /* 30A0; 30FF; Katakana */ + new UBInfo(0x30A0, 0x30FF, "Katakana"), // Character.UnicodeBlock.KATAKANA + /* 3100; 312F; Bopomofo */ + new UBInfo(0x3100, 0x312F, "Bopomofo"), // Character.UnicodeBlock.BOPOMOFO + /* 3130; 318F; Hangul Compatibility Jamo */ + new UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"), // Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO + /* 3190; 319F; Kanbun */ + new UBInfo(0x3190, 0x319F, "Kanbun"), // Character.UnicodeBlock.KANBUN + /* 31A0; 31BF; Bopomofo Extended */ + new UBInfo(0x31A0, 0x31BF, "BopomofoExtended"), // Character.UnicodeBlock.BOPOMOFO_EXTENDED + /* 3200; 32FF; Enclosed CJK Letters and Months */ + new UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"), // Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS + /* 3300; 33FF; CJK Compatibility */ + new UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY + /* 3400; 4DB5; CJK Unified Ideographs Extension A */ + new UBInfo(0x3400, 0x4DB5, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A + /* 4E00; 9FFF; CJK Unified Ideographs */ + new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS + /* A000; A48F; Yi Syllables */ + new UBInfo(0xA000, 0xA48F, "YiSyllables"), 
// Character.UnicodeBlock.YI_SYLLABLES + /* A490; A4CF; Yi Radicals */ + new UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS + /* AC00; D7A3; Hangul Syllables */ + new UBInfo(0xAC00, 0xD7A3, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES + /* D800; DB7F; High Surrogates */ + /* DB80; DBFF; High Private Use Surrogates */ + /* DC00; DFFF; Low Surrogates */ + /* E000; F8FF; Private Use */ + /* F900; FAFF; CJK Compatibility Ideographs */ + new UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"), // Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS + /* FB00; FB4F; Alphabetic Presentation Forms */ + new UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"), // Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS + /* FB50; FDFF; Arabic Presentation Forms-A */ + new UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A + /* FE20; FE2F; Combining Half Marks */ + new UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"), // Character.UnicodeBlock.COMBINING_HALF_MARKS + /* FE30; FE4F; CJK Compatibility Forms */ + new UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"), // Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS + /* FE50; FE6F; Small Form Variants */ + new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS + /* FE70; FEFE; Arabic Presentation Forms-B */ + // new UBInfo (0xFE70,0xFEFE,"InArabicPresentationForms-B"), // + // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B + /* FEFF; FEFF; Specials */ + new UBInfo(0xFEFF, 0xFEFF, "Specials"), // Character.UnicodeBlock.SPECIALS + /* FF00; FFEF; Halfwidth and Fullwidth Forms */ + new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS + /* FFF0; FFFD; Specials */ + new UBInfo(0xFFF0, 0xFFFD, "Specials") // Character.UnicodeBlock.SPECIALS + }; +} \ No newline at end of file diff --git 
a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternErrorTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternErrorTest.java new file mode 100644 index 000000000..a73f91895 --- /dev/null +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternErrorTest.java @@ -0,0 +1,67 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.teavm.classlib.java.util.regex; + +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +/** + * Tests boundary and error conditions of java.util.regex.Pattern compilation: null and empty patterns and flag combinations. + */ +@SuppressWarnings("nls") +public class PatternErrorTest extends TestCase { + public void testCompileErrors() throws Exception { + // null regex string - should get NullPointerException + try { + Pattern.compile(null); + fail("NullPointerException expected"); + } catch (NullPointerException e) { // expected: nothing further to check + } + + // empty regex string - no exception should be thrown + Pattern.compile(""); + + // note: invalid regex syntax checked in PatternSyntaxExceptionTest + + // flags = 0 should raise no exception + int flags = 0; + Pattern.compile("foo", flags); + + // check that all valid flags accepted without exception + flags |= Pattern.UNIX_LINES; + flags |= Pattern.CASE_INSENSITIVE; + flags |= Pattern.MULTILINE; + flags |= Pattern.CANON_EQ; + flags |= Pattern.COMMENTS; + flags |= Pattern.DOTALL; + flags |= Pattern.UNICODE_CASE; + Pattern.compile("foo", flags); + + // add invalid flags - should get IllegalArgumentException + // regression test for HARMONY-4248 + flags |= 0xFFFFFFFF; + // TODO: investigate why the commented-out check below fails, then re-enable it + /* + try { + Pattern.compile("foo", flags); + fail("Expected IllegalArgumentException to be thrown"); + } catch (IllegalArgumentException e) { + // This is the expected exception + }*/ + } +} diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternSyntaxExceptionTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternSyntaxExceptionTest.java new file mode 100644 index 000000000..c9e53a633 --- /dev/null +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternSyntaxExceptionTest.java @@ -0,0 +1,61 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.teavm.classlib.java.util.regex; + +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import junit.framework.TestCase; + + +/** + * Verifies the error index and pattern reported by PatternSyntaxException for malformed regular expressions. + */ +@SuppressWarnings("nls") +public class PatternSyntaxExceptionTest extends TestCase { + public void testCase() { + String regex = "("; + try { + Pattern.compile(regex); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + // TOFIX: Commented out assertEquals tests... + // TOFIX: should we match exception strings? + // assertEquals("Unclosed group", e.getDescription()); + assertEquals(1, e.getIndex()); + // assertEquals("Unclosed group near index 1\n(\n ^", + // e.getMessage()); + assertEquals(regex, e.getPattern()); + } + } + + public void testCase2() { + String regex = "[4-"; + try { + Pattern.compile(regex); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException e) { + // TOFIX: Commented out assertEquals tests... + // TOFIX: should we match exception strings?
+ // assertEquals("Illegal character range", e.getDescription()); + assertEquals(3, e.getIndex()); + // assertEquals("Illegal character range near index 3\n[4-\n ^", + // e.getMessage()); + assertEquals(regex, e.getPattern()); + } + } +} diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternTest.java new file mode 100644 index 000000000..606fc6cde --- /dev/null +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternTest.java @@ -0,0 +1,1635 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.teavm.classlib.java.util.regex; + +import java.io.Serializable; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import junit.framework.TestCase; + + +@SuppressWarnings("nls") +public class PatternTest extends TestCase { + String[] testPatterns = { + "(a|b)*abb", + "(1*2*3*4*)*567", + "(a|b|c|d)*aab", + "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*", + "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*", + "(a|b)*(a|b)*A(a|b)*lice.*", + "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|" + + "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do", + "x(?c)y", "x(?cc)y", "x(?:c)y" + + }; + + public PatternTest(String name) { + super(name); + } + + public void testCommentsInPattern() { + Pattern p = Pattern.compile("ab# this is a comment\ncd", Pattern.COMMENTS); + assertTrue(p.matcher("abcd").matches()); + } + + /* + * Class under test for String[] split(CharSequence, int) + */ + public void testSplitCharSequenceint() { + // splitting CharSequence which ends with pattern + // bug6193 + assertEquals(",,".split(",", 3).length, 3); + assertEquals(",,".split(",", 4).length, 3); + // bug6193 + // bug5391 + assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5); + assertEquals(Pattern.compile("b").split("ab", -1).length, 2); + // bug5391 + String s[]; + Pattern pat = Pattern.compile("x"); + s = pat.split("zxx:zzz:zxx", 10); + assertEquals(s.length, 5); + s = pat.split("zxx:zzz:zxx", 3); + assertEquals(s.length, 3); + s = pat.split("zxx:zzz:zxx", -1); + assertEquals(s.length, 5); + s = pat.split("zxx:zzz:zxx", 0); + assertEquals(s.length, 3); + // other splitting + // negative limit + pat = Pattern.compile("b"); + s = pat.split("abccbadfebb", -1); + assertEquals(s.length, 5); + s = pat.split("", -1); + assertEquals(s.length, 1); + pat = Pattern.compile(""); + s = pat.split("", -1); + assertEquals(s.length, 1); + s = 
pat.split("abccbadfe", -1); + assertEquals(s.length, 11); + // zero limit + pat = Pattern.compile("b"); + s = pat.split("abccbadfebb", 0); + assertEquals(s.length, 3); + s = pat.split("", 0); + assertEquals(s.length, 1); + pat = Pattern.compile(""); + s = pat.split("", 0); + assertEquals(s.length, 1); + s = pat.split("abccbadfe", 0); + assertEquals(s.length, 10); + // positive limit + pat = Pattern.compile("b"); + s = pat.split("abccbadfebb", 12); + assertEquals(s.length, 5); + s = pat.split("", 6); + assertEquals(s.length, 1); + pat = Pattern.compile(""); + s = pat.split("", 11); + assertEquals(s.length, 1); + s = pat.split("abccbadfe", 15); + assertEquals(s.length, 11); + + pat = Pattern.compile("b"); + s = pat.split("abccbadfebb", 5); + assertEquals(s.length, 5); + s = pat.split("", 1); + assertEquals(s.length, 1); + pat = Pattern.compile(""); + s = pat.split("", 1); + assertEquals(s.length, 1); + s = pat.split("abccbadfe", 11); + assertEquals(s.length, 11); + + pat = Pattern.compile("b"); + s = pat.split("abccbadfebb", 3); + assertEquals(s.length, 3); + pat = Pattern.compile(""); + s = pat.split("abccbadfe", 5); + assertEquals(s.length, 5); + } + + /* + * Class under test for String[] split(CharSequence) + */ + public void testSplitCharSequence() { + String s[]; + Pattern pat = Pattern.compile("b"); + s = pat.split("abccbadfebb"); + assertEquals(s.length, 3); + s = pat.split(""); + assertEquals(s.length, 1); + pat = Pattern.compile(""); + s = pat.split(""); + assertEquals(s.length, 1); + s = pat.split("abccbadfe"); + assertEquals(s.length, 10); + // bug6544 + String s1 = ""; + String[] arr = s1.split(":"); + assertEquals(arr.length, 1); + // bug6544 + } + + public void testPattern() { + } + + public void testFlags() { + String baseString; + String testString; + Pattern pat; + Matcher mat; + + baseString = "((?i)|b)a"; + testString = "A"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + baseString = 
"(?i)a|b"; + testString = "A"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?i)a|b"; + testString = "B"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "c|(?i)a|b"; + testString = "B"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?i)a|(?s)b"; + testString = "B"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?i)a|(?-i)b"; + testString = "B"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + baseString = "(?i)a|(?-i)c|b"; + testString = "B"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + baseString = "(?i)a|(?-i)c|(?i)b"; + testString = "B"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?i)a|(?-i)b"; + testString = "A"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "((?i))a"; + testString = "A"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + baseString = "|(?i)|a"; + testString = "A"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?i)((?s)a.)"; + testString = "A\n"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?i)((?-i)a)"; + testString = "A"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + baseString = "(?i)(?s:a.)"; + testString = "A\n"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?i)fgh(?s:aa)"; + testString = 
"fghAA"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?i)((?-i))a"; + testString = "A"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "abc(?i)d"; + testString = "ABCD"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + testString = "abcD"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "a(?i)a(?-i)a(?i)a(?-i)a"; + testString = "aAaAa"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "aAAAa"; + mat = pat.matcher(testString); + assertFalse(mat.matches()); + } + + public void testFlagsMethod() { + String baseString; + Pattern pat; + + /* + * These tests are for compatibility with RI only. Logically we have to + * return only flags specified during the compilation. For example + * pat.flags() == 0 when we compile Pattern pat = + * Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled + * in a case insensitive manner. So there is little sense to do calls to + * flags() now. 
+ */ + baseString = "(?-i)"; + pat = Pattern.compile(baseString); + + baseString = "(?idmsux)abc(?-i)vg(?-dmu)"; + pat = Pattern.compile(baseString); + assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); + + baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)"; + pat = Pattern.compile(baseString); + assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); + + baseString = "(?is)a((?x)b.)"; + pat = Pattern.compile(baseString); + assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE); + + baseString = "(?i)a((?-i))"; + pat = Pattern.compile(baseString); + assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE); + + baseString = "((?i)a)"; + pat = Pattern.compile(baseString); + assertEquals(pat.flags(), 0); + + pat = Pattern.compile("(?is)abc"); + assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL); + } + + /* + * Class under test for Pattern compile(String, int) + */ + public void testCompileStringint() { + /* + * this tests are needed to verify that appropriate exceptions are + * thrown + */ + String pattern = "b)a"; + try { + Pattern.compile(pattern); + fail("Expected a PatternSyntaxException when compiling pattern: " + + pattern); + } catch (PatternSyntaxException e) { + // pass + } + pattern = "bcde)a"; + try { + Pattern.compile(pattern); + fail("Expected a PatternSyntaxException when compiling pattern: " + + pattern); + } catch (PatternSyntaxException e) { + // pass + } + pattern = "bbg())a"; + try { + Pattern.compile(pattern); + fail("Expected a PatternSyntaxException when compiling pattern: " + + pattern); + } catch (PatternSyntaxException e) { + // pass + } + + pattern = "cdb(?i))a"; + try { + Pattern.compile(pattern); + fail("Expected a PatternSyntaxException when compiling pattern: " + + pattern); + } catch (PatternSyntaxException e) { + // pass + } + + /* + * This pattern should compile - HARMONY-2127 + */ + pattern = "x(?c)y"; + Pattern.compile(pattern); + + /* + * this pattern doesn't match any string, but should be 
compiled anyway + */ + pattern = "(b\\1)a"; + Pattern.compile(pattern); + } + + /* + * Class under test for Pattern compile(String) + */ + public void testQuantCompileNeg() { + String[] patterns = { "5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh", + "{5,3shdfkjh}" }; + for (String element : patterns) { + try { + Pattern.compile(element); + fail("PatternSyntaxException was expected, but compilation succeeds"); + } catch (PatternSyntaxException pse) { + continue; + } + } + // Regression for HARMONY-1365 + String pattern = "(?![^\\\\G*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]*+)|(?x-xd:^{5}+)()"; + assertNotNull(Pattern.compile(pattern)); + } + + public void testQuantCompilePos() { + String[] patterns = {/* "(abc){1,3}", */"abc{2,}", "abc{5}" }; + for (String element : patterns) { + Pattern.compile(element); + } + } + + public void testQuantComposition() { + String pattern = "(a{1,3})aab"; + java.util.regex.Pattern pat = java.util.regex.Pattern.compile(pattern); + java.util.regex.Matcher mat = pat.matcher("aaab"); + mat.matches(); + mat.start(1); + mat.group(1); + } + + public void testMatches() { + String[][] posSeq = { + { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" }, + { "213567", "12324567", "1234567", "213213567", + "21312312312567", "444444567" }, + { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" }, + { "213234567", "3458", "0987654", "7689546432", "0398576", + "98432", "5" }, + { + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", + 
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" }, + { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa", + "abbbAbbbliceaaa", "Alice" }, + { "a123", "bnxnvgds156", "for", "while", "if", "struct" }, + { "xy" }, { "xy" }, { "xcy" } + + }; + + for (int i = 0; i < testPatterns.length; i++) { + for (int j = 0; j < posSeq[i].length; j++) { + assertTrue("Incorrect match: " + testPatterns[i] + " vs " + + posSeq[i][j], Pattern.matches(testPatterns[i], + posSeq[i][j])); + } + } + } + + public void testTimeZoneIssue() { + Pattern p = Pattern.compile("GMT(\\+|\\-)(\\d+)(:(\\d+))?"); + Matcher m = p.matcher("GMT-9:45"); + assertTrue(m.matches()); + assertEquals("-", m.group(1)); + assertEquals("9", m.group(2)); + assertEquals(":45", m.group(3)); + assertEquals("45", m.group(4)); + } + + public void testCompileRanges() { + String[] correctTestPatterns = { "[^]*abb]*", "[^a-d[^m-p]]*abb", + "[a-d\\d]*abb", "[abc]*abb", "[a-e&&[de]]*abb", "[^abc]*abb", + "[a-e&&[^de]]*abb", "[a-z&&[^m-p]]*abb", "[a-d[m-p]]*abb", + "[a-zA-Z]*abb", "[+*?]*abb", "[^+*?]*abb" }; + + String[] inputSecuence = { "kkkk", "admpabb", "abcabcd124654abb", + "abcabccbacababb", "dededededededeedabb", "gfdhfghgdfghabb", + "accabacbcbaabb", "acbvfgtyabb", "adbcacdbmopabcoabb", + "jhfkjhaSDFGHJkdfhHNJMjkhfabb", "+*??+*abb", "sdfghjkabb" }; + + for (int i = 0; i < correctTestPatterns.length; i++) { + assertTrue("pattern: " + correctTestPatterns[i] + " input: " + + inputSecuence[i], Pattern.matches(correctTestPatterns[i], + inputSecuence[i])); + + } + + String[] wrongInputSecuence = { "]", "admpkk", "abcabcd124k654abb", + "abwcabccbacababb", "abababdeababdeabb", "abcabcacbacbabb", + "acdcbecbaabb", "acbotyabb", "adbcaecdbmopabcoabb", + "jhfkjhaSDFGHJk;dfhHNJMjkhfabb", "+*?a?+*abb", "sdf+ghjkabb" }; + + for (int i = 0; i < correctTestPatterns.length; i++) { + assertFalse("pattern: " + correctTestPatterns[i] + " input: " + + 
wrongInputSecuence[i], Pattern.matches( + correctTestPatterns[i], wrongInputSecuence[i])); + + } + } + + public void testRangesSpecialCases() { + String neg_patterns[] = { "[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]" }; + + for (String element : neg_patterns) { + try { + Pattern.compile(element); + fail("PatternSyntaxException was expected: " + element); + } catch (PatternSyntaxException pse) { + } + } + + String pos_patterns[] = { "[-]+", "----", "[a-]+", "a-a-a-a-aa--", + "[\\w-a]+", "123-2312--aaa-213", "[a-]]+", "-]]]]]]]]]]]]]]]" }; + + for (int i = 0; i < pos_patterns.length; i++) { + String pat = pos_patterns[i++]; + String inp = pos_patterns[i]; + assertTrue("pattern: " + pat + " input: " + inp, Pattern.matches( + pat, inp)); + } + } + + public void testZeroSymbols() { + assertTrue(Pattern.matches("[\0]*abb", "\0\0\0\0\0\0abb")); + } + + public void testEscapes() { + Pattern pat = Pattern.compile("\\Q{]()*?"); + Matcher mat = pat.matcher("{]()*?"); + + assertTrue(mat.matches()); + } + + public void testRegressions() { + // Bug 181 + Pattern.compile("[\\t-\\r]"); + + // HARMONY-4472 + Pattern.compile("a*.+"); + + // Bug187 + Pattern + 
.compile("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))"); // NOTE(review): the "(?[" and "(?^" groups in this pattern look truncated (angle-bracketed text such as "(?<!...>" may have been lost) - confirm against the upstream test before relying on it + // HARMONY-5858 + Pattern.compile("\\u6211", Pattern.LITERAL); + } + + public void testOrphanQuantifiers() { + try { + Pattern.compile("+++++"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException pse) { + } + } + + public void testOrphanQuantifiers2() { + try { + Pattern.compile("\\d+*"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException pse) { + } + } + + public void testBug197() { + Object[] vals = { ":", new Integer(2), + new String[] { "boo", "and:foo" }, ":", new Integer(5), + new String[] { "boo", "and", "foo" }, ":", new Integer(-2), + new String[] { "boo", "and", "foo" }, ":", new Integer(3), + new String[] { "boo", "and",
"foo" }, ":", new Integer(1), + new String[] { "boo:and:foo" }, "o", new Integer(5), + new String[] { "b", "", ":and:f", "", "" }, "o", + new Integer(4), new String[] { "b", "", ":and:f", "o" }, "o", + new Integer(-2), new String[] { "b", "", ":and:f", "", "" }, + "o", new Integer(0), new String[] { "b", "", ":and:f" } }; + + for (int i = 0; i < vals.length;) { // each case consumes three slots (regex, limit, expected), so walk the whole array + String[] res = Pattern.compile(vals[i++].toString()).split( + "boo:and:foo", ((Integer) vals[i++]).intValue()); + String[] expectedRes = (String[]) vals[i++]; + + assertEquals(expectedRes.length, res.length); + + for (int j = 0; j < expectedRes.length; j++) { + assertEquals(expectedRes[j], res[j]); + } + } + } + + public void testURIPatterns() { + String URI_REGEXP_STR = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; + String SCHEME_REGEXP_STR = "^[a-zA-Z]{1}[\\w+-.]+$"; + String REL_URI_REGEXP_STR = "^(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; + String IPV6_REGEXP_STR = "^[0-9a-fA-F\\:\\.]+(\\%\\w+)?$"; + String IPV6_REGEXP_STR2 = "^\\[[0-9a-fA-F\\:\\.]+(\\%\\w+)?\\]$"; + String IPV4_REGEXP_STR = "^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$"; + String HOSTNAME_REGEXP_STR = "\\w+[\\w\\-\\.]*"; + + Pattern.compile(URI_REGEXP_STR); + Pattern.compile(REL_URI_REGEXP_STR); + Pattern.compile(SCHEME_REGEXP_STR); + Pattern.compile(IPV4_REGEXP_STR); + Pattern.compile(IPV6_REGEXP_STR); + Pattern.compile(IPV6_REGEXP_STR2); + Pattern.compile(HOSTNAME_REGEXP_STR); + } + + public void testFindBoundaryCases1() { + Pattern pat = Pattern.compile(".*\n"); + Matcher mat = pat.matcher("a\n"); + + mat.find(); + assertEquals("a\n", mat.group()); + + } + + public void testFindBoundaryCases2() { + Pattern pat = Pattern.compile(".*A"); + Matcher mat = pat.matcher("aAa"); + + mat.find(); + assertEquals("aA", mat.group()); + + } + + public void testFindBoundaryCases3() { + Pattern pat = Pattern.compile(".*A"); + Matcher mat = pat.matcher("a\naA\n"); + + mat.find(); + assertEquals("aA", mat.group()); + +
} + + public void testFindBoundaryCases4() { + Pattern pat = Pattern.compile("A.*"); + Matcher mat = pat.matcher("A\n"); + + mat.find(); + assertEquals("A", mat.group()); + + } + + public void testFindBoundaryCases5() { + Pattern pat = Pattern.compile(".*A.*"); + Matcher mat = pat.matcher("\nA\naaa\nA\naaAaa\naaaA\n"); + // Matcher mat = pat.matcher("\nA\n"); + String[] res = { "A", "A", "aaAaa", "aaaA" }; + int k = 0; + for (; mat.find(); k++) { + assertEquals(res[k], mat.group()); + } + } + + public void testFindBoundaryCases6() { + String[] res = { "", "a", "", "" }; + Pattern pat = Pattern.compile(".*"); + Matcher mat = pat.matcher("\na\n"); + int k = 0; + + for (; mat.find(); k++) { + assertEquals(res[k], mat.group()); + } + } + + public void _testFindBoundaryCases7() { + Pattern pat = Pattern.compile(".*"); + Matcher mat = pat.matcher("\na\n"); + int k = 0; + + for (; mat.find(); k++) { + System.out.println(mat.group()); + System.out.flush(); + } + } + + public void testBackReferences() { + Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))"); + Matcher mat = pat + .matcher("(start1: word :start1)(start2: word :start2)"); + int k = 1; + for (; mat.find(); k++) { + assertEquals("start" + k, mat.group(2)); + assertEquals(" word ", mat.group(3)); + assertEquals("start" + k, mat.group(4)); + + } + + assertEquals(3, k); + pat = Pattern.compile(".*(.)\\1"); + mat = pat.matcher("saa"); + assertTrue(mat.matches()); + } + + public void _testBackReferences1() { + Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))"); + Matcher mat = pat + .matcher("(start1: word :start1)(start2: word :start2)"); + int k = 1; + for (; mat.find(); k++) { + System.out.println(mat.group(2)); + System.out.println(mat.group(3)); + System.out.println(mat.group(4)); + + } + + assertEquals(3, k); + } + + public void testNewLine() { + Pattern pat = Pattern.compile("(^$)*\n", Pattern.MULTILINE); + Matcher mat = pat.matcher("\r\n\n"); + int counter = 0; + while (mat.find()) { + 
counter++; + } + assertEquals(2, counter); + } + + public void testFindGreedy() { + Pattern pat = Pattern.compile(".*aaa", Pattern.DOTALL); + Matcher mat = pat.matcher("aaaa\naaa\naaaaaa"); + mat.matches(); + assertEquals(15, mat.end()); + } + + public void testSOLQuant() { + Pattern pat = Pattern.compile("$*", Pattern.MULTILINE); + Matcher mat = pat.matcher("\n\n"); + int counter = 0; + while (mat.find()) { + counter++; + } + + assertEquals(3, counter); + } + + public void testIllegalEscape() { + try { + Pattern.compile("\\y"); + fail("PatternSyntaxException expected"); + } catch (PatternSyntaxException pse) { + } + } + + public void testEmptyFamily() { + Pattern.compile("\\p{Lower}"); + } + + public void testNonCaptConstr() { + // Flags + Pattern pat = Pattern.compile("(?i)b*(?-i)a*"); + assertTrue(pat.matcher("bBbBaaaa").matches()); + assertFalse(pat.matcher("bBbBAaAa").matches()); + + // Non-capturing groups + pat = Pattern.compile("(?i:b*)a*"); + assertTrue(pat.matcher("bBbBaaaa").matches()); + assertFalse(pat.matcher("bBbBAaAa").matches()); + + pat = Pattern + // 1 2 3 4 5 6 7 8 9 10 11 + .compile("(?:-|(-?\\d+\\d\\d\\d))?(?:-|-(\\d\\d))?(?:-|-(\\d\\d))?(T)?(?:(\\d\\d):(\\d\\d):(\\d\\d)(\\.\\d+)?)?(?:(?:((?:\\+|\\-)\\d\\d):(\\d\\d))|(Z))?"); + Matcher mat = pat.matcher("-1234-21-31T41:51:61.789+71:81"); + assertTrue(mat.matches()); + assertEquals("-1234", mat.group(1)); + assertEquals("21", mat.group(2)); + assertEquals("31", mat.group(3)); + assertEquals("T", mat.group(4)); + assertEquals("41", mat.group(5)); + assertEquals("51", mat.group(6)); + assertEquals("61", mat.group(7)); + assertEquals(".789", mat.group(8)); + assertEquals("+71", mat.group(9)); + assertEquals("81", mat.group(10)); + + // positive lookahead + pat = Pattern.compile(".*\\.(?=log$).*$"); + assertTrue(pat.matcher("a.b.c.log").matches()); + assertFalse(pat.matcher("a.b.c.log.").matches()); + + // negative lookahead + pat = Pattern.compile(".*\\.(?!log$).*$"); + 
assertFalse(pat.matcher("abc.log").matches()); + assertTrue(pat.matcher("abc.logg").matches()); + + // positive lookbehind + pat = Pattern.compile(".*(?<=abc)\\.log$"); + assertFalse(pat.matcher("cde.log").matches()); + assertTrue(pat.matcher("abc.log").matches()); + + // negative lookbehind + pat = Pattern.compile(".*(?<!abc)\\.log$"); + assertTrue(pat.matcher("cde.log").matches()); + assertFalse(pat.matcher("abc.log").matches()); + + // atomic group: (?>a*) keeps every 'a' it consumed, so a following literal 'a' cannot match + pat = Pattern.compile("(?>a*)abb"); + assertFalse(pat.matcher("aaabb").matches()); + pat = Pattern.compile("(?>a*)bb"); + assertTrue(pat.matcher("aaabb").matches()); + + pat = Pattern.compile("(?>a|aa)aabb"); + assertTrue(pat.matcher("aaabb").matches()); + pat = Pattern.compile("(?>aa|a)aabb"); + assertFalse(pat.matcher("aaabb").matches()); + + // quantifiers over look behind + pat = Pattern.compile(".*(?<=abc)*\\.log$"); + assertTrue(pat.matcher("cde.log").matches()); + pat = Pattern.compile(".*(?<=abc)+\\.log$"); + assertFalse(pat.matcher("cde.log").matches()); + + } + + public void _testCorrectReplacementBackreferencedJointSet() { + Pattern.compile("ab(a)*\\1"); + Pattern.compile("abc(cd)fg"); + Pattern.compile("aba*cd"); + Pattern.compile("ab(a)*+cd"); + Pattern.compile("ab(a)*?cd"); + Pattern.compile("ab(a)+cd"); + Pattern.compile(".*(.)\\1"); + Pattern.compile("ab((a)|c|d)e"); + Pattern.compile("abc((a(b))cd)"); + Pattern.compile("ab(a)++cd"); + Pattern.compile("ab(a)?(c)d"); + Pattern.compile("ab(a)?+cd"); + Pattern.compile("ab(a)??cd"); + Pattern.compile("ab(a)??cd"); + Pattern.compile("ab(a){1,3}?(c)d"); + } + + public void testCompilePatternWithTerminatorMark() { + Pattern pat = Pattern.compile("a\u0000\u0000cd"); + Matcher mat = pat.matcher("a\u0000\u0000cd"); + assertTrue(mat.matches()); + } + + public void testAlternations() { + String baseString = "|a|bc"; + Pattern pat = Pattern.compile(baseString); + Matcher mat = pat.matcher(""); + + assertTrue(mat.matches()); + + baseString = "a||bc"; + pat = Pattern.compile(baseString); + mat = pat.matcher(""); + assertTrue(mat.matches()); + + baseString = "a|bc|"; + pat = Pattern.compile(baseString); + mat =
pat.matcher(""); + assertTrue(mat.matches()); + + baseString = "a|b|"; + pat = Pattern.compile(baseString); + mat = pat.matcher(""); + assertTrue(mat.matches()); + + baseString = "a(|b|cd)e"; + pat = Pattern.compile(baseString); + mat = pat.matcher("ae"); + assertTrue(mat.matches()); + + baseString = "a(b||cd)e"; + pat = Pattern.compile(baseString); + mat = pat.matcher("ae"); + assertTrue(mat.matches()); + + baseString = "a(b|cd|)e"; + pat = Pattern.compile(baseString); + mat = pat.matcher("ae"); + assertTrue(mat.matches()); + + baseString = "a(b|c|)e"; + pat = Pattern.compile(baseString); + mat = pat.matcher("ae"); + assertTrue(mat.matches()); + + baseString = "a(|)e"; + pat = Pattern.compile(baseString); + mat = pat.matcher("ae"); + assertTrue(mat.matches()); + + baseString = "|"; + pat = Pattern.compile(baseString); + mat = pat.matcher(""); + assertTrue(mat.matches()); + + baseString = "a(?:|)e"; + pat = Pattern.compile(baseString); + mat = pat.matcher("ae"); + assertTrue(mat.matches()); + + baseString = "a||||bc"; + pat = Pattern.compile(baseString); + mat = pat.matcher(""); + assertTrue(mat.matches()); + + baseString = "(?i-is)|a"; + pat = Pattern.compile(baseString); + mat = pat.matcher("a"); + assertTrue(mat.matches()); + } + + public void testMatchWithGroups() { + String baseString = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr"; + String pattern = ".*(..).*\\1.*"; + assertTrue(Pattern.compile(pattern).matcher(baseString).matches()); + + baseString = "saa"; + pattern = ".*(.)\\1"; + assertTrue(Pattern.compile(pattern).matcher(baseString).matches()); + assertTrue(Pattern.compile(pattern).matcher(baseString).find()); + } + + public void testSplitEmptyCharSequence() { + String s1 = ""; + String[] arr = s1.split(":"); + assertEquals(arr.length, 1); + } + + public void testSplitEndsWithPattern() { + assertEquals(",,".split(",", 3).length, 3); + assertEquals(",,".split(",", 4).length, 3); + + 
assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5); + assertEquals(Pattern.compile("b").split("ab", -1).length, 2); + } + + public void testCaseInsensitiveFlag() { + assertTrue(Pattern.matches("(?i-:AbC)", "ABC")); + } + + public void testEmptyGroups() { + Pattern pat = Pattern.compile("ab(?>)cda"); + Matcher mat = pat.matcher("abcda"); + assertTrue(mat.matches()); + + pat = Pattern.compile("ab()"); + mat = pat.matcher("ab"); + assertTrue(mat.matches()); + + pat = Pattern.compile("abc(?:)(..)"); + mat = pat.matcher("abcgf"); + assertTrue(mat.matches()); + } + + public void testCompileNonCaptGroup() { + boolean isCompiled = false; + + try { + Pattern.compile("(?:)", Pattern.CANON_EQ); + Pattern.compile("(?:)", Pattern.CANON_EQ | Pattern.DOTALL); + Pattern + .compile("(?:)", Pattern.CANON_EQ + | Pattern.CASE_INSENSITIVE); + Pattern.compile("(?:)", Pattern.CANON_EQ | Pattern.COMMENTS + | Pattern.UNIX_LINES); + isCompiled = true; + } catch (PatternSyntaxException e) { + System.out.println(e); + } + assertTrue(isCompiled); + } + + public void testEmbeddedFlags() { + String baseString = "(?i)((?s)a)"; + String testString = "A"; + Pattern pat = Pattern.compile(baseString); + Matcher mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?x)(?i)(?s)(?d)a"; + testString = "A"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "(?x)(?i)(?s)(?d)a."; + testString = "a\n"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "abc(?x:(?i)(?s)(?d)a.)"; + testString = "abcA\n"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "abc((?x)d)(?i)(?s)a"; + testString = "abcdA"; + pat = Pattern.compile(baseString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + } + + public void testAltWithFlags() { + Pattern.compile("|(?i-xi)|()"); + } 
+ + public void testRestoreFlagsAfterGroup() { + String baseString = "abc((?x)d) a"; + String testString = "abcd a"; + Pattern pat = Pattern.compile(baseString); + Matcher mat = pat.matcher(testString); + + assertTrue(mat.matches()); + } + + /* + * Verify if the Pattern support the following character classes: + * \p{javaLowerCase} \p{javaUpperCase} \p{javaWhitespace} \p{javaMirrored} + */ + public void testCompileCharacterClass() { + // Regression for HARMONY-606, 696 + Pattern pattern = Pattern.compile("\\p{javaLowerCase}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaUpperCase}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaWhitespace}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaMirrored}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaDefined}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaDigit}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaIdentifierIgnorable}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaISOControl}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaJavaIdentifierPart}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaJavaIdentifierStart}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaLetter}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaLetterOrDigit}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaSpaceChar}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaTitleCase}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaUnicodeIdentifierPart}"); + assertNotNull(pattern); + + pattern = Pattern.compile("\\p{javaUnicodeIdentifierStart}"); + assertNotNull(pattern); + } + + public void testCanonEqFlag() { + + /* + * for decompositions see + * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt + * http://www.unicode.org/reports/tr15/#Decomposition + */ + 
String baseString; + String testString; + Pattern pat; + Matcher mat; + + baseString = "ab(a*)\\1"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + + baseString = "a(abcdf)d"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + + baseString = "aabcdfd"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + + // \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304 + // \u00CC -> \u0049\u0300 + + baseString = "\u01E0\u00CCcdb(ac)"; + testString = "\u0226\u0304\u0049\u0300cdbac"; + // TODO investigate, why this fails and uncomment + /*pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches());*/ + + baseString = "\u01E0cdb(a\u00CCc)"; + testString = "\u0041\u0307\u0304cdba\u0049\u0300c"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "a\u00CC"; + testString = "a\u0049\u0300"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "\u0226\u0304cdb(ac\u0049\u0300)"; + testString = "\u01E0cdbac\u00CC"; + // TODO investigate, why this fails and uncomment + /*pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches());*/ + + baseString = "cdb(?:\u0041\u0307\u0304\u00CC)"; + testString = "cdb\u0226\u0304\u0049\u0300"; + // TODO investigate, why this fails and uncomment + //pat = Pattern.compile(baseString, Pattern.CANON_EQ); + //mat = pat.matcher(testString); + // assertTrue(mat.matches()); + + baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)"; + testString = "\u01E0b\u00CCcdbac"; + // TODO investigate, why this fails and uncomment + // pat = Pattern.compile(baseString, Pattern.CANON_EQ); + // mat = pat.matcher(testString); + // assertTrue(mat.matches()); + + baseString = "\u01E0|\u00CCcdb(ac)"; + testString = "\u0041\u0307\u0304"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = 
pat.matcher(testString); + // TODO investigate, why this fails and uncomment + //assertTrue(mat.matches()); + + baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]"; + testString = "cdb\u0041\u0307\u0304b"; + // TODO investigate, why this fails and uncomment + /*pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches());*/ + + baseString = "a\u0300"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher("a\u00E0a"); + assertTrue(mat.find()); + + baseString = "\u7B20\uF9F8abc"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher("\uF9F8\uF9F8abc"); + // TODO investigate, why this fails and uncomment + // assertTrue(mat.matches()); + + // \u01F9 -> \u006E\u0300 + // \u00C3 -> \u0041\u0303 + + baseString = "cdb(?:\u00C3\u006E\u0300)"; + testString = "cdb\u0041\u0303\u01F9"; + // TODO investigate, why this fails and uncomment + /* + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches());*/ + + // \u014C -> \u004F\u0304 + // \u0163 -> \u0074\u0327 + + baseString = "cdb(?:\u0163\u004F\u0304)"; + testString = "cdb\u0074\u0327\u014C"; + // TODO investigate, why this fails and uncomment + /* + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches());*/ + + // \u00E1->a\u0301 + // canonical ordering takes place \u0301\u0327 -> \u0327\u0301 + + baseString = "c\u0327\u0301"; + testString = "c\u0301\u0327"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + /* + * Hangul decompositions + */ + // \uD4DB->\u1111\u1171\u11B6 + // \uD21E->\u1110\u116D\u11B5 + // \uD264->\u1110\u1170 + // not Hangul:\u0453->\u0433\u0301 + baseString = "a\uD4DB\u1111\u1171\u11B6\uD264"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + + baseString = "\u0453c\uD4DB"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); 
+ + baseString = "a\u1110\u116D\u11B5b\uD21Ebc"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + + baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)"; + testString = "\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + // TODO investigate, why this fails and uncomment + // assertTrue(mat.matches()); + + baseString = "\uD4DB\uD264cdb(a\uD21Ec)"; + testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "a\uD4DB"; + testString = "a\u1111\u1171\u11B6"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "a\uD21E"; + testString = "a\u1110\u116D\u11B5"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)"; + testString = "\uD4DBcdbac\uD21E"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + // TODO investigate, why this fails and uncomment + // assertTrue(mat.matches()); + + baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)"; + testString = "cdb\uD4DB\u1110\u116D\u11B5"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + // TODO investigate, why this fails and uncomment + /* + assertTrue(mat.matches()); + + baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)"; + testString = "\uD4DBb\uD21Ecdbac"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "\uD4DB|\u00CCcdb(ac)"; + testString = "\u1111\u1171\u11B6"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "\uD4DB|\u00CCcdb(ac)"; + testString 
= "\u1111\u1171"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]"; + testString = "cdb\u1111\u1171\u11B6b"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + baseString = "\uD4DB"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher("a\u1111\u1171\u11B6a"); + assertTrue(mat.find()); + + baseString = "\u1111"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher("bcda\uD4DBr"); + assertFalse(mat.find());*/ + } + + public void testIndexesCanonicalEq() { + String baseString; + String testString; + Pattern pat; + Matcher mat; + + baseString = "\uD4DB"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher("bcda\u1111\u1171\u11B6awr"); + assertTrue(mat.find()); + assertEquals(mat.start(), 4); + assertEquals(mat.end(), 7); + + baseString = "\uD4DB\u1111\u1171\u11B6"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher("bcda\u1111\u1171\u11B6\uD4DBawr"); + // TODO investigate, why this fails and uncomment + /*assertTrue(mat.find()); + assertEquals(mat.start(), 4); + assertEquals(mat.end(), 8);*/ + + baseString = "\uD4DB\uD21E\u1110\u1170"; + testString = "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; + pat = Pattern.compile(baseString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + // TODO investigate, why this fails and uncomment + /*assertTrue(mat.find()); + assertEquals(mat.start(), 6); + assertEquals(mat.end(), 13);*/ + } + + public void testCanonEqFlagWithSupplementaryCharacters() { + + /* + * \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32 + * \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F + * ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16 + */ + String patString = "abc\uD834\uDDBFef"; + String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; + Pattern pat = 
Pattern.compile(patString, Pattern.CANON_EQ); + Matcher mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; + // TODO investigate, why this fails and uncomment + /*mat = pat.matcher(testString); + assertTrue(mat.matches());*/ + + patString = "abc\uD834\uDDBB\uD834\uDD6Fef"; + testString = "abc\uD834\uDDBFef"; + pat = Pattern.compile(patString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + // TODO investigate, why this fails and uncomment + //assertTrue(mat.matches()); + + testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; + testString = "abc\uD834\uDDBFef"; + pat = Pattern.compile(patString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + // TODO investigate, why this fails and uncomment + // assertTrue(mat.matches()); + + testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; + mat = pat.matcher(testString); + // TODO investigate, why this fails and uncomment + // assertTrue(mat.matches()); + + /* + * testSupplementary characters with no decomposition + */ + patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef"; + testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef"; + pat = Pattern.compile(patString, Pattern.CANON_EQ); + mat = pat.matcher(testString); + // TODO investigate, why this fails and uncomment + // assertTrue(mat.matches()); + } + + public void testRangesWithSurrogatesSupplementary() { + String patString = "[abc\uD8D2]"; + String testString = "\uD8D2"; + Pattern pat = Pattern.compile(patString); + Matcher mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "a"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "ef\uD8D2\uDD71gh"; + mat = pat.matcher(testString); + assertFalse(mat.find()); + + testString = "ef\uD8D2gh"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + patString = 
"[abc\uD8D3&&[c\uD8D3]]"; + testString = "c"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "a"; + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + testString = "ef\uD8D3\uDD71gh"; + mat = pat.matcher(testString); + assertFalse(mat.find()); + + testString = "ef\uD8D3gh"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]"; + testString = "c"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "\uDBEE\uDF0C"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "ef\uD8D3\uDD71gh"; + mat = pat.matcher(testString); + assertFalse(mat.find()); + + testString = "ef\uD8D3gh"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + patString = "[abc\uDBFC]\uDDC2cd"; + testString = "\uDBFC\uDDC2cd"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + testString = "a\uDDC2cd"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + } + + public void testSequencesWithSurrogatesSupplementary() { + String patString = "abcd\uD8D3"; + String testString = "abcd\uD8D3\uDFFC"; + Pattern pat = Pattern.compile(patString); + Matcher mat = pat.matcher(testString); + assertFalse(mat.find()); + + testString = "abcd\uD8D3abc"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + patString = "ab\uDBEFcd"; + testString = "ab\uDBEFcd"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + patString = "\uDFFCabcd"; + testString = "\uD8D3\uDFFCabcd"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertFalse(mat.find()); + + testString = "abc\uDFFCabcdecd"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + patString = "\uD8D3\uDFFCabcd"; + testString = "abc\uD8D3\uD8D3\uDFFCabcd"; + pat = 
Pattern.compile(patString); + mat = pat.matcher(testString); + assertTrue(mat.find()); + } + + public void testPredefinedClassesWithSurrogatesSupplementary() { + String patString = "[123\\D]"; + String testString = "a"; + Pattern pat = Pattern.compile(patString); + Matcher mat = pat.matcher(testString); + assertTrue(mat.find()); + + testString = "5"; + mat = pat.matcher(testString); + assertFalse(mat.find()); + + testString = "3"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + // low surrogate + testString = "\uDFC4"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + // high surrogate + testString = "\uDADA"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + testString = "\uDADA\uDFC4"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + patString = "[123[^\\p{javaDigit}]]"; + testString = "a"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertTrue(mat.find()); + + testString = "5"; + mat = pat.matcher(testString); + assertFalse(mat.find()); + + testString = "3"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + // low surrogate + testString = "\uDFC4"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + // high surrogate + testString = "\uDADA"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + testString = "\uDADA\uDFC4"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + // surrogate characters + patString = "\\p{Cs}"; + testString = "\uD916\uDE27"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + + /* + * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we + * have to treat text as code points not code units. 
\\p{Cs} matches any + * surrogate character but here testString is a one code point + * consisting of two code units (two surrogate characters) so we find + * nothing + */ + // TODO investigate, why this fails and uncomment + //assertFalse(mat.find()); + + // swap low and high surrogates + testString = "\uDE27\uD916"; + mat = pat.matcher(testString); + assertTrue(mat.find()); + + patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]"; + testString = "1"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertTrue(mat.find()); + + testString = "\uD916"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertFalse(mat.find()); + + testString = "\uD916\uDE27"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertTrue(mat.find()); + + // \uD9A0\uDE8E=\u7828E + // \u78281=\uD9A0\uDE81 + patString = "[a-\uD9A0\uDE8E]"; + testString = "\uD9A0\uDE81"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertTrue(mat.matches()); + } + + public void testDotConstructionWithSurrogatesSupplementary() { + String patString = "."; + String testString = "\uD9A0\uDE81"; + Pattern pat = Pattern.compile(patString); + Matcher mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "\uDE81"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "\uD9A0"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "\n"; + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + patString = ".*\uDE81"; + testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81"; + pat = Pattern.compile(patString); + mat = pat.matcher(testString); + assertFalse(mat.matches()); + + testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + patString = ".*"; + testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81"; + pat = Pattern.compile(patString, Pattern.DOTALL); + mat = pat.matcher(testString); + 
assertTrue(mat.matches()); + } + + public void testQuantifiersWithSurrogatesSupplementary() { + String patString = "\uD9A0\uDE81*abc"; + String testString = "\uD9A0\uDE81\uD9A0\uDE81abc"; + Pattern pat = Pattern.compile(patString); + Matcher mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "abc"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + } + + public void testAlternationsWithSurrogatesSupplementary() { + String patString = "\uDE81|\uD9A0\uDE81|\uD9A0"; + String testString = "\uD9A0"; + Pattern pat = Pattern.compile(patString); + Matcher mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "\uDE81"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "\uD9A0\uDE81"; + mat = pat.matcher(testString); + assertTrue(mat.matches()); + + testString = "\uDE81\uD9A0"; + mat = pat.matcher(testString); + assertFalse(mat.matches()); + } + + public void testGroupsWithSurrogatesSupplementary() { + + // this pattern matches nothing + String patString = "(\uD9A0)\uDE81"; + String testString = "\uD9A0\uDE81"; + Pattern pat = Pattern.compile(patString); + Matcher mat = pat.matcher(testString); + assertFalse(mat.matches()); + + patString = "(\uD9A0)"; + testString = "\uD9A0\uDE81"; + pat = Pattern.compile(patString, Pattern.DOTALL); + mat = pat.matcher(testString); + assertFalse(mat.find()); + } + + /* + * Regression test for HARMONY-688 + */ + public void testUnicodeCategoryWithSurrogatesSupplementary() { + Pattern p = Pattern.compile("\\p{javaLowerCase}"); + Matcher matcher = p.matcher("\uD801\uDC28"); + assertTrue(matcher.find()); + } + + public static void main(String[] args) { + junit.textui.TestRunner.run(PatternTest.class); + } +} diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ReplaceTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ReplaceTest.java new file mode 100644 index 000000000..1583ee391 --- /dev/null +++ 
b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ReplaceTest.java @@ -0,0 +1,90 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.teavm.classlib.java.util.regex; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import junit.framework.TestCase; + +@SuppressWarnings("nls") +public class ReplaceTest extends TestCase { + + public void testSimpleReplace() throws PatternSyntaxException { + String target, pattern, repl; + + target = "foobarfobarfoofo1"; + pattern = "fo[^o]"; + repl = "xxx"; + + Pattern p = Pattern.compile(pattern); + Matcher m = p.matcher(target); + + assertEquals("foobarxxxarfoofo1", m.replaceFirst(repl)); + assertEquals("foobarxxxarfooxxx", m.replaceAll(repl)); + } + + public void testCaptureReplace() { + String target, pattern, repl, s; + Pattern p = null; + Matcher m; + + target = "[31]foo;bar[42];[99]xyz"; + pattern = "\\[([0-9]+)\\]([a-z]+)"; + repl = "$2[$1]"; + + p = Pattern.compile(pattern); + m = p.matcher(target); + s = m.replaceFirst(repl); + assertEquals("foo[31];bar[42];[99]xyz", s); + s = m.replaceAll(repl); + assertEquals("foo[31];bar[42];xyz[99]", s); + + target = 
"[31]foo(42)bar{63}zoo;[12]abc(34)def{56}ghi;{99}xyz[88]xyz(77)xyz;"; + pattern = "\\[([0-9]+)\\]([a-z]+)\\(([0-9]+)\\)([a-z]+)\\{([0-9]+)\\}([a-z]+)"; + repl = "[$5]$6($3)$4{$1}$2"; + p = Pattern.compile(pattern); + m = p.matcher(target); + s = m.replaceFirst(repl); + // System.out.println(s); + assertEquals( + "[63]zoo(42)bar{31}foo;[12]abc(34)def{56}ghi;{99}xyz[88]xyz(77)xyz;", + s); + s = m.replaceAll(repl); + // System.out.println(s); + assertEquals( + "[63]zoo(42)bar{31}foo;[56]ghi(34)def{12}abc;{99}xyz[88]xyz(77)xyz;", + s); + } + + public void testEscapeReplace() { + String target, pattern, repl, s; + + target = "foo'bar''foo"; + pattern = "'"; + repl = "\\'"; + s = target.replaceAll(pattern, repl); + assertEquals("foo'bar''foo", s); + repl = "\\\\'"; + s = target.replaceAll(pattern, repl); + assertEquals("foo\\'bar\\'\\'foo", s); + repl = "\\$3"; + s = target.replaceAll(pattern, repl); + assertEquals("foo$3bar$3$3foo", s); + } +} diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/SplitTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/SplitTest.java new file mode 100644 index 000000000..050bcb5eb --- /dev/null +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/SplitTest.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.teavm.classlib.java.util.regex; + +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import junit.framework.TestCase; + +/** + * TODO Type description + */ +@SuppressWarnings("nls") +public class SplitTest extends TestCase { + + public void testSimple() { + Pattern p = Pattern.compile("/"); + String[] results = p.split("have/you/done/it/right"); + String[] expected = new String[] { "have", "you", "done", "it", "right" }; + assertEquals(expected.length, results.length); + for (int i = 0; i < expected.length; i++) { + assertEquals(results[i], expected[i]); + } + } + + public void testSplit1() throws PatternSyntaxException { + Pattern p = Pattern.compile(" "); + + String input = "poodle zoo"; + String tokens[]; + + tokens = p.split(input, 1); + assertEquals(1, tokens.length); + assertTrue(tokens[0].equals(input)); + tokens = p.split(input, 2); + assertEquals(2, tokens.length); + assertEquals("poodle", tokens[0]); + assertEquals("zoo", tokens[1]); + tokens = p.split(input, 5); + assertEquals(2, tokens.length); + assertEquals("poodle", tokens[0]); + assertEquals("zoo", tokens[1]); + tokens = p.split(input, -2); + assertEquals(2, tokens.length); + assertEquals("poodle", tokens[0]); + assertEquals("zoo", tokens[1]); + tokens = p.split(input, 0); + assertEquals(2, tokens.length); + assertEquals("poodle", tokens[0]); + assertEquals("zoo", tokens[1]); + tokens = p.split(input); + assertEquals(2, tokens.length); + assertEquals("poodle", tokens[0]); + assertEquals("zoo", tokens[1]); + + p = Pattern.compile("d"); + + tokens = p.split(input, 1); + assertEquals(1, tokens.length); + assertTrue(tokens[0].equals(input)); + tokens = p.split(input, 2); + assertEquals(2, tokens.length); + assertEquals("poo", tokens[0]); + assertEquals("le zoo", tokens[1]); + tokens = p.split(input, 5); + assertEquals(2, tokens.length); + 
assertEquals("poo", tokens[0]); + assertEquals("le zoo", tokens[1]); + tokens = p.split(input, -2); + assertEquals(2, tokens.length); + assertEquals("poo", tokens[0]); + assertEquals("le zoo", tokens[1]); + tokens = p.split(input, 0); + assertEquals(2, tokens.length); + assertEquals("poo", tokens[0]); + assertEquals("le zoo", tokens[1]); + tokens = p.split(input); + assertEquals(2, tokens.length); + assertEquals("poo", tokens[0]); + assertEquals("le zoo", tokens[1]); + + p = Pattern.compile("o"); + + tokens = p.split(input, 1); + assertEquals(1, tokens.length); + assertTrue(tokens[0].equals(input)); + tokens = p.split(input, 2); + assertEquals(2, tokens.length); + assertEquals("p", tokens[0]); + assertEquals("odle zoo", tokens[1]); + tokens = p.split(input, 5); + assertEquals(5, tokens.length); + assertEquals("p", tokens[0]); + assertTrue(tokens[1].equals("")); + assertEquals("dle z", tokens[2]); + assertTrue(tokens[3].equals("")); + assertTrue(tokens[4].equals("")); + tokens = p.split(input, -2); + assertEquals(5, tokens.length); + assertEquals("p", tokens[0]); + assertTrue(tokens[1].equals("")); + assertEquals("dle z", tokens[2]); + assertTrue(tokens[3].equals("")); + assertTrue(tokens[4].equals("")); + tokens = p.split(input, 0); + assertEquals(3, tokens.length); + assertEquals("p", tokens[0]); + assertTrue(tokens[1].equals("")); + assertEquals("dle z", tokens[2]); + tokens = p.split(input); + assertEquals(3, tokens.length); + assertEquals("p", tokens[0]); + assertTrue(tokens[1].equals("")); + assertEquals("dle z", tokens[2]); + } + + public void testSplit2() { + Pattern p = Pattern.compile(""); + String s[]; + s = p.split("a", -1); + assertEquals(3, s.length); + assertEquals("", s[0]); + assertEquals("a", s[1]); + assertEquals("", s[2]); + + s = p.split("", -1); + assertEquals(1, s.length); + assertEquals("", s[0]); + + s = p.split("abcd", -1); + assertEquals(6, s.length); + assertEquals("", s[0]); + assertEquals("a", s[1]); + assertEquals("b", s[2]); + 
assertEquals("c", s[3]); + assertEquals("d", s[4]); + assertEquals("", s[5]); + } + + public void testSplitSupplementaryWithEmptyString() { + + /* + * See http://www.unicode.org/reports/tr18/#Supplementary_Characters We + * have to treat text as code points not code units. + */ + Pattern p = Pattern.compile(""); + String s[]; + s = p.split("a\ud869\uded6b", -1); + // TODO investigate, why this fails and uncomment + /*assertEquals(5, s.length); + assertEquals("", s[0]); + assertEquals("a", s[1]); + assertEquals("\ud869\uded6", s[2]); + assertEquals("b", s[3]); + assertEquals("", s[4]);*/ + } +} From e2b2108a973f87977b68791a61d3707658d869c2 Mon Sep 17 00:00:00 2001 From: Alexey Andreev Date: Sat, 10 May 2014 20:34:30 +0400 Subject: [PATCH 2/4] Fixes formatting --- teavm-classlib/pom.xml | 1 + .../java/util/regex/AbstractCharClass.java | 151 +- .../util/regex/AbstractLineTerminator.java | 7 +- .../classlib/java/util/regex/AbstractSet.java | 159 +- .../classlib/java/util/regex/AheadFSet.java | 8 +- .../util/regex/AltGroupQuantifierSet.java | 12 +- .../java/util/regex/AltQuantifierSet.java | 5 +- .../classlib/java/util/regex/AtomicFSet.java | 8 +- .../java/util/regex/AtomicJointSet.java | 19 +- .../java/util/regex/BackReferenceSet.java | 27 +- .../util/regex/BackReferencedSingleSet.java | 68 +- .../classlib/java/util/regex/BehindFSet.java | 6 +- .../java/util/regex/CIBackReferenceSet.java | 29 +- .../classlib/java/util/regex/CICharSet.java | 10 +- .../java/util/regex/CISequenceSet.java | 39 +- .../classlib/java/util/regex/CanClasses.java | 439 ---- .../classlib/java/util/regex/CharClass.java | 169 +- .../classlib/java/util/regex/CharSet.java | 22 +- .../regex/CompositeGroupQuantifierSet.java | 19 +- .../util/regex/CompositeQuantifierSet.java | 12 +- .../java/util/regex/CompositeRangeSet.java | 33 +- .../java/util/regex/DecomposedCharSet.java | 161 +- .../java/util/regex/DotAllQuantifierSet.java | 13 +- .../classlib/java/util/regex/DotAllSet.java | 43 +- 
.../java/util/regex/DotQuantifierSet.java | 39 +- .../classlib/java/util/regex/DotSet.java | 17 +- .../classlib/java/util/regex/EOISet.java | 11 +- .../classlib/java/util/regex/EOLSet.java | 21 +- .../classlib/java/util/regex/EmptySet.java | 53 +- .../teavm/classlib/java/util/regex/FSet.java | 13 +- .../classlib/java/util/regex/FinalSet.java | 8 +- .../java/util/regex/GroupQuantifierSet.java | 12 +- .../util/regex/HangulDecomposedCharSet.java | 79 +- .../java/util/regex/HashDecompositions.java | 2072 ----------------- .../java/util/regex/HighSurrogateCharSet.java | 49 +- .../classlib/java/util/regex/JointSet.java | 96 +- .../java/util/regex/LeafQuantifierSet.java | 16 +- .../classlib/java/util/regex/LeafSet.java | 6 +- .../teavm/classlib/java/util/regex/Lexer.java | 977 ++++---- .../util/regex/LowHighSurrogateRangeSet.java | 10 +- .../java/util/regex/LowSurrogateCharSet.java | 47 +- .../java/util/regex/MatchResultImpl.java | 23 +- .../java/util/regex/MultiLineEOLSet.java | 39 +- .../java/util/regex/MultiLineSOLSet.java | 19 +- .../java/util/regex/NegativeLookAhead.java | 12 +- .../java/util/regex/NegativeLookBehind.java | 9 +- .../classlib/java/util/regex/NonCapFSet.java | 10 +- .../java/util/regex/NonCapJointSet.java | 7 +- .../util/regex/PosAltGroupQuantifierSet.java | 19 +- .../regex/PosCompositeGroupQuantifierSet.java | 14 +- .../util/regex/PosPlusGroupQuantifierSet.java | 14 +- .../java/util/regex/PositiveLookAhead.java | 13 +- .../java/util/regex/PositiveLookBehind.java | 19 +- .../regex/PossessiveAltQuantifierSet.java | 12 +- .../PossessiveCompositeQuantifierSet.java | 12 +- .../regex/PossessiveGroupQuantifierSet.java | 12 +- .../util/regex/PossessiveQuantifierSet.java | 8 +- .../java/util/regex/PreviousMatch.java | 10 +- .../classlib/java/util/regex/Quantifier.java | 9 +- .../java/util/regex/QuantifierSet.java | 108 +- .../classlib/java/util/regex/RangeSet.java | 14 +- .../util/regex/RelAltGroupQuantifierSet.java | 8 +- 
.../regex/RelCompositeGroupQuantifierSet.java | 14 +- .../util/regex/ReluctantAltQuantifierSet.java | 8 +- .../ReluctantCompositeQuantifierSet.java | 9 +- .../regex/ReluctantGroupQuantifierSet.java | 9 +- .../util/regex/ReluctantQuantifierSet.java | 4 +- .../classlib/java/util/regex/SOLSet.java | 5 +- .../classlib/java/util/regex/SequenceSet.java | 5 + .../java/util/regex/SingleDecompositions.java | 971 -------- .../classlib/java/util/regex/SingleSet.java | 76 +- .../java/util/regex/SupplCharSet.java | 36 +- .../java/util/regex/SupplRangeSet.java | 33 +- .../classlib/java/util/regex/TMatcher.java | 103 +- .../classlib/java/util/regex/TPattern.java | 1211 +++++----- .../util/regex/TPatternSyntaxException.java | 6 +- .../java/util/regex/UCIBackReferenceSet.java | 16 +- .../classlib/java/util/regex/UCICharSet.java | 4 +- .../classlib/java/util/regex/UCIRangeSet.java | 4 +- .../java/util/regex/UCISequenceSet.java | 8 +- .../java/util/regex/UCISupplCharSet.java | 12 +- .../java/util/regex/UCISupplRangeSet.java | 2 + .../classlib/java/util/regex/UEOLSet.java | 9 +- .../java/util/regex/UMultiLineEOLSet.java | 13 +- .../java/util/regex/UnicodeCategory.java | 1 + .../java/util/regex/UnicodeCategoryScope.java | 1 + .../java/util/regex/UnifiedQuantifierSet.java | 21 +- .../java/util/regex/WordBoundary.java | 21 +- .../classlib/java/util/regex/PatternTest.java | 398 +--- .../classlib/java/util/regex/ReplaceTest.java | 14 +- .../classlib/java/util/regex/SplitTest.java | 22 +- 91 files changed, 2271 insertions(+), 6142 deletions(-) delete mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CanClasses.java delete mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HashDecompositions.java delete mode 100644 teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleDecompositions.java diff --git a/teavm-classlib/pom.xml b/teavm-classlib/pom.xml index 1735093c3..aaa812b8a 100644 --- a/teavm-classlib/pom.xml +++ 
b/teavm-classlib/pom.xml @@ -83,6 +83,7 @@ java.util java.util.logging java.util.concurrent + java.util.regex -output ${project.build.directory}/jcl-report diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractCharClass.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractCharClass.java index 14da9ff70..3b5844884 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractCharClass.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractCharClass.java @@ -36,14 +36,12 @@ package org.teavm.classlib.java.util.regex; import java.util.BitSet; -import java.util.ListResourceBundle; /** - * This class represents character classes, i.e. - * sets of character either predefined or user defined. + * This class represents character classes, i.e. sets of character either + * predefined or user defined. * - * Note, this class represent token, not node, so being - * constructed by lexer. + * Note, this class represent token, not node, so being constructed by lexer. * * @author Nikolay A. Kuznetsov */ @@ -52,7 +50,7 @@ abstract class AbstractCharClass extends SpecialToken { protected boolean altSurrogates; - //Character.MAX_SURROGATE - Character.MIN_SURROGATE + 1 + // Character.MAX_SURROGATE - Character.MIN_SURROGATE + 1 static int SURROGATE_CARDINALITY = 2048; BitSet lowHighSurrogates = new BitSet(SURROGATE_CARDINALITY); @@ -64,8 +62,8 @@ abstract class AbstractCharClass extends SpecialToken { static PredefinedCharacterClasses charClasses = new PredefinedCharacterClasses(); /* - * Indicates if this class may contain supplementary Unicode codepoints. - * If this flag is specified it doesn't mean that this class contains + * Indicates if this class may contain supplementary Unicode codepoints. If + * this flag is specified it doesn't mean that this class contains * supplementary characters but may contain. 
*/ protected boolean mayContainSupplCodepoints = false; @@ -79,8 +77,8 @@ abstract class AbstractCharClass extends SpecialToken { abstract public boolean contains(int ch); /** - * Returns BitSet representing this character class or null - * if this character class does not have character representation; + * Returns BitSet representing this character class or null if + * this character class does not have character representation; * * @return bitset */ @@ -93,15 +91,15 @@ abstract class AbstractCharClass extends SpecialToken { } public boolean hasLowHighSurrogates() { - return altSurrogates - ? lowHighSurrogates.nextClearBit(0) < SURROGATE_CARDINALITY - : lowHighSurrogates.nextSetBit(0) < SURROGATE_CARDINALITY; + return altSurrogates ? lowHighSurrogates.nextClearBit(0) < SURROGATE_CARDINALITY : lowHighSurrogates + .nextSetBit(0) < SURROGATE_CARDINALITY; } public boolean mayContainSupplCodepoints() { return mayContainSupplCodepoints; } + @Override public int getType() { return SpecialToken.TOK_CHARCLASS; } @@ -116,13 +114,12 @@ abstract class AbstractCharClass extends SpecialToken { final BitSet lHS = getLowHighSurrogates(); charClassWithSurrogates = new AbstractCharClass() { + @Override public boolean contains(int ch) { int index = ch - Character.MIN_SURROGATE; - return ((index >= 0) - && (index < AbstractCharClass.SURROGATE_CARDINALITY)) - ? this.altSurrogates ^ lHS.get(index) - : false; + return ((index >= 0) && (index < AbstractCharClass.SURROGATE_CARDINALITY)) ? this.altSurrogates ^ + lHS.get(index) : false; } }; charClassWithSurrogates.setNegative(this.altSurrogates); @@ -137,22 +134,19 @@ abstract class AbstractCharClass extends SpecialToken { final AbstractCharClass thisClass = this; charClassWithoutSurrogates = new AbstractCharClass() { + @Override public boolean contains(int ch) { int index = ch - Character.MIN_SURROGATE; - boolean containslHS = ((index >= 0) - && (index < AbstractCharClass.SURROGATE_CARDINALITY)) - ? 
this.altSurrogates ^ lHS.get(index) - : false; + boolean containslHS = ((index >= 0) && (index < AbstractCharClass.SURROGATE_CARDINALITY)) ? this.altSurrogates ^ + lHS.get(index) + : false; - - return thisClass.contains(ch) - && !containslHS; + return thisClass.contains(ch) && !containslHS; } }; charClassWithoutSurrogates.setNegative(isNegative()); - charClassWithoutSurrogates.mayContainSupplCodepoints - = mayContainSupplCodepoints; + charClassWithoutSurrogates.mayContainSupplCodepoints = mayContainSupplCodepoints; } return charClassWithoutSurrogates; @@ -201,16 +195,14 @@ abstract class AbstractCharClass extends SpecialToken { return cc.contains(ch); } - public static boolean intersects(AbstractCharClass cc1, - AbstractCharClass cc2) { + public static boolean intersects(AbstractCharClass cc1, AbstractCharClass cc2) { if (cc1.getBits() == null || cc2.getBits() == null) return true; return cc1.getBits().intersects(cc2.getBits()); } - public static AbstractCharClass getPredefinedClass(String name, - boolean negative) { - return ((LazyCharClass) charClasses.getObject(name)).getValue(negative); + public static AbstractCharClass getPredefinedClass(String name, boolean negative) { + return ((LazyCharClass)charClasses.getObject(name)).getValue(negative); } abstract static class LazyCharClass { @@ -233,12 +225,14 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyDigit extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new CharClass().add('0', '9'); } } static class LazyNonDigit extends LazyDigit { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = super.computeValue().setNegative(true); @@ -248,6 +242,7 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazySpace extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { /* 9-13 - \t\n\x0B\f\r; 32 - ' ' */ return new CharClass().add(9, 13).add(32); @@ -255,6 +250,7 @@ abstract class 
AbstractCharClass extends SpecialToken { } static class LazyNonSpace extends LazySpace { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = super.computeValue().setNegative(true); @@ -264,13 +260,14 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyWord extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { - return new CharClass().add('a', 'z').add('A', 'Z').add('0', '9') - .add('_'); + return new CharClass().add('a', 'z').add('A', 'Z').add('0', '9').add('_'); } } static class LazyNonWord extends LazyWord { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = super.computeValue().setNegative(true); @@ -280,70 +277,79 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyLower extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new CharClass().add('a', 'z'); } } static class LazyUpper extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new CharClass().add('A', 'Z'); } } static class LazyASCII extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new CharClass().add(0x00, 0x7F); } } static class LazyAlpha extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new CharClass().add('a', 'z').add('A', 'Z'); } } static class LazyAlnum extends LazyAlpha { + @Override protected AbstractCharClass computeValue() { - return ((CharClass) super.computeValue()).add('0', '9'); + return ((CharClass)super.computeValue()).add('0', '9'); } } static class LazyPunct extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { /* Punctuation !"#$%&'()*+,-./:;<=>?@ [\]^_` {|}~ */ - return new CharClass().add(0x21, 0x40).add(0x5B, 0x60).add(0x7B, - 0x7E); + return new CharClass().add(0x21, 0x40).add(0x5B, 0x60).add(0x7B, 0x7E); } } static class LazyGraph extends LazyAlnum { + @Override protected 
AbstractCharClass computeValue() { /* plus punctuation */ - return ((CharClass) super.computeValue()).add(0x21, 0x40).add(0x5B, - 0x60).add(0x7B, 0x7E); + return ((CharClass)super.computeValue()).add(0x21, 0x40).add(0x5B, 0x60).add(0x7B, 0x7E); } } static class LazyPrint extends LazyGraph { + @Override protected AbstractCharClass computeValue() { - return ((CharClass) super.computeValue()).add(0x20); + return ((CharClass)super.computeValue()).add(0x20); } } static class LazyBlank extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new CharClass().add(' ').add('\t'); } } static class LazyCntrl extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new CharClass().add(0x00, 0x1F).add(0x7F); } } static class LazyXDigit extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new CharClass().add('0', '9').add('a', 'f').add('A', 'F'); } @@ -357,6 +363,7 @@ abstract class AbstractCharClass extends SpecialToken { this.end = end; } + @Override public AbstractCharClass computeValue() { AbstractCharClass chCl = new CharClass().add(start, end); return chCl; @@ -364,6 +371,7 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazySpecialsBlock extends LazyCharClass { + @Override public AbstractCharClass computeValue() { return new CharClass().add(0xFEFF, 0xFEFF).add(0xFFF0, 0xFFFD); } @@ -381,13 +389,13 @@ abstract class AbstractCharClass extends SpecialToken { this.category = cat; } - public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints, - boolean containsAllSurrogates) { + public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints, boolean containsAllSurrogates) { this.containsAllSurrogates = containsAllSurrogates; this.mayContainSupplCodepoints = mayContainSupplCodepoints; this.category = cat; } + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new UnicodeCategoryScope(category); if 
(containsAllSurrogates) { @@ -410,26 +418,30 @@ abstract class AbstractCharClass extends SpecialToken { this.mayContainSupplCodepoints = mayContainSupplCodepoints; this.category = cat; } - public LazyCategory(int cat, boolean mayContainSupplCodepoints, - boolean containsAllSurrogates) { + + public LazyCategory(int cat, boolean mayContainSupplCodepoints, boolean containsAllSurrogates) { this.containsAllSurrogates = containsAllSurrogates; this.mayContainSupplCodepoints = mayContainSupplCodepoints; this.category = cat; } + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new UnicodeCategory(category); if (containsAllSurrogates) { chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY); } - chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;; + chCl.mayContainSupplCodepoints = mayContainSupplCodepoints; + ; return chCl; } } static class LazyJavaLowerCase extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isLowerCase(ch); } @@ -441,8 +453,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaUpperCase extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isUpperCase(ch); } @@ -454,8 +468,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaWhitespace extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isWhitespace(ch); } @@ -464,11 +480,13 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaMirrored extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new AbstractCharClass() { + @Override public boolean 
contains(int ch) { // TODO implement this method and uncomment - //return Character.isMirrored(ch); + // return Character.isMirrored(ch); return false; } }; @@ -476,8 +494,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaDefined extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isDefined(ch); } @@ -490,8 +510,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaDigit extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isDigit(ch); } @@ -503,8 +525,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaIdentifierIgnorable extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isIdentifierIgnorable(ch); } @@ -516,8 +540,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaISOControl extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isISOControl(ch); } @@ -526,8 +552,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaJavaIdentifierPart extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isJavaIdentifierPart(ch); } @@ -539,8 +567,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaJavaIdentifierStart extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { 
AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isJavaIdentifierStart(ch); } @@ -552,8 +582,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaLetter extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isLetter(ch); } @@ -565,8 +597,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaLetterOrDigit extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isLetterOrDigit(ch); } @@ -578,8 +612,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaSpaceChar extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isSpaceChar(ch); } @@ -588,8 +624,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaTitleCase extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { return new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isTitleCase(ch); } @@ -598,8 +636,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaUnicodeIdentifierPart extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isUnicodeIdentifierPart(ch); } @@ -611,8 +651,10 @@ abstract class AbstractCharClass extends SpecialToken { } static class LazyJavaUnicodeIdentifierStart extends LazyCharClass { + @Override protected AbstractCharClass computeValue() { AbstractCharClass chCl = new 
AbstractCharClass() { + @Override public boolean contains(int ch) { return Character.isUnicodeIdentifierStart(ch); } @@ -624,11 +666,10 @@ abstract class AbstractCharClass extends SpecialToken { } /** - * character classes generated from - * http://www.unicode.org/reports/tr18/ + * character classes generated from http://www.unicode.org/reports/tr18/ * http://www.unicode.org/Public/4.1.0/ucd/Blocks.txt */ - static final class PredefinedCharacterClasses { + static final class PredefinedCharacterClasses { static LazyCharClass space = new LazySpace(); static LazyCharClass digit = new LazyDigit(); @@ -823,13 +864,12 @@ abstract class AbstractCharClass extends SpecialToken { { "Cf", new LazyCategory(Character.FORMAT, true) }, { "Co", new LazyCategory(Character.PRIVATE_USE, true) }, { "Cs", new LazyCategory(Character.SURROGATE, false, true) }, - {"IsP", new LazyCategoryScope((1 << Character.DASH_PUNCTUATION) | - (1 << Character.START_PUNCTUATION) | - (1 << Character.END_PUNCTUATION) | - (1 << Character.CONNECTOR_PUNCTUATION) | - (1 << Character.OTHER_PUNCTUATION) | - (1 << Character.INITIAL_QUOTE_PUNCTUATION) | - (1 << Character.FINAL_QUOTE_PUNCTUATION), true)}, + { + "IsP", + new LazyCategoryScope((1 << Character.DASH_PUNCTUATION) | (1 << Character.START_PUNCTUATION) | + (1 << Character.END_PUNCTUATION) | (1 << Character.CONNECTOR_PUNCTUATION) | + (1 << Character.OTHER_PUNCTUATION) | (1 << Character.INITIAL_QUOTE_PUNCTUATION) | + (1 << Character.FINAL_QUOTE_PUNCTUATION), true) }, { "Pd", new LazyCategory(Character.DASH_PUNCTUATION, false) }, { "Ps", new LazyCategory(Character.START_PUNCTUATION, false) }, { "Pe", new LazyCategory(Character.END_PUNCTUATION, false) }, @@ -842,6 +882,7 @@ abstract class AbstractCharClass extends SpecialToken { { "So", new LazyCategory(Character.OTHER_SYMBOL, true) }, { "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION, false) }, { "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION, false) } }; + public Object getObject(String 
name) { for (int i = 0; i < contents.length; ++i) { Object[] row = contents[i]; diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractLineTerminator.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractLineTerminator.java index 99d33e14c..02770355f 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractLineTerminator.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractLineTerminator.java @@ -54,10 +54,12 @@ abstract class AbstractLineTerminator { if (unixLT != null) return unixLT; unixLT = new AbstractLineTerminator() { + @Override public boolean isLineTerminator(int ch) { return ch == '\n'; } + @Override public boolean isAfterLineTerminator(int ch, int ch2) { return ch == '\n'; } @@ -67,13 +69,14 @@ abstract class AbstractLineTerminator { if (unicodeLT != null) return unicodeLT; unicodeLT = new AbstractLineTerminator() { + @Override public boolean isLineTerminator(int ch) { return (ch == '\n' || ch == '\r' || ch == '\u0085' || (ch | 1) == '\u2029'); } + @Override public boolean isAfterLineTerminator(int ch, int ch2) { - return (ch == '\n' || ch == '\u0085' || (ch | 1) == '\u2029') - || (ch == '\r' && ch2 != '\n'); + return (ch == '\n' || ch == '\u0085' || (ch | 1) == '\u2029') || (ch == '\r' && ch2 != '\n'); } }; return unicodeLT; diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractSet.java index 19788c1cf..3c32e1fff 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AbstractSet.java @@ -36,8 +36,8 @@ package org.teavm.classlib.java.util.regex; /** - * Basic class for nodes, representing given regular expression. 
- * Note: All the classes representing nodes has set prefix; + * Basic class for nodes, representing given regular expression. Note: All the + * classes representing nodes has set prefix; * * @author Nikolay A. Kuznetsov */ @@ -75,16 +75,19 @@ abstract class AbstractSet { } /** - * Checks if this node matches in given position and recursively call - * next node matches on positive self match. Returns positive integer if - * entire match succeed, negative otherwise - * @param stringIndex - string index to start from; - * @param testString - input string - * @param matchResult - MatchResult to sore result into + * Checks if this node matches in given position and recursively call next + * node matches on positive self match. Returns positive integer if entire + * match succeed, negative otherwise + * + * @param stringIndex + * - string index to start from; + * @param testString + * - input string + * @param matchResult + * - MatchResult to sore result into * @return -1 if match fails or n > 0; */ - public abstract int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult); + public abstract int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult); /** * Attempts to apply pattern starting from this set/stringIndex; returns @@ -93,10 +96,9 @@ abstract class AbstractSet { * matchResult; * * Note: this is default implementation for find method, it's based on - * matches, subclasses do not have to override find method unless - * more effective find method exists for a particular node type - * (sequence, i.e. substring, for example). Same applies for find back - * method. + * matches, subclasses do not have to override find method unless more + * effective find method exists for a particular node type (sequence, i.e. + * substring, for example). Same applies for find back method. 
* * @param stringIndex * starting index @@ -106,8 +108,7 @@ abstract class AbstractSet { * result of the match * @return last searched index */ - public int find(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int length = matchResult.getRightBound(); while (stringIndex <= length) { if (matches(stringIndex, testString, matchResult) >= 0) { @@ -120,19 +121,18 @@ abstract class AbstractSet { } /** - * @param stringIndex - - * an index, to finish search back (left limit) - * @param startSearch - - * an index to start search from (right limit) - * @param testString - - * test string; + * @param stringIndex + * - an index, to finish search back (left limit) + * @param startSearch + * - an index to start search from (right limit) + * @param testString + * - test string; * @param matchResult * match result * @return an index to start back search next time if this search fails(new * left bound); if this search fails the value is negative; */ - public int findBack(int stringIndex, int startSearch, - CharSequence testString, MatchResultImpl matchResult) { + public int findBack(int stringIndex, int startSearch, CharSequence testString, MatchResultImpl matchResult) { while (startSearch >= stringIndex) { if (matches(startSearch, testString, matchResult) >= 0) { return startSearch; @@ -144,10 +144,10 @@ abstract class AbstractSet { } /** - * Returns true, if this node has consumed any characters during - * positive match attempt, for example node representing character always - * consumes one character if it matches. If particular node matches - * empty sting this method will return false; + * Returns true, if this node has consumed any characters during positive + * match attempt, for example node representing character always consumes + * one character if it matches. 
If particular node matches empty sting this + * method will return false; * * @param matchResult * @return @@ -155,8 +155,7 @@ abstract class AbstractSet { public abstract boolean hasConsumed(MatchResultImpl matchResult); /** - * Returns name for the particular node type. - * Used for debugging purposes. + * Returns name for the particular node type. Used for debugging purposes. */ protected abstract String getName(); @@ -169,9 +168,10 @@ abstract class AbstractSet { } protected String getQualifiedName() { - return "<" + index + ":" + getName() + ">"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + return "<" + index + ":" + getName() + ">"; } + @Override public String toString() { return getQualifiedName(); } @@ -185,6 +185,7 @@ abstract class AbstractSet { /** * Sets next abstract set + * * @param next * The next to set. */ @@ -193,15 +194,15 @@ abstract class AbstractSet { } /** - * Returns true if the given node intersects with this one, - * false otherwise. - * This method is being used for quantifiers construction, - * lets consider the following regular expression (a|b)*ccc. + * Returns true if the given node intersects with this one, false otherwise. + * This method is being used for quantifiers construction, lets consider the + * following regular expression (a|b)*ccc. * - * (a|b) does not intersects with "ccc" and thus can be quantified - * greedily (w/o kickbacks), like *+ instead of *. + * (a|b) does not intersects with "ccc" and thus can be quantified greedily + * (w/o kickbacks), like *+ instead of *. * - * @param set - usually previous node + * @param set + * - usually previous node * * @return true if the given node intersects with this one */ @@ -210,64 +211,62 @@ abstract class AbstractSet { } /** - * This method is used for replacement backreferenced - * sets. + * This method is used for replacement backreferenced sets. 
* - * @param prev - node who references to this node - * @return null if current node need not to be replaced - * JointSet which is replacement of - * current node otherwise + * @param prev + * - node who references to this node + * @return null if current node need not to be replaced JointSet which is + * replacement of current node otherwise */ public JointSet processBackRefReplacement() { return null; } /** - * This method is used for traversing nodes after the - * first stage of compilation. + * This method is used for traversing nodes after the first stage of + * compilation. */ public void processSecondPass() { - this.isSecondPassVisited = true; + this.isSecondPassVisited = true; - if (next != null) { + if (next != null) { - if (!next.isSecondPassVisited) { + if (!next.isSecondPassVisited) { - /* - * Add here code to do during the pass - */ - JointSet set = next.processBackRefReplacement(); + /* + * Add here code to do during the pass + */ + JointSet set = next.processBackRefReplacement(); - if (set != null) { - next.isSecondPassVisited = true; - next =(AbstractSet) set; - } + if (set != null) { + next.isSecondPassVisited = true; + next = set; + } - /* - * End code to do during the pass - */ - next.processSecondPass(); - } else { + /* + * End code to do during the pass + */ + next.processSecondPass(); + } else { - /* - * We reach node through next but it is already traversed. - * You can see this situation for AltGroupQuantifierSet.next - * when we reach this node through - * AltGroupQuantifierSet.innerset. ... .next - */ + /* + * We reach node through next but it is already traversed. You + * can see this situation for AltGroupQuantifierSet.next when we + * reach this node through AltGroupQuantifierSet.innerset. ... 
+ * .next + */ - /* - * Add here code to do during the pass - */ - if (next instanceof SingleSet - && ((FSet) ((JointSet) next).fSet).isBackReferenced) { - next = next.next; - } + /* + * Add here code to do during the pass + */ + if (next instanceof SingleSet && ((FSet)((JointSet)next).fSet).isBackReferenced) { + next = next.next; + } - /* - * End code to do during the pass - */ - } - } + /* + * End code to do during the pass + */ + } + } } } \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AheadFSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AheadFSet.java index 2ef5d69f3..47ed0ac25 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AheadFSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AheadFSet.java @@ -41,17 +41,17 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. Kuznetsov */ class AheadFSet extends FSet { - public AheadFSet() { super(-1); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { return stringIndex; } + @Override protected String getName() { - return "AheadFSet"; //$NON-NLS-1$ + return "AheadFSet"; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltGroupQuantifierSet.java index 83ea21ad4..c27b4193f 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltGroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltGroupQuantifierSet.java @@ -37,19 +37,16 @@ package org.teavm.classlib.java.util.regex; /** * Represents "?" quantifier over composite sets. + * * @author Nikolay A. 
Kuznetsov */ class AltGroupQuantifierSet extends GroupQuantifierSet { - - public AltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, - int type) { + public AltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) { super(innerSet, next, type); - } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { if (!innerSet.hasConsumed(matchResult)) return next.matches(stringIndex, testString, matchResult); @@ -62,6 +59,7 @@ class AltGroupQuantifierSet extends GroupQuantifierSet { } } + @Override public void setNext(AbstractSet next) { super.setNext(next); innerSet.setNext(next); diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltQuantifierSet.java index 571ecde3b..8c476ec57 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AltQuantifierSet.java @@ -46,8 +46,8 @@ class AltQuantifierSet extends LeafQuantifierSet { super(innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int shift = 0; if ((shift = innerSet.matches(stringIndex, testString, matchResult)) >= 0) { @@ -57,6 +57,7 @@ class AltQuantifierSet extends LeafQuantifierSet { } } + @Override public void setNext(AbstractSet next) { super.setNext(next); innerSet.setNext(next); diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicFSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicFSet.java index 9c2b92d3d..9396a9e6a 100644 --- 
a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicFSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicFSet.java @@ -46,8 +46,8 @@ class AtomicFSet extends FSet { super(groupIndex); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int gr = getGroupIndex(); matchResult.setConsumed(gr, stringIndex - matchResult.getConsumed(gr)); @@ -60,10 +60,12 @@ class AtomicFSet extends FSet { return index; } + @Override protected String getName() { - return "AtomicFSet"; //$NON-NLS-1$ + return "AtomicFSet"; } + @Override public boolean hasConsumed(MatchResultImpl mr) { return false; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicJointSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicJointSet.java index 56e203beb..92fa543ba 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicJointSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/AtomicJointSet.java @@ -38,34 +38,32 @@ package org.teavm.classlib.java.util.regex; import java.util.ArrayList; /** - * This class represent atomic group (?>X), once X matches, - * this match become unchangeable till the end of the match. + * This class represent atomic group (?>X), once X matches, this match become + * unchangeable till the end of the match. * * @author Nikolay A. 
Kuznetsov */ class AtomicJointSet extends NonCapJointSet { - - public AtomicJointSet(ArrayList children, FSet fSet) { + public AtomicJointSet(ArrayList children, FSet fSet) { super(children, fSet); } /** * Returns stringIndex+shift, the next position to match */ - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int start = matchResult.getConsumed(groupIndex); matchResult.setConsumed(groupIndex, stringIndex); int size = children.size(); for (int i = 0; i < size; i++) { - AbstractSet e = (AbstractSet) children.get(i); + AbstractSet e = children.get(i); int shift = e.matches(stringIndex, testString, matchResult); if (shift >= 0) { // AtomicFset always returns true, but saves the index to run // this next.match() from; - return next.matches(((AtomicFSet) fSet).getIndex(), testString, - matchResult); + return next.matches(((AtomicFSet)fSet).getIndex(), testString, matchResult); } } @@ -73,14 +71,17 @@ class AtomicJointSet extends NonCapJointSet { return -1; } + @Override public void setNext(AbstractSet next) { this.next = next; } + @Override public AbstractSet getNext() { return next; } + @Override protected String getName() { return "NonCapJointSet"; //$NON-NLS-1$ } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferenceSet.java index 35a56ebbc..c7cd45de1 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferenceSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferenceSet.java @@ -46,15 +46,12 @@ class BackReferenceSet extends CIBackReferenceSet { super(groupIndex, consCounter); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, 
CharSequence testString, MatchResultImpl matchResult) { String group = getString(matchResult); - if (group == null - || (stringIndex + group.length()) > matchResult.getRightBound()) + if (group == null || (stringIndex + group.length()) > matchResult.getRightBound()) return -1; - int shift = testString.toString().startsWith(group, stringIndex) ? group - .length() - : -1; + int shift = testString.toString().startsWith(group, stringIndex) ? group.length() : -1; if (shift < 0) { return -1; @@ -63,8 +60,8 @@ class BackReferenceSet extends CIBackReferenceSet { return next.matches(stringIndex + shift, testString, matchResult); } - public int find(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) { String group = getString(matchResult); int strLength = matchResult.getLeftBound(); @@ -78,8 +75,7 @@ class BackReferenceSet extends CIBackReferenceSet { if (strIndex < 0) return -1; - if (next - .matches(strIndex + group.length(), testString, matchResult) >= 0) { + if (next.matches(strIndex + group.length(), testString, matchResult) >= 0) { return strIndex; } @@ -89,8 +85,8 @@ class BackReferenceSet extends CIBackReferenceSet { return -1; } - public int findBack(int strIndex, int lastIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) { String group = getString(matchResult); if (group == null) @@ -103,8 +99,7 @@ class BackReferenceSet extends CIBackReferenceSet { if (lastIndex < 0 || lastIndex < strIndex) return -1; - if (next.matches(lastIndex + group.length(), testString, - matchResult) >= 0) { + if (next.matches(lastIndex + group.length(), testString, matchResult) >= 0) { return lastIndex; } @@ -113,10 +108,12 @@ class BackReferenceSet extends CIBackReferenceSet { return -1; } + @Override public boolean first(AbstractSet set) { 
return true; } + @Override public String getName() { return "back reference: " + this.groupIndex; //$NON-NLS-1$ } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferencedSingleSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferencedSingleSet.java index 8e588d2e9..3578288dd 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferencedSingleSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BackReferencedSingleSet.java @@ -37,22 +37,19 @@ package org.teavm.classlib.java.util.regex; /** - * Group node over subexpression w/o alternations. - * This node is used if current group is referenced - * via backreference. + * Group node over subexpression w/o alternations. This node is used if current + * group is referenced via backreference. */ class BackReferencedSingleSet extends SingleSet { /* - * This class is needed only for overwriting find() - * and findBack() methods of SingleSet class, which is being - * back referenced. The following example explains the need - * for such substitution: - * Let's consider the pattern ".*(.)\\1". - * Leading .* works as follows: finds line terminator and runs findBack - * from that point. findBack method in its turn (in contrast to matches) - * sets group boundaries on the back trace. Thus at the point we + * This class is needed only for overwriting find() and findBack() methods + * of SingleSet class, which is being back referenced. The following example + * explains the need for such substitution: Let's consider the pattern + * ".*(.)\\1". Leading .* works as follows: finds line terminator and runs + * findBack from that point. findBack method in its turn (in contrast to + * matches) sets group boundaries on the back trace. Thus at the point we * try to match back reference(\\1) groups are not yet set. 
* * To fix this problem we replace backreferenced groups with instances of @@ -60,51 +57,51 @@ class BackReferencedSingleSet extends SingleSet { * performance, but ensure correctness of the match. */ - public BackReferencedSingleSet(AbstractSet child, FSet fSet) { + public BackReferencedSingleSet(AbstractSet child, FSet fSet) { super(child, fSet); } - public BackReferencedSingleSet(SingleSet node) { - super(node.kid, ((FSet) node.fSet)); - } + public BackReferencedSingleSet(SingleSet node) { + super(node.kid, ((FSet)node.fSet)); + } - public int find(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int res = 0; int lastIndex = matchResult.getRightBound(); int startSearch = stringIndex; for (; startSearch <= lastIndex; startSearch++) { - int saveStart = matchResult.getStart(groupIndex); + int saveStart = matchResult.getStart(groupIndex); - matchResult.setStart(groupIndex, startSearch); - res = kid.matches(startSearch, testString, matchResult); - if (res >= 0) { - res = startSearch; - break; - } else { - matchResult.setStart(groupIndex, saveStart); - } + matchResult.setStart(groupIndex, startSearch); + res = kid.matches(startSearch, testString, matchResult); + if (res >= 0) { + res = startSearch; + break; + } else { + matchResult.setStart(groupIndex, saveStart); + } } return res; } - public int findBack(int stringIndex, int lastIndex, - CharSequence testString, MatchResultImpl matchResult) { + @Override + public int findBack(int stringIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) { int res = 0; int startSearch = lastIndex; for (; startSearch >= stringIndex; startSearch--) { - int saveStart = matchResult.getStart(groupIndex); + int saveStart = matchResult.getStart(groupIndex); matchResult.setStart(groupIndex, startSearch); res = kid.matches(startSearch, testString, matchResult); if (res >= 0) { - res = 
startSearch; - break; + res = startSearch; + break; } else { - matchResult.setStart(groupIndex, saveStart); + matchResult.setStart(groupIndex, saveStart); } } @@ -112,11 +109,12 @@ class BackReferencedSingleSet extends SingleSet { } /** - * This method is used for replacement backreferenced - * sets. + * This method is used for replacement backreferenced sets. * - * @param prev - node who references to this node + * @param prev + * - node who references to this node */ + @Override public JointSet processBackRefReplacement() { return null; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BehindFSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BehindFSet.java index 37bd75872..517571574 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BehindFSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/BehindFSet.java @@ -43,19 +43,19 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. Kuznetsov */ class BehindFSet extends FSet { - public BehindFSet(int groupIndex) { super(groupIndex); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int gr = getGroupIndex(); int rightBound = matchResult.getConsumed(gr); return (rightBound == stringIndex) ? 
stringIndex : -1; } + @Override protected String getName() { return "BehindFSet"; //$NON-NLS-1$ } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIBackReferenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIBackReferenceSet.java index 45c3176ca..0e517834d 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIBackReferenceSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CIBackReferenceSet.java @@ -41,47 +41,41 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. Kuznetsov */ class CIBackReferenceSet extends JointSet { - protected int referencedGroup; - protected int consCounter; - /** - * @param substring - */ public CIBackReferenceSet(int groupIndex, int consCounter) { this.referencedGroup = groupIndex; this.consCounter = consCounter; } public int accepts(int strIndex, CharSequence testString) { - throw new TPatternSyntaxException("", "", 0); + throw new TPatternSyntaxException(strIndex + ", " + testString, "", 0); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { String group = getString(matchResult); - if (group == null - || (stringIndex + group.length()) > matchResult.getRightBound()) + if (group == null || (stringIndex + group.length()) > matchResult.getRightBound()) return -1; for (int i = 0; i < group.length(); i++) { - if (group.charAt(i) != testString.charAt(stringIndex + i) - && TPattern.getSupplement(group.charAt(i)) != testString - .charAt(stringIndex + i)) { + if (group.charAt(i) != testString.charAt(stringIndex + i) && + TPattern.getSupplement(group.charAt(i)) != testString.charAt(stringIndex + i)) { return -1; } } matchResult.setConsumed(consCounter, group.length()); - return next.matches(stringIndex + group.length(), testString, - matchResult); + return 
next.matches(stringIndex + group.length(), testString, matchResult); } + @Override public AbstractSet getNext() { return this.next; } + @Override public void setNext(AbstractSet next) { this.next = next; } @@ -89,13 +83,14 @@ class CIBackReferenceSet extends JointSet { protected String getString(MatchResultImpl matchResult) { String res = matchResult.getGroupNoCheck(referencedGroup); return res; - // return (res != null) ? res : ""; } + @Override public String getName() { - return "CI back reference: " + this.groupIndex; //$NON-NLS-1$ + return "CI back reference: " + this.groupIndex; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { int cons; boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CICharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CICharSet.java index 2d4537d62..31cdae8bb 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CICharSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CICharSet.java @@ -36,8 +36,7 @@ package org.teavm.classlib.java.util.regex; /** - * Represents node accepting single character in - * case insensitive manner. + * Represents node accepting single character in case insensitive manner. * * @author Nikolay A. Kuznetsov */ @@ -52,13 +51,14 @@ class CICharSet extends LeafSet { this.supplement = TPattern.getSupplement(ch); } + @Override public int accepts(int strIndex, CharSequence testString) { - return (this.ch == testString.charAt(strIndex) - || this.supplement == testString.charAt(strIndex)) ? 1 : -1; + return (this.ch == testString.charAt(strIndex) || this.supplement == testString.charAt(strIndex)) ? 
1 : -1; } + @Override protected String getName() { - return "CI " + ch; //$NON-NLS-1$ + return "CI " + ch; } protected char getChar() { diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CISequenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CISequenceSet.java index 797023461..1f564b966 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CISequenceSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CISequenceSet.java @@ -41,31 +41,28 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. Kuznetsov */ class CISequenceSet extends LeafSet { - private String string = null; - /** - * Constructs this sequence set - */ - CISequenceSet(StringBuffer substring) { - this.string = substring.toString(); - this.charCount = substring.length(); - } + CISequenceSet(StringBuffer substring) { + this.string = substring.toString(); + this.charCount = substring.length(); + } - public int accepts(int strIndex, CharSequence testString) { - for (int i = 0; i < string.length(); i++) { - if (string.charAt(i) != testString.charAt(strIndex + i) - && TPattern.getSupplement(string.charAt(i)) != testString - .charAt(strIndex + i)) { - return -1; - } - } + @Override + public int accepts(int strIndex, CharSequence testString) { + for (int i = 0; i < string.length(); i++) { + if (string.charAt(i) != testString.charAt(strIndex + i) && + TPattern.getSupplement(string.charAt(i)) != testString.charAt(strIndex + i)) { + return -1; + } + } - return string.length(); + return string.length(); - } + } - public String getName() { - return "CI sequence: " + string; //$NON-NLS-1$ - } + @Override + public String getName() { + return "CI sequence: " + string; + } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CanClasses.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CanClasses.java deleted file mode 100644 index a6e301e77..000000000 --- 
a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CanClasses.java +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright 2014 Alexey Andreev. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.teavm.classlib.java.util.regex; - -/** - * This class gives us a hashtable that contains canonical - * classes that are generated from - * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt. 
- */ -class CanClasses{ - - private static IntHash table = null; - - private CanClasses() { - } - - public static IntHash getHashCanClasses() { - if (table != null) { - return table; - } else { - table = new IntHash(384); - table.put(0x20D0, 230); - table.put(0x0A4D, 9); - table.put(0x0E49, 107); - table.put(0x0954, 230); - table.put(0x0E48, 107); - table.put(0x0953, 230); - table.put(0x0952, 220); - table.put(0x0951, 230); - table.put(0x094D, 9); - table.put(0x0E3A, 9); - table.put(0x17DD, 230); - table.put(0x0A3C, 7); - table.put(0x0E39, 103); - table.put(0x0E38, 103); - table.put(0x17D2, 9); - table.put(0x093C, 7); - table.put(0x06ED, 220); - table.put(0x06EC, 230); - table.put(0x06EB, 230); - table.put(0x06EA, 220); - table.put(0x06E8, 230); - table.put(0x06E7, 230); - table.put(0x06E4, 230); - table.put(0x06E3, 220); - table.put(0x06E2, 230); - table.put(0x06E1, 230); - table.put(0x06E0, 230); - table.put(0x06DF, 230); - table.put(0x06DC, 230); - table.put(0x06DB, 230); - table.put(0x06DA, 230); - table.put(0x0BCD, 9); - table.put(0x0486, 230); - table.put(0x0485, 230); - table.put(0x0484, 230); - table.put(0x0FC6, 220); - table.put(0x0483, 230); - table.put(0x06D9, 230); - table.put(0x06D8, 230); - table.put(0x06D7, 230); - table.put(0x06D6, 230); - table.put(0xA806, 9); - table.put(0x193B, 220); - table.put(0x193A, 230); - table.put(0x1939, 222); - table.put(0x0D4D, 9); - table.put(0x1A18, 220); - table.put(0x1A17, 230); - table.put(0x1D1AD, 230); - table.put(0x1D1AC, 230); - table.put(0x1D1AB, 230); - table.put(0x1D1AA, 230); - table.put(0xFB1E, 26); - table.put(0x0ECB, 122); - table.put(0x0ECA, 122); - table.put(0x0ACD, 9); - table.put(0x0EC9, 122); - table.put(0x0EC8, 122); - table.put(0x09CD, 9); - table.put(0x0ABC, 7); - table.put(0x0EB9, 118); - table.put(0x0EB8, 118); - table.put(0x05C7, 18); - table.put(0x05C5, 220); - table.put(0x05C4, 230); - table.put(0x05C2, 25); - table.put(0x05C1, 24); - table.put(0x036F, 230); - table.put(0x036E, 230); - 
table.put(0x09BC, 7); - table.put(0x036D, 230); - table.put(0x036C, 230); - table.put(0x036B, 230); - table.put(0x036A, 230); - table.put(0x05BF, 23); - table.put(0x05BD, 22); - table.put(0x05BC, 21); - table.put(0x05BB, 20); - table.put(0x0C56, 91); - table.put(0x0C55, 84); - table.put(0x0369, 230); - table.put(0x0368, 230); - table.put(0x0367, 230); - table.put(0x0366, 230); - table.put(0x0365, 230); - table.put(0x0364, 230); - table.put(0x0363, 230); - table.put(0x0362, 233); - table.put(0x05B9, 19); - table.put(0x0361, 234); - table.put(0x05B8, 18); - table.put(0x0360, 234); - table.put(0x05B7, 17); - table.put(0x05B6, 16); - table.put(0x05B5, 15); - table.put(0x05B4, 14); - table.put(0x05B3, 13); - table.put(0x05B2, 12); - table.put(0x05B1, 11); - table.put(0x0C4D, 9); - table.put(0x05B0, 10); - table.put(0x035F, 233); - table.put(0x035E, 234); - table.put(0x035D, 234); - table.put(0x035C, 233); - table.put(0x035B, 230); - table.put(0x035A, 220); - table.put(0x05AF, 230); - table.put(0x05AE, 228); - table.put(0x05AD, 222); - table.put(0x05AC, 230); - table.put(0x05AB, 230); - table.put(0x05AA, 220); - table.put(0x1039, 9); - table.put(0x0359, 220); - table.put(0x0358, 232); - table.put(0x1037, 7); - table.put(0x0357, 230); - table.put(0x0356, 220); - table.put(0x0355, 220); - table.put(0x0354, 220); - table.put(0x0353, 220); - table.put(0x0352, 230); - table.put(0x05A9, 230); - table.put(0x0351, 230); - table.put(0x05A8, 230); - table.put(0x0350, 230); - table.put(0x05A7, 220); - table.put(0x05A6, 220); - table.put(0x05A5, 220); - table.put(0x05A4, 220); - table.put(0x05A3, 220); - table.put(0x05A2, 220); - table.put(0x074A, 230); - table.put(0x05A1, 230); - table.put(0x05A0, 230); - table.put(0x034E, 220); - table.put(0x034D, 220); - table.put(0x034C, 230); - table.put(0x034B, 230); - table.put(0x0749, 230); - table.put(0x034A, 230); - table.put(0x0748, 220); - table.put(0x0747, 230); - table.put(0x0746, 220); - table.put(0x0745, 230); - table.put(0x0744, 
220); - table.put(0x0743, 230); - table.put(0x0742, 220); - table.put(0x0741, 230); - table.put(0x0349, 220); - table.put(0x0740, 230); - table.put(0x0348, 220); - table.put(0x0347, 220); - table.put(0x0346, 230); - table.put(0x0345, 240); - table.put(0x0344, 230); - table.put(0x0343, 230); - table.put(0x0342, 230); - table.put(0x0341, 230); - table.put(0x0340, 230); - table.put(0x073F, 230); - table.put(0x073E, 220); - table.put(0x073D, 230); - table.put(0x073C, 220); - table.put(0x073B, 220); - table.put(0x073A, 230); - table.put(0x309A, 8); - table.put(0x033F, 230); - table.put(0x033E, 230); - table.put(0x033D, 230); - table.put(0x033C, 220); - table.put(0x033B, 220); - table.put(0x0739, 220); - table.put(0x033A, 220); - table.put(0x0738, 220); - table.put(0x0737, 220); - table.put(0x0736, 230); - table.put(0x3099, 8); - table.put(0x0735, 230); - table.put(0xFE23, 230); - table.put(0x0734, 220); - table.put(0x0F87, 230); - table.put(0xFE22, 230); - table.put(0x0733, 230); - table.put(0x0F86, 230); - table.put(0xFE21, 230); - table.put(0x0732, 230); - table.put(0xFE20, 230); - table.put(0x0731, 220); - table.put(0x0F84, 9); - table.put(0x0339, 220); - table.put(0x0730, 230); - table.put(0x0F83, 230); - table.put(0x0338, 1); - table.put(0x0F82, 230); - table.put(0x0337, 1); - table.put(0x0336, 1); - table.put(0x0F80, 130); - table.put(0x0335, 1); - table.put(0x0334, 1); - table.put(0x0333, 220); - table.put(0x0332, 220); - table.put(0x0331, 220); - table.put(0x0330, 220); - table.put(0x1D244, 230); - table.put(0x1D243, 230); - table.put(0x1D242, 230); - table.put(0x0F7D, 130); - table.put(0x0F7C, 130); - table.put(0x0F7B, 130); - table.put(0x0F7A, 130); - table.put(0x032F, 220); - table.put(0x032E, 220); - table.put(0x032D, 220); - table.put(0x032C, 220); - table.put(0x032B, 220); - table.put(0x032A, 220); - table.put(0x0F74, 132); - table.put(0x0329, 220); - table.put(0x0328, 202); - table.put(0x0F72, 130); - table.put(0x0327, 202); - table.put(0x0DCA, 9); - 
table.put(0x0F71, 129); - table.put(0x0326, 220); - table.put(0x0325, 220); - table.put(0x0324, 220); - table.put(0x0323, 220); - table.put(0x0322, 202); - table.put(0x0321, 202); - table.put(0x0320, 220); - table.put(0x10A3F, 9); - table.put(0x135F, 230); - table.put(0x10A3A, 220); - table.put(0x031F, 220); - table.put(0x031E, 220); - table.put(0x031D, 220); - table.put(0x031C, 220); - table.put(0x031B, 216); - table.put(0x031A, 232); - table.put(0x10A39, 1); - table.put(0x10A38, 230); - table.put(0x0711, 36); - table.put(0x0319, 220); - table.put(0x0318, 220); - table.put(0x0317, 220); - table.put(0x0316, 220); - table.put(0x0315, 232); - table.put(0x0314, 230); - table.put(0x1D18B, 220); - table.put(0x0313, 230); - table.put(0x1D18A, 220); - table.put(0x0312, 230); - table.put(0x0311, 230); - table.put(0x0670, 35); - table.put(0x0310, 230); - table.put(0x1D189, 230); - table.put(0x1D188, 230); - table.put(0x1D187, 230); - table.put(0x1D186, 230); - table.put(0x030F, 230); - table.put(0x1D185, 230); - table.put(0x030E, 230); - table.put(0x030D, 230); - table.put(0x030C, 230); - table.put(0x1D182, 220); - table.put(0x030B, 230); - table.put(0x1D181, 220); - table.put(0x030A, 230); - table.put(0x1D180, 220); - table.put(0x0309, 230); - table.put(0x0308, 230); - table.put(0x1D17F, 220); - table.put(0x0307, 230); - table.put(0x1D17E, 220); - table.put(0x0306, 230); - table.put(0x1D17D, 220); - table.put(0x0305, 230); - table.put(0x1D17C, 220); - table.put(0x0304, 230); - table.put(0x1D17B, 220); - table.put(0x0303, 230); - table.put(0x0302, 230); - table.put(0x0301, 230); - table.put(0x0300, 230); - table.put(0x065E, 230); - table.put(0x065D, 230); - table.put(0x065C, 220); - table.put(0x065B, 230); - table.put(0x1D172, 216); - table.put(0x065A, 230); - table.put(0x1D171, 216); - table.put(0x0B4D, 9); - table.put(0x1D170, 216); - table.put(0x1734, 9); - table.put(0x0659, 230); - table.put(0x0658, 230); - table.put(0x0657, 230); - table.put(0x1D16F, 216); - 
table.put(0x0656, 220); - table.put(0x1D16E, 216); - table.put(0x0655, 220); - table.put(0x1D16D, 226); - table.put(0x0654, 230); - table.put(0x0653, 230); - table.put(0x0652, 34); - table.put(0x0651, 33); - table.put(0x0650, 32); - table.put(0x10A0F, 230); - table.put(0x10A0D, 220); - table.put(0x1D169, 1); - table.put(0x1D168, 1); - table.put(0x1D167, 1); - table.put(0x064F, 31); - table.put(0x1D166, 216); - table.put(0x064E, 30); - table.put(0x1D165, 216); - table.put(0x064D, 29); - table.put(0x064C, 28); - table.put(0x064B, 27); - table.put(0x0B3C, 7); - table.put(0x0F39, 216); - table.put(0x0F37, 220); - table.put(0x0F35, 220); - table.put(0x1DC3, 230); - table.put(0x1DC2, 220); - table.put(0x1DC1, 230); - table.put(0x1DC0, 230); - table.put(0x059F, 230); - table.put(0x1714, 9); - table.put(0x059E, 230); - table.put(0x059D, 230); - table.put(0x059C, 230); - table.put(0x059B, 220); - table.put(0x059A, 222); - table.put(0x0599, 230); - table.put(0x0598, 230); - table.put(0x0597, 230); - table.put(0x0596, 220); - table.put(0x0595, 230); - table.put(0x0594, 230); - table.put(0x0593, 230); - table.put(0x302F, 224); - table.put(0x0592, 230); - table.put(0x302E, 224); - table.put(0x0591, 220); - table.put(0x302D, 222); - table.put(0x302C, 232); - table.put(0x302B, 228); - table.put(0x302A, 218); - table.put(0x0F19, 220); - table.put(0x0F18, 220); - table.put(0x0CCD, 9); - table.put(0x0615, 230); - table.put(0x0614, 230); - table.put(0x18A9, 228); - table.put(0x0613, 230); - table.put(0x0612, 230); - table.put(0x0611, 230); - table.put(0x0CBC, 7); - table.put(0x0610, 230); - table.put(0x20EB, 1); - table.put(0x20EA, 1); - table.put(0x20E9, 230); - table.put(0x20E8, 220); - table.put(0x20E7, 230); - table.put(0x20E6, 1); - table.put(0x20E5, 1); - table.put(0x20E1, 230); - table.put(0x20DC, 230); - table.put(0x20DB, 230); - table.put(0x20DA, 1); - table.put(0x20D9, 1); - table.put(0x20D8, 1); - table.put(0x20D7, 230); - table.put(0x20D6, 230); - table.put(0x0E4B, 107); 
- table.put(0x20D5, 230); - table.put(0x0E4A, 107); - table.put(0x20D4, 230); - table.put(0x20D3, 1); - table.put(0x20D2, 1); - table.put(0x20D1, 230); - return table; - } - } -} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharClass.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharClass.java index f2e78f33c..5f2578cca 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharClass.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharClass.java @@ -38,13 +38,12 @@ package org.teavm.classlib.java.util.regex; import java.util.BitSet; /** - * User defined character classes ([abef]). See AbstractCharClass - * documentation for more details. + * User defined character classes ([abef]). See AbstractCharClass documentation + * for more details. * * @author Nikolay A. Kuznetsov */ class CharClass extends AbstractCharClass { - // Flag indicates if we add supplement upper/lower case boolean ci = false; @@ -77,16 +76,16 @@ class CharClass extends AbstractCharClass { } /* - * We can use this method safely even if nonBitSet != null - * due to specific of range constructions in regular expressions. + * We can use this method safely even if nonBitSet != null due to specific + * of range constructions in regular expressions. 
*/ public CharClass add(int ch) { if (ci) { if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { if (!inverted) { - bits.set(TPattern.getSupplement((char) ch)); + bits.set(TPattern.getSupplement((char)ch)); } else { - bits.clear(TPattern.getSupplement((char) ch)); + bits.clear(TPattern.getSupplement((char)ch)); } } else if (uci && ch > 128) { hasUCI = true; @@ -116,11 +115,10 @@ class CharClass extends AbstractCharClass { } /* - * The difference between add(AbstractCharClass) and union(AbstractCharClass) - * is that add() is used for constructions like "[^abc\\d]" - * (this pattern doesn't match "1") - * while union is used for constructions like "[^abc[\\d]]" - * (this pattern matches "1"). + * The difference between add(AbstractCharClass) and + * union(AbstractCharClass) is that add() is used for constructions like + * "[^abc\\d]" (this pattern doesn't match "1") while union is used for + * constructions like "[^abc[\\d]]" (this pattern matches "1"). */ public CharClass add(final AbstractCharClass cc) { @@ -130,24 +128,24 @@ class CharClass extends AbstractCharClass { if (!invertedSurrogates) { - //A | !B = ! ((A ^ B) & B) + // A | !B = ! ((A ^ B) & B) if (cc.altSurrogates) { lowHighSurrogates.xor(cc.getLowHighSurrogates()); lowHighSurrogates.and(cc.getLowHighSurrogates()); altSurrogates = !altSurrogates; invertedSurrogates = true; - //A | B + // A | B } else { lowHighSurrogates.or(cc.getLowHighSurrogates()); } } else { - //!A | !B = !(A & B) + // !A | !B = !(A & B) if (cc.altSurrogates) { lowHighSurrogates.and(cc.getLowHighSurrogates()); - //!A | B = !(A & !B) + // !A | B = !(A & !B) } else { lowHighSurrogates.andNot(cc.getLowHighSurrogates()); } @@ -156,24 +154,24 @@ class CharClass extends AbstractCharClass { if (!hideBits && cc.getBits() != null) { if (!inverted) { - //A | !B = ! ((A ^ B) & B) + // A | !B = ! 
((A ^ B) & B) if (cc.isNegative()) { bits.xor(cc.getBits()); bits.and(cc.getBits()); alt = !alt; inverted = true; - //A | B + // A | B } else { bits.or(cc.getBits()); } } else { - //!A | !B = !(A & B) + // !A | !B = !(A & B) if (cc.isNegative()) { bits.and(cc.getBits()); - //!A | B = !(A & !B) + // !A | B = !(A & !B) } else { bits.andNot(cc.getBits()); } @@ -185,34 +183,34 @@ class CharClass extends AbstractCharClass { if (curAlt && !inverted && bits.isEmpty()) { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return cc.contains(ch); } }; - //alt = true; + // alt = true; } else { /* - * We keep the value of alt unchanged for - * constructions like [^[abc]fgb] by using - * the formula a ^ b == !a ^ !b. + * We keep the value of alt unchanged for constructions like + * [^[abc]fgb] by using the formula a ^ b == !a ^ !b. */ if (curAlt) { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { - return !((curAlt ^ bits.get(ch)) - || ((curAlt ^ inverted) ^ cc.contains(ch))); + return !((curAlt ^ bits.get(ch)) || ((curAlt ^ inverted) ^ cc.contains(ch))); } }; - //alt = true + // alt = true } else { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { - return (curAlt ^ bits.get(ch)) - || ((curAlt ^ inverted) ^ cc.contains(ch)); + return (curAlt ^ bits.get(ch)) || ((curAlt ^ inverted) ^ cc.contains(ch)); } }; - //alt = false + // alt = false } } @@ -222,18 +220,20 @@ class CharClass extends AbstractCharClass { if (curAlt) { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return !(curAlt ^ (nb.contains(ch) || cc.contains(ch))); } }; - //alt = true + // alt = true } else { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return curAlt ^ (nb.contains(ch) || cc.contains(ch)); } }; - //alt = false + // alt = false } } } @@ -246,9 +246,9 @@ class CharClass extends AbstractCharClass { throw new 
IllegalArgumentException(); if (!ci - //no intersection with surrogate characters - && (end < Character.MIN_SURROGATE - || st > Character.MAX_SURROGATE)) { + // no intersection with surrogate characters + && + (end < Character.MIN_SURROGATE || st > Character.MAX_SURROGATE)) { if (!inverted) { bits.set(st, end + 1); } else { @@ -264,22 +264,20 @@ class CharClass extends AbstractCharClass { // OR operation public void union(final AbstractCharClass clazz) { - if (!mayContainSupplCodepoints - && clazz.mayContainSupplCodepoints) { + if (!mayContainSupplCodepoints && clazz.mayContainSupplCodepoints) { mayContainSupplCodepoints = true; } if (clazz.hasUCI()) this.hasUCI = true; - if (altSurrogates ^ clazz.altSurrogates) { - //!A | B = !(A & !B) + // !A | B = !(A & !B) if (altSurrogates) { lowHighSurrogates.andNot(clazz.getLowHighSurrogates()); - //A | !B = !((A ^ B) & B) + // A | !B = !((A ^ B) & B) } else { lowHighSurrogates.xor(clazz.getLowHighSurrogates()); lowHighSurrogates.and(clazz.getLowHighSurrogates()); @@ -288,11 +286,11 @@ class CharClass extends AbstractCharClass { } else { - //!A | !B = !(A & B) + // !A | !B = !(A & B) if (altSurrogates) { lowHighSurrogates.and(clazz.getLowHighSurrogates()); - //A | B + // A | B } else { lowHighSurrogates.or(clazz.getLowHighSurrogates()); } @@ -301,11 +299,11 @@ class CharClass extends AbstractCharClass { if (!hideBits && clazz.getBits() != null) { if (alt ^ clazz.isNegative()) { - //!A | B = !(A & !B) + // !A | B = !(A & !B) if (alt) { bits.andNot(clazz.getBits()); - //A | !B = !((A ^ B) & B) + // A | !B = !((A ^ B) & B) } else { bits.xor(clazz.getBits()); bits.and(clazz.getBits()); @@ -314,12 +312,12 @@ class CharClass extends AbstractCharClass { } else { - //!A | !B = !(A & B) - if (alt) { + // !A | !B = !(A & B) + if (alt) { bits.and(clazz.getBits()); - //A | B - } else { + // A | B + } else { bits.or(clazz.getBits()); } } @@ -331,35 +329,39 @@ class CharClass extends AbstractCharClass { if (!inverted && bits.isEmpty()) { 
if (curAlt) { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return !clazz.contains(ch); } }; - //alt = true + // alt = true } else { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return clazz.contains(ch); } }; - //alt = false + // alt = false } } else { if (curAlt) { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return !(clazz.contains(ch) || (curAlt ^ bits.get(ch))); } }; - //alt = true + // alt = true } else { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return clazz.contains(ch) || (curAlt ^ bits.get(ch)); } }; - //alt = false + // alt = false } } hideBits = true; @@ -368,18 +370,20 @@ class CharClass extends AbstractCharClass { if (curAlt) { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return !((curAlt ^ nb.contains(ch)) || clazz.contains(ch)); } }; - //alt = true + // alt = true } else { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return (curAlt ^ nb.contains(ch)) || clazz.contains(ch); } }; - //alt = false + // alt = false } } } @@ -387,8 +391,7 @@ class CharClass extends AbstractCharClass { // AND operation public void intersection(final AbstractCharClass clazz) { - if (!mayContainSupplCodepoints - && clazz.mayContainSupplCodepoints) { + if (!mayContainSupplCodepoints && clazz.mayContainSupplCodepoints) { mayContainSupplCodepoints = true; } @@ -397,23 +400,23 @@ class CharClass extends AbstractCharClass { if (altSurrogates ^ clazz.altSurrogates) { - //!A & B = ((A ^ B) & B) + // !A & B = ((A ^ B) & B) if (altSurrogates) { lowHighSurrogates.xor(clazz.getLowHighSurrogates()); lowHighSurrogates.and(clazz.getLowHighSurrogates()); altSurrogates = false; - //A & !B + // A & !B } else { lowHighSurrogates.andNot(clazz.getLowHighSurrogates()); } } else { - //!A & !B = !(A | B) + // !A & !B = !(A | B) if (altSurrogates) { 
lowHighSurrogates.or(clazz.getLowHighSurrogates()); - //A & B + // A & B } else { lowHighSurrogates.and(clazz.getLowHighSurrogates()); } @@ -423,23 +426,23 @@ class CharClass extends AbstractCharClass { if (alt ^ clazz.isNegative()) { - //!A & B = ((A ^ B) & B) + // !A & B = ((A ^ B) & B) if (alt) { bits.xor(clazz.getBits()); bits.and(clazz.getBits()); alt = false; - //A & !B + // A & !B } else { bits.andNot(clazz.getBits()); } } else { - //!A & !B = !(A | B) + // !A & !B = !(A | B) if (alt) { bits.or(clazz.getBits()); - //A & B + // A & B } else { bits.and(clazz.getBits()); } @@ -452,35 +455,39 @@ class CharClass extends AbstractCharClass { if (!inverted && bits.isEmpty()) { if (curAlt) { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return !clazz.contains(ch); } }; - //alt = true + // alt = true } else { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return clazz.contains(ch); } }; - //alt = false + // alt = false } } else { if (curAlt) { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return !(clazz.contains(ch) && (curAlt ^ bits.get(ch))); } }; - //alt = true + // alt = true } else { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return clazz.contains(ch) && (curAlt ^ bits.get(ch)); } }; - //alt = false + // alt = false } } hideBits = true; @@ -489,18 +496,20 @@ class CharClass extends AbstractCharClass { if (curAlt) { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return !((curAlt ^ nb.contains(ch)) && clazz.contains(ch)); } }; - //alt = true + // alt = true } else { nonBitSet = new AbstractCharClass() { + @Override public boolean contains(int ch) { return (curAlt ^ nb.contains(ch)) && clazz.contains(ch); } }; - //alt = false + // alt = false } } } @@ -514,10 +523,12 @@ class CharClass extends AbstractCharClass { * @param ch * @return true if character class contains symbol 
specified; * - * TODO: currently character class implementation based on - * BitSet, but this implementation possibly will be turned to combined - * BitSet(for first 256 symbols) and Black/Red tree for the rest of UTF. + * TODO: currently character class implementation based + * on BitSet, but this implementation possibly will be turned to + * combined BitSet(for first 256 symbols) and Black/Red tree for the + * rest of UTF. */ + @Override public boolean contains(int ch) { if (nonBitSet == null) { return this.alt ^ bits.get(ch); @@ -526,30 +537,34 @@ class CharClass extends AbstractCharClass { } } + @Override protected BitSet getBits() { if (hideBits) return null; return bits; } + @Override protected BitSet getLowHighSurrogates() { return lowHighSurrogates; } + @Override public AbstractCharClass getInstance() { if (nonBitSet == null) { final BitSet bs = getBits(); AbstractCharClass res = new AbstractCharClass() { + @Override public boolean contains(int ch) { return this.alt ^ bs.get(ch); } + @Override public String toString() { StringBuilder temp = new StringBuilder(); - for (int i = bs.nextSetBit(0); i >= 0; i = bs - .nextSetBit(i + 1)) { + for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { temp.append(Character.toChars(i)); temp.append('|'); } @@ -567,7 +582,8 @@ class CharClass extends AbstractCharClass { } } - //for debugging purposes only + // for debugging purposes only + @Override public String toString() { StringBuilder temp = new StringBuilder(); for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) { @@ -581,6 +597,7 @@ class CharClass extends AbstractCharClass { return temp.toString(); } + @Override public boolean hasUCI() { return hasUCI; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharSet.java index 1ee65d84d..4a9d36881 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharSet.java 
+++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CharSet.java @@ -48,18 +48,20 @@ class CharSet extends LeafSet { this.ch = ch; } + @Override public int charCount() { return 1; } + @Override public int accepts(int strIndex, CharSequence testString) { return (this.ch == testString.charAt(strIndex)) ? 1 : -1; } - public int find(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) { if (testString instanceof String) { - String testStr = (String) testString; + String testStr = (String)testString; int strLength = matchResult.getRightBound(); while (strIndex < strLength) { @@ -78,10 +80,10 @@ class CharSet extends LeafSet { return super.find(strIndex, testString, matchResult); } - public int findBack(int strIndex, int lastIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) { if (testString instanceof String) { - String testStr = (String) testString; + String testStr = (String)testString; while (lastIndex >= strIndex) { lastIndex = testStr.lastIndexOf(ch, lastIndex); @@ -102,6 +104,7 @@ class CharSet extends LeafSet { return super.findBack(strIndex, lastIndex, testString, matchResult); } + @Override protected String getName() { return "" + ch; //$NON-NLS-1$ } @@ -110,13 +113,14 @@ class CharSet extends LeafSet { return ch; } + @Override public boolean first(AbstractSet set) { if (set instanceof CharSet) { - return ((CharSet) set).getChar() == ch; + return ((CharSet)set).getChar() == ch; } else if (set instanceof RangeSet) { - return ((RangeSet) set).accepts(0, Character.toString(ch)) > 0; + return ((RangeSet)set).accepts(0, Character.toString(ch)) > 0; } else if (set instanceof SupplRangeSet) { - return ((SupplRangeSet) set).contains(ch); + return ((SupplRangeSet)set).contains(ch); } else if (set instanceof 
SupplCharSet) { return false; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeGroupQuantifierSet.java index 4cfd19cdf..425053726 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeGroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeGroupQuantifierSet.java @@ -48,19 +48,23 @@ class CompositeGroupQuantifierSet extends GroupQuantifierSet { /** * Constructs CompositeGroupQuantifierSet - * @param quant - given composite quantifier - * @param innerSet - given group - * @param next - next set after the quantifier + * + * @param quant + * - given composite quantifier + * @param innerSet + * - given group + * @param next + * - next set after the quantifier */ - public CompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, - AbstractSet next, int type, int setCounter) { + public CompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, AbstractSet next, int type, + int setCounter) { super(innerSet, next, type); this.quantifier = quant; this.setCounter = setCounter; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int enterCounter = matchResult.getEnterCounter(setCounter); if (!innerSet.hasConsumed(matchResult)) @@ -93,6 +97,7 @@ class CompositeGroupQuantifierSet extends GroupQuantifierSet { quantifier.resetCounter(); } + @Override protected String getName() { return quantifier.toString(); } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeQuantifierSet.java index 652759d35..f240b1763 100644 --- 
a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeQuantifierSet.java @@ -44,14 +44,13 @@ class CompositeQuantifierSet extends LeafQuantifierSet { protected Quantifier quantifier = null; - public CompositeQuantifierSet(Quantifier quant, LeafSet innerSet, - AbstractSet next, int type) { + public CompositeQuantifierSet(Quantifier quant, LeafSet innerSet, AbstractSet next, int type) { super(innerSet, next, type); this.quantifier = quant; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int min = quantifier.min(); int max = quantifier.max(); int i = 0; @@ -72,8 +71,8 @@ class CompositeQuantifierSet extends LeafQuantifierSet { for (; i < max; i++) { int shift; - if (stringIndex + leaf.charCount() > matchResult.getRightBound() - || (shift = leaf.accepts(stringIndex, testString)) < 1) { + if (stringIndex + leaf.charCount() > matchResult.getRightBound() || + (shift = leaf.accepts(stringIndex, testString)) < 1) { break; } stringIndex += shift; @@ -94,6 +93,7 @@ class CompositeQuantifierSet extends LeafQuantifierSet { quantifier.resetCounter(); } + @Override protected String getName() { return quantifier.toString(); } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeRangeSet.java index d6858d663..3bb3f0307 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeRangeSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/CompositeRangeSet.java @@ -98,29 +98,26 @@ package org.teavm.classlib.java.util.regex; /** - * This class is used to split the range that contains surrogate - * characters into two ranges: the first consisting 
of these surrogate - * characters and the second consisting of all others characters - * from the parent range. - * This class represents the parent range split in such a manner. + * This class is used to split the range that contains surrogate characters into + * two ranges: the first consisting of these surrogate characters and the second + * consisting of all others characters from the parent range. This class + * represents the parent range split in such a manner. */ class CompositeRangeSet extends JointSet { - //range without surrogates + // range without surrogates AbstractSet withoutSurrogates; - //range containing surrogates only + // range containing surrogates only AbstractSet withSurrogates; - public CompositeRangeSet(AbstractSet withoutSurrogates, - AbstractSet withSurrogates, AbstractSet next) { + public CompositeRangeSet(AbstractSet withoutSurrogates, AbstractSet withSurrogates, AbstractSet next) { this.withoutSurrogates = withoutSurrogates; this.withSurrogates = withSurrogates; setNext(next); } - public CompositeRangeSet(AbstractSet withoutSurrogates, - AbstractSet withSurrogates) { + public CompositeRangeSet(AbstractSet withoutSurrogates, AbstractSet withSurrogates) { this.withoutSurrogates = withoutSurrogates; this.withSurrogates = withSurrogates; } @@ -128,12 +125,13 @@ class CompositeRangeSet extends JointSet { /** * Returns the next. */ + @Override public AbstractSet getNext() { return this.next; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int shift = withoutSurrogates.matches(stringIndex, testString, matchResult); if (shift < 0) { @@ -148,9 +146,11 @@ class CompositeRangeSet extends JointSet { /** * Sets next abstract set. + * * @param next * The next to set. 
*/ + @Override public void setNext(AbstractSet next) { this.next = next; withSurrogates.setNext(next); @@ -165,16 +165,17 @@ class CompositeRangeSet extends JointSet { return withoutSurrogates; } + @Override protected String getName() { - return "CompositeRangeSet: " + " " - + withoutSurrogates + " " - + withSurrogates; + return "CompositeRangeSet: " + " " + withoutSurrogates + " " + withSurrogates; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return true; } + @Override public boolean first(AbstractSet set) { return true; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DecomposedCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DecomposedCharSet.java index 151c93c10..54559ebe8 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DecomposedCharSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DecomposedCharSet.java @@ -18,16 +18,14 @@ package org.teavm.classlib.java.util.regex; /** - * Represents canonical decomposition of - * Unicode character. Is used when - * CANON_EQ flag of Pattern class - * is specified. + * Represents canonical decomposition of Unicode character. Is used when + * CANON_EQ flag of Pattern class is specified. 
*/ class DecomposedCharSet extends JointSet { /** - * Contains information about number of chars - * that were read for a codepoint last time + * Contains information about number of chars that were read for a codepoint + * last time */ private int readCharsForCodePoint = 1; @@ -39,15 +37,15 @@ class DecomposedCharSet extends JointSet { /** * Decomposition of the Unicode codepoint */ - private int [] decomposedChar; + private int[] decomposedChar; /** - * Length of useful part of decomposedChar - * decomposedCharLength <= decomposedChar.length + * Length of useful part of decomposedChar decomposedCharLength <= + * decomposedChar.length */ private int decomposedCharLength; - public DecomposedCharSet(int [] decomposedChar, int decomposedCharLength) { + public DecomposedCharSet(int[] decomposedChar, int decomposedCharLength) { this.decomposedChar = decomposedChar; this.decomposedCharLength = decomposedCharLength; } @@ -55,39 +53,43 @@ class DecomposedCharSet extends JointSet { /** * Returns the next. */ + @Override public AbstractSet getNext() { return this.next; } /** * Sets next abstract set. + * * @param next * The next to set. 
*/ + @Override public void setNext(AbstractSet next) { this.next = next; } + @Override public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) { /* - * All decompositions have length that - * is less or equal Lexer.MAX_DECOMPOSITION_LENGTH + * All decompositions have length that is less or equal + * Lexer.MAX_DECOMPOSITION_LENGTH */ - int [] decCurCodePoint; - int [] decCodePoint = new int [Lexer.MAX_DECOMPOSITION_LENGTH]; + int[] decCurCodePoint; + int[] decCodePoint = new int[Lexer.MAX_DECOMPOSITION_LENGTH]; int readCodePoints = 0; int rightBound = matchResult.getRightBound(); int curChar; int i = 0; if (strIndex >= rightBound) { - return -1; + return -1; } /* - * We read testString and decompose it gradually to compare with - * this decomposedChar at position strIndex + * We read testString and decompose it gradually to compare with this + * decomposedChar at position strIndex */ curChar = codePointAt(strIndex, testString, rightBound); strIndex += readCharsForCodePoint; @@ -104,45 +106,43 @@ class DecomposedCharSet extends JointSet { curChar = codePointAt(strIndex, testString, rightBound); /* - * Read testString until we met a decomposed char boundary - * and decompose obtained portion of testString + * Read testString until we met a decomposed char boundary and + * decompose obtained portion of testString */ - while ((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH) - && !Lexer.isDecomposedCharBoundary(curChar)) { + while ((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH) && !Lexer.isDecomposedCharBoundary(curChar)) { - if (Lexer.hasDecompositionNonNullCanClass(curChar)) { + if (Lexer.hasDecompositionNonNullCanClass(curChar)) { - /* - * A few codepoints have decompositions and non null - * canonical classes, we have to take them into - * consideration, but general rule is: - * if canonical class != 0 then no decomposition - */ - decCurCodePoint = Lexer.getDecomposition(curChar); + /* + * A few codepoints have decompositions and 
non null + * canonical classes, we have to take them into + * consideration, but general rule is: if canonical class != + * 0 then no decomposition + */ + decCurCodePoint = Lexer.getDecomposition(curChar); - /* - * Length of such decomposition is 1 or 2. See - * UnicodeData file - * http://www.unicode.org/Public/4.0-Update - * /UnicodeData-4.0.0.txt - */ - if (decCurCodePoint.length == 2) { - decCodePoint[readCodePoints++] = decCurCodePoint[0]; - decCodePoint[readCodePoints++] = decCurCodePoint[1]; - } else { - decCodePoint[readCodePoints++] = decCurCodePoint[0]; - } + /* + * Length of such decomposition is 1 or 2. See UnicodeData + * file http://www.unicode.org/Public/4.0-Update + * /UnicodeData-4.0.0.txt + */ + if (decCurCodePoint.length == 2) { + decCodePoint[readCodePoints++] = decCurCodePoint[0]; + decCodePoint[readCodePoints++] = decCurCodePoint[1]; } else { - decCodePoint[readCodePoints++] = curChar; + decCodePoint[readCodePoints++] = decCurCodePoint[0]; } + } else { + decCodePoint[readCodePoints++] = curChar; + } - strIndex += readCharsForCodePoint; + strIndex += readCharsForCodePoint; - if (strIndex < rightBound) { - curChar = codePointAt(strIndex, testString, rightBound); - } else { - break; - } + if (strIndex < rightBound) { + curChar = codePointAt(strIndex, testString, rightBound); + } else { + break; + } } } @@ -150,30 +150,29 @@ class DecomposedCharSet extends JointSet { * Some optimization since length of decomposed char is <= 3 usually */ switch (readCodePoints) { - case 0: - case 1: - case 2: - break; + case 0: + case 1: + case 2: + break; - case 3: - int i1 = Lexer.getCanonicalClass(decCodePoint[1]); - int i2 = Lexer.getCanonicalClass(decCodePoint[2]); + case 3: + int i1 = Lexer.getCanonicalClass(decCodePoint[1]); + int i2 = Lexer.getCanonicalClass(decCodePoint[2]); - if ((i2 != 0) && (i1 > i2)) { - i1 = decCodePoint[1]; - decCodePoint[1] = decCodePoint[2]; - decCodePoint[2] = i1; - } - break; + if ((i2 != 0) && (i1 > i2)) { + i1 = 
decCodePoint[1]; + decCodePoint[1] = decCodePoint[2]; + decCodePoint[2] = i1; + } + break; - default: - decCodePoint = Lexer.getCanonicalOrder(decCodePoint, - readCodePoints); - } + default: + decCodePoint = Lexer.getCanonicalOrder(decCodePoint, readCodePoints); + } /* - * Compare decomposedChar with decomposed char - * that was just read from testString + * Compare decomposedChar with decomposed char that was just read from + * testString */ if (readCodePoints != decomposedCharLength) { return -1; @@ -205,6 +204,7 @@ class DecomposedCharSet extends JointSet { return decomposedCharUTF16; } + @Override protected String getName() { return "decomposed char:" + getDecomposedChar(); //$NON-NLS-1$ } @@ -212,17 +212,19 @@ class DecomposedCharSet extends JointSet { /** * Reads Unicode codepoint from input. * - * @param strIndex - index to read codepoint at - * @param testString - input - * @param matchResult - auxiliary object + * @param strIndex + * - index to read codepoint at + * @param testString + * - input + * @param matchResult + * - auxiliary object * @return codepoint at given strIndex at testString and */ - public int codePointAt(int strIndex, CharSequence testString, - int rightBound) { + public int codePointAt(int strIndex, CharSequence testString, int rightBound) { /* - * We store information about number of codepoints - * we read at variable readCharsForCodePoint. + * We store information about number of codepoints we read at variable + * readCharsForCodePoint. 
*/ int curChar; @@ -232,7 +234,7 @@ class DecomposedCharSet extends JointSet { char low = testString.charAt(strIndex); if (Character.isSurrogatePair(high, low)) { - char [] curCodePointUTF16 = new char [] {high, low}; + char[] curCodePointUTF16 = new char[] { high, low }; curChar = Character.codePointAt(curCodePointUTF16, 0); readCharsForCodePoint = 2; } else { @@ -245,15 +247,14 @@ class DecomposedCharSet extends JointSet { return curChar; } + @Override public boolean first(AbstractSet set) { - return (set instanceof DecomposedCharSet) - ? ((DecomposedCharSet) set).getDecomposedChar() - .equals(getDecomposedChar()) - : true; + return (set instanceof DecomposedCharSet) ? ((DecomposedCharSet)set).getDecomposedChar().equals( + getDecomposedChar()) : true; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return true; } } - diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllQuantifierSet.java index a3f0f56af..e11126a08 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllQuantifierSet.java @@ -21,8 +21,8 @@ package org.teavm.classlib.java.util.regex; /** - * Special node for ".*" construction for any character - * including line terminators. + * Special node for ".*" construction for any character including line + * terminators. * * @author Nikolay A. 
Kuznetsov */ @@ -32,8 +32,8 @@ class DotAllQuantifierSet extends QuantifierSet { super(innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int strLength = matchResult.getRightBound(); @@ -43,8 +43,8 @@ class DotAllQuantifierSet extends QuantifierSet { return next.findBack(stringIndex, strLength, testString, matchResult); } - public int find(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int strLength = matchResult.getRightBound(); if (next.findBack(stringIndex, strLength, testString, matchResult) >= 0) { return stringIndex; @@ -53,6 +53,7 @@ class DotAllQuantifierSet extends QuantifierSet { } } + @Override protected String getName() { return ""; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllSet.java index d1b7f9e48..87a135a2f 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotAllSet.java @@ -27,45 +27,48 @@ package org.teavm.classlib.java.util.regex; */ class DotAllSet extends JointSet { - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - int strLength = matchResult.getRightBound(); + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { + int strLength = matchResult.getRightBound(); - if (stringIndex + 1 > strLength) { - matchResult.hitEnd = true; - return -1; - } + if (stringIndex + 1 > strLength) { + matchResult.hitEnd = true; + return -1; + } - char high = testString.charAt(stringIndex); + char high = testString.charAt(stringIndex); - if 
(Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) { - char low = testString.charAt(stringIndex + 1); + if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) { + char low = testString.charAt(stringIndex + 1); - if (Character.isSurrogatePair(high, low)) { - return next.matches(stringIndex + 2, testString, matchResult); - } - } - return next.matches(stringIndex + 1, testString, matchResult); - } - - protected String getName() { - return "DotAll"; //$NON-NLS-1$ + if (Character.isSurrogatePair(high, low)) { + return next.matches(stringIndex + 2, testString, matchResult); + } + } + return next.matches(stringIndex + 1, testString, matchResult); } + @Override + protected String getName() { + return "DotAll"; + } + @Override public AbstractSet getNext() { return this.next; } + @Override public void setNext(AbstractSet next) { this.next = next; } + @Override public int getType() { return AbstractSet.TYPE_DOTSET; } - + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return true; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotQuantifierSet.java index 201e7984e..09e74e0b6 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotQuantifierSet.java @@ -21,9 +21,8 @@ package org.teavm.classlib.java.util.regex; /** - * Special node for ".*" construction. - * The main idea here is to find line terminator and try to find the rest of - * the construction from this point. + * Special node for ".*" construction. The main idea here is to find line + * terminator and try to find the rest of the construction from this point. * * @author Nikolay A. 
Kuznetsov */ @@ -31,14 +30,13 @@ class DotQuantifierSet extends QuantifierSet { AbstractLineTerminator lt; - public DotQuantifierSet(AbstractSet innerSet, AbstractSet next, int type, - AbstractLineTerminator lt) { + public DotQuantifierSet(AbstractSet innerSet, AbstractSet next, int type, AbstractLineTerminator lt) { super(innerSet, next, type); this.lt = lt; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int strLength = matchResult.getRightBound(); @@ -55,8 +53,8 @@ class DotQuantifierSet extends QuantifierSet { return next.findBack(stringIndex, startSearch, testString, matchResult); } - public int find(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { // String testStr = testString.toString(); int strLength = matchResult.getRightBound(); // 1. skip line terminators ??? @@ -77,8 +75,7 @@ class DotQuantifierSet extends QuantifierSet { if (nextSearch < 0) { nextSearch = strLength; } - nextSearch = next - .findBack(res, nextSearch, testString, matchResult); + nextSearch = next.findBack(res, nextSearch, testString, matchResult); res = (res < nextSearch) ? nextSearch : res; } else { return -1; @@ -86,11 +83,20 @@ class DotQuantifierSet extends QuantifierSet { // 4. find left boundary of this search // // - int leftBound = (res > 0) ? findBackLineTerminator(stringIndex, - res - 1, testString)/* testStr.lastIndexOf('\n', res - 1) */ - : (res == 0) ? 0 : -1; - res = (leftBound >= stringIndex) ? ((leftBound < res) ? leftBound + 1 - : leftBound) : stringIndex; + int leftBound = (res > 0) ? findBackLineTerminator(stringIndex, res - 1, testString)/* + * testStr + * . + * lastIndexOf + * ( + * '\n' + * , + * res + * - + * 1 + * ) + */ + : (res == 0) ? 0 : -1; + res = (leftBound >= stringIndex) ? 
((leftBound < res) ? leftBound + 1 : leftBound) : stringIndex; return res; } @@ -116,6 +122,7 @@ class DotQuantifierSet extends QuantifierSet { return -1; } + @Override protected String getName() { return ""; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotSet.java index b42304a62..25086e364 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/DotSet.java @@ -34,8 +34,8 @@ final class DotSet extends JointSet { this.lt = lt; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int strLength = matchResult.getRightBound(); if (stringIndex + 1 > strLength) { @@ -48,32 +48,35 @@ final class DotSet extends JointSet { char low = testString.charAt(stringIndex + 1); if (Character.isSurrogatePair(high, low)) { - return lt.isLineTerminator(Character.toCodePoint(high, low))? -1 - : next.matches(stringIndex + 2, testString, matchResult); + return lt.isLineTerminator(Character.toCodePoint(high, low)) ? -1 : next.matches(stringIndex + 2, + testString, matchResult); } } - return lt.isLineTerminator(high)? -1 - : next.matches(stringIndex + 1, testString, matchResult); + return lt.isLineTerminator(high) ? 
-1 : next.matches(stringIndex + 1, testString, matchResult); } + @Override protected String getName() { return "."; //$NON-NLS-1$ } - + @Override public AbstractSet getNext() { return this.next; } + @Override public void setNext(AbstractSet next) { this.next = next; } + @Override public int getType() { return AbstractSet.TYPE_DOTSET; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return true; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOISet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOISet.java index ee8ffdd75..bb554b1ec 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOISet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOISet.java @@ -27,10 +27,9 @@ package org.teavm.classlib.java.util.regex; */ class EOISet extends AbstractSet { - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - int rightBound = matchResult.hasTransparentBounds() ? testString - .length() : matchResult.getRightBound(); + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { + int rightBound = matchResult.hasTransparentBounds() ? 
testString.length() : matchResult.getRightBound(); if (stringIndex < rightBound) return -1; @@ -43,11 +42,13 @@ class EOISet extends AbstractSet { /** * Returns false, enough for quantifiers */ + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return false; } + @Override protected String getName() { - return "EOI"; //$NON-NLS-1$ + return "EOI"; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOLSet.java index 7464f3b72..f94414dfd 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOLSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EOLSet.java @@ -26,17 +26,15 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. Kuznetsov */ final class EOLSet extends AbstractSet { - private int consCounter; public EOLSet(int counter) { this.consCounter = counter; } - public int matches(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { - int rightBound = matchResult.hasAnchoringBounds() ? matchResult - .getRightBound() : testString.length(); + @Override + public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) { + int rightBound = matchResult.hasAnchoringBounds() ? 
matchResult.getRightBound() : testString.length(); if (strIndex >= rightBound) { matchResult.setConsumed(consCounter, 0); @@ -44,16 +42,15 @@ final class EOLSet extends AbstractSet { } // check final line terminator; - if ((rightBound - strIndex) == 2 && testString.charAt(strIndex) == '\r' - && testString.charAt(strIndex + 1) == '\n') { + if ((rightBound - strIndex) == 2 && testString.charAt(strIndex) == '\r' && + testString.charAt(strIndex + 1) == '\n') { matchResult.setConsumed(consCounter, 0); return next.matches(strIndex, testString, matchResult); } char ch; - if ((rightBound - strIndex) == 1 - && (((ch = testString.charAt(strIndex)) == '\n' || ch == '\r' - || ch == '\u0085' || (ch | 1) == '\u2029'))) { + if ((rightBound - strIndex) == 1 && + (((ch = testString.charAt(strIndex)) == '\n' || ch == '\r' || ch == '\u0085' || (ch | 1) == '\u2029'))) { matchResult.setConsumed(consCounter, 0); return next.matches(strIndex, testString, matchResult); } @@ -61,6 +58,7 @@ final class EOLSet extends AbstractSet { return -1; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { int cons; boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); @@ -68,7 +66,8 @@ final class EOLSet extends AbstractSet { return res; } + @Override protected String getName() { - return ""; //$NON-NLS-1$ + return ""; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EmptySet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EmptySet.java index 4f0529db8..8e092b52c 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EmptySet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/EmptySet.java @@ -26,39 +26,36 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. 
Kuznetsov */ class EmptySet extends LeafSet { - public EmptySet(AbstractSet next) { super(next); charCount = 0; } - /* - * @see java.util.regex.LeafSet#accepts(int, java.lang.CharSequence) - */ + @Override public int accepts(int stringIndex, CharSequence testString) { return 0; } - public int find(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int strLength = matchResult.getRightBound(); int startStr = matchResult.getLeftBound(); while (stringIndex <= strLength) { - //check for supplementary codepoints + // check for supplementary codepoints if (stringIndex < strLength) { char low = testString.charAt(stringIndex); if (Character.isLowSurrogate(low)) { - if (stringIndex > startStr) { - char high = testString.charAt(stringIndex - 1); - if (Character.isHighSurrogate(high)) { - stringIndex++; - continue; - } - } + if (stringIndex > startStr) { + char high = testString.charAt(stringIndex - 1); + if (Character.isHighSurrogate(high)) { + stringIndex++; + continue; + } + } } } @@ -71,26 +68,26 @@ class EmptySet extends LeafSet { return -1; } - public int findBack(int stringIndex, int startSearch, - CharSequence testString, MatchResultImpl matchResult) { + @Override + public int findBack(int stringIndex, int startSearch, CharSequence testString, MatchResultImpl matchResult) { int strLength = matchResult.getRightBound(); int startStr = matchResult.getLeftBound(); while (startSearch >= stringIndex) { - //check for supplementary codepoints + // check for supplementary codepoints if (startSearch < strLength) { char low = testString.charAt(startSearch); if (Character.isLowSurrogate(low)) { - if (startSearch > startStr) { - char high = testString.charAt(startSearch - 1); - if (Character.isHighSurrogate(high)) { - startSearch--; - continue; - } - } + if (startSearch > startStr) { + char high = testString.charAt(startSearch - 1); + if 
(Character.isHighSurrogate(high)) { + startSearch--; + continue; + } + } } } @@ -103,15 +100,13 @@ class EmptySet extends LeafSet { return -1; } - /* - * @see java.util.regex.AbstractSet#getName() - */ + @Override protected String getName() { - return ""; //$NON-NLS-1$ + return ""; } + @Override public boolean hasConsumed(MatchResultImpl mr) { return false; } - } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FSet.java index 5d7007806..0d829a366 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FSet.java @@ -22,6 +22,7 @@ package org.teavm.classlib.java.util.regex; /** * The node which marks end of the particular group. + * * @author Nikolay A. Kuznetsov */ class FSet extends AbstractSet { @@ -36,8 +37,8 @@ class FSet extends AbstractSet { this.groupIndex = groupIndex; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int end = matchResult.getEnd(groupIndex); matchResult.setEnd(groupIndex, stringIndex); int shift = next.matches(stringIndex, testString, matchResult); @@ -54,10 +55,12 @@ class FSet extends AbstractSet { return groupIndex; } + @Override protected String getName() { return "fSet"; //$NON-NLS-1$ } + @Override public boolean hasConsumed(MatchResultImpl mr) { return false; } @@ -69,15 +72,17 @@ class FSet extends AbstractSet { */ static class PossessiveFSet extends AbstractSet { - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { return stringIndex; } + @Override protected String getName() { return "posFSet"; //$NON-NLS-1$ } + @Override public 
boolean hasConsumed(MatchResultImpl mr) { return false; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FinalSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FinalSet.java index d9c12a177..d67ad03d2 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FinalSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/FinalSet.java @@ -31,10 +31,9 @@ class FinalSet extends FSet { super(0); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - if (matchResult.mode() == TMatcher.MODE_FIND - || stringIndex == matchResult.getRightBound()) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { + if (matchResult.mode() == TMatcher.MODE_FIND || stringIndex == matchResult.getRightBound()) { matchResult.setValid(); matchResult.setEnd(0, stringIndex); return stringIndex; @@ -42,6 +41,7 @@ class FinalSet extends FSet { return -1; } + @Override protected String getName() { return "FinalSet"; //$NON-NLS-1$ } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/GroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/GroupQuantifierSet.java index 5f86f59a3..38c402cba 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/GroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/GroupQuantifierSet.java @@ -21,9 +21,8 @@ package org.teavm.classlib.java.util.regex; /** - * Default quantifier over groups, in fact this type of quantifier is - * generally used for constructions we cant identify number of characters they - * consume. + * Default quantifier over groups, in fact this type of quantifier is generally + * used for constructions we cant identify number of characters they consume. * * @author Nikolay A. 
Kuznetsov */ @@ -33,12 +32,12 @@ class GroupQuantifierSet extends QuantifierSet { super(innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { if (!innerSet.hasConsumed(matchResult)) return next.matches(stringIndex, testString, matchResult);// return - // -1; + // -1; int nextIndex = innerSet.matches(stringIndex, testString, matchResult); @@ -49,6 +48,7 @@ class GroupQuantifierSet extends QuantifierSet { } } + @Override protected String getName() { return ""; //$NON-NLS-1$ } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HangulDecomposedCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HangulDecomposedCharSet.java index 357ab3608..3a41acc93 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HangulDecomposedCharSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HangulDecomposedCharSet.java @@ -18,17 +18,15 @@ package org.teavm.classlib.java.util.regex; /** - * Represents canonical decomposition of - * Hangul syllable. Is used when - * CANON_EQ flag of Pattern class - * is specified. + * Represents canonical decomposition of Hangul syllable. Is used when CANON_EQ + * flag of Pattern class is specified. */ class HangulDecomposedCharSet extends JointSet { /** * Decomposed Hangul syllable. 
*/ - private char [] decomposedChar; + private char[] decomposedChar; /** * String representing syllable @@ -36,12 +34,12 @@ class HangulDecomposedCharSet extends JointSet { private String decomposedCharUTF16 = null; /** - * Length of useful part of decomposedChar - * decomposedCharLength <= decomposedChar.length + * Length of useful part of decomposedChar decomposedCharLength <= + * decomposedChar.length */ private int decomposedCharLength; - public HangulDecomposedCharSet(char [] decomposedChar, int decomposedCharLength) { + public HangulDecomposedCharSet(char[] decomposedChar, int decomposedCharLength) { this.decomposedChar = decomposedChar; this.decomposedCharLength = decomposedCharLength; } @@ -49,15 +47,18 @@ class HangulDecomposedCharSet extends JointSet { /** * Returns the next. */ + @Override public AbstractSet getNext() { return this.next; } /** * Sets next abstract set. + * * @param next * The next to set. */ + @Override public void setNext(AbstractSet next) { this.next = next; } @@ -68,26 +69,25 @@ class HangulDecomposedCharSet extends JointSet { * @return - string representation. */ private String getDecomposedChar() { - return (decomposedCharUTF16 == null) - ? (decomposedCharUTF16 = new String(decomposedChar)) - : decomposedCharUTF16; + return (decomposedCharUTF16 == null) ? 
(decomposedCharUTF16 = new String(decomposedChar)) : decomposedCharUTF16; } + @Override protected String getName() { return "decomposed Hangul syllable:" + getDecomposedChar(); //$NON-NLS-1$ } + @Override public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) { /* - * All decompositions for Hangul syllables have length that - * is less or equal Lexer.MAX_DECOMPOSITION_LENGTH + * All decompositions for Hangul syllables have length that is less or + * equal Lexer.MAX_DECOMPOSITION_LENGTH */ int rightBound = matchResult.getRightBound(); int SyllIndex = 0; - int [] decompSyllable = new int [Lexer - .MAX_HANGUL_DECOMPOSITION_LENGTH]; - int [] decompCurSymb; + int[] decompSyllable = new int[Lexer.MAX_HANGUL_DECOMPOSITION_LENGTH]; + int[] decompCurSymb; char curSymb; /* @@ -108,8 +108,8 @@ class HangulDecomposedCharSet extends JointSet { if (decompCurSymb == null) { /* - * We deal with ordinary letter or sequence of jamos - * at strIndex at testString. + * We deal with ordinary letter or sequence of jamos at strIndex at + * testString. 
*/ decompSyllable[SyllIndex++] = curSymb; LIndex = curSymb - Lexer.LBase; @@ -124,14 +124,14 @@ class HangulDecomposedCharSet extends JointSet { if (strIndex < rightBound) { curSymb = testString.charAt(strIndex); - VIndex = curSymb - Lexer.VBase; + VIndex = curSymb - Lexer.VBase; } if ((VIndex < 0) || (VIndex >= Lexer.VCount)) { /* - * Single L jamo doesn't compose Hangul syllable, - * so doesn't match + * Single L jamo doesn't compose Hangul syllable, so doesn't + * match */ return -1; } @@ -140,39 +140,31 @@ class HangulDecomposedCharSet extends JointSet { if (strIndex < rightBound) { curSymb = testString.charAt(strIndex); - TIndex = curSymb - Lexer.TBase; + TIndex = curSymb - Lexer.TBase; } if ((TIndex < 0) || (TIndex >= Lexer.TCount)) { /* - * We deal with LV syllable at testString, so - * compare it to this + * We deal with LV syllable at testString, so compare it to this */ - return ((decomposedCharLength == 2) - && (decompSyllable[0] == decomposedChar[0]) - && (decompSyllable[1] == decomposedChar[1])) - ? next.matches(strIndex, testString, matchResult) - : -1; + return ((decomposedCharLength == 2) && (decompSyllable[0] == decomposedChar[0]) && (decompSyllable[1] == decomposedChar[1])) ? next + .matches(strIndex, testString, matchResult) : -1; } strIndex++; decompSyllable[SyllIndex++] = curSymb; /* - * We deal with LVT syllable at testString, so - * compare it to this + * We deal with LVT syllable at testString, so compare it to this */ - return ((decomposedCharLength == 3) - && (decompSyllable[0] == decomposedChar[0]) - && (decompSyllable[1] == decomposedChar[1]) - && (decompSyllable[2] == decomposedChar[2])) - ? next.matches(strIndex, testString, matchResult) - : -1; + return ((decomposedCharLength == 3) && (decompSyllable[0] == decomposedChar[0]) && + (decompSyllable[1] == decomposedChar[1]) && (decompSyllable[2] == decomposedChar[2])) ? 
next + .matches(strIndex, testString, matchResult) : -1; } else { /* - * We deal with Hangul syllable at strIndex at testString. - * So we decomposed it to compare with this. + * We deal with Hangul syllable at strIndex at testString. So we + * decomposed it to compare with this. */ int i = 0; @@ -189,15 +181,14 @@ class HangulDecomposedCharSet extends JointSet { } } + @Override public boolean first(AbstractSet set) { - return (set instanceof HangulDecomposedCharSet) - ? ((HangulDecomposedCharSet) set).getDecomposedChar() - .equals(getDecomposedChar()) - : true; + return (set instanceof HangulDecomposedCharSet) ? ((HangulDecomposedCharSet)set).getDecomposedChar().equals( + getDecomposedChar()) : true; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return true; } } - diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HashDecompositions.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HashDecompositions.java deleted file mode 100644 index a47417602..000000000 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HashDecompositions.java +++ /dev/null @@ -1,2072 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.teavm.classlib.java.util.regex; - -/** - * This class gives us a hashtable that contains canonical - * decomposition mappings that are generated from - * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt. - */ -class HashDecompositions{ - - private static IntArrHash table = null; - - private HashDecompositions() { - } - - public static IntArrHash getHashDecompositions() { - if (table != null) { - return table; - } else { - table = new IntArrHash(2032); - table.put(0x0929, new int [] {0x0928, 0x093C}); - table.put(0xFA9F, new int [] {0x72AF}); - table.put(0xFA9E, new int [] {0x7235}); - table.put(0xFA9D, new int [] {0x77A7}); - table.put(0xFA9C, new int [] {0x716E}); - table.put(0xFA9B, new int [] {0x701E}); - table.put(0xFA9A, new int [] {0x6F22}); - table.put(0x0BCC, new int [] {0x0BC6, 0x0BD7}); - table.put(0x0BCB, new int [] {0x0BC7, 0x0BBE}); - table.put(0x0BCA, new int [] {0x0BC6, 0x0BBE}); - table.put(0xFA99, new int [] {0x6ECB}); - table.put(0xFA98, new int [] {0x6EDB}); - table.put(0xFA97, new int [] {0x6D41}); - table.put(0xFA96, new int [] {0x6BBA}); - table.put(0xFA95, new int [] {0x6B79}); - table.put(0xFA94, new int [] {0x6756}); - table.put(0xFA93, new int [] {0x671B}); - table.put(0xFA92, new int [] {0x6717}); - table.put(0x1D164, new int [] {0x1D158, 0x1D165, 0x1D172}); - table.put(0xFA91, new int [] {0x6674}); - table.put(0x1D163, new int [] {0x1D158, 0x1D165, 0x1D171}); - table.put(0xFA90, new int [] {0x6556}); - table.put(0x1D162, new int [] {0x1D158, 0x1D165, 0x1D170}); - table.put(0x1D161, new int [] {0x1D158, 0x1D165, 0x1D16F}); - table.put(0x1D160, new int [] {0x1D158, 0x1D165, 0x1D16E}); - table.put(0xFA8F, new int [] {0x6452}); - table.put(0xFA8E, new int [] {0x641C}); - table.put(0xFA8D, new int [] {0x63C4}); - table.put(0xFA8C, new int [] {0x6234}); - table.put(0x1D15F, new int [] {0x1D158, 0x1D165}); - table.put(0xFA8B, new int [] {0x61F2}); - table.put(0x1D15E, new int [] {0x1D157, 0x1D165}); - 
table.put(0xFA8A, new int [] {0x6160}); - table.put(0xFA89, new int [] {0x618E}); - table.put(0xFA88, new int [] {0x6108}); - table.put(0xFA87, new int [] {0x614E}); - table.put(0xFA86, new int [] {0x60D8}); - table.put(0xFA85, new int [] {0x5FAD}); - table.put(0xFA84, new int [] {0x5F69}); - table.put(0xFA83, new int [] {0x5ED9}); - table.put(0xFA82, new int [] {0x5ED2}); - table.put(0xFA81, new int [] {0x5B28}); - table.put(0xFA80, new int [] {0x5A62}); - table.put(0xFA7F, new int [] {0x5954}); - table.put(0x1FFD, new int [] {0x00B4}); - table.put(0xFA7E, new int [] {0x5944}); - table.put(0x1FFC, new int [] {0x03A9, 0x0345}); - table.put(0xFA7D, new int [] {0x58B3}); - table.put(0x1FFB, new int [] {0x03A9, 0x0301}); - table.put(0xFA7C, new int [] {0x585A}); - table.put(0x1FFA, new int [] {0x03A9, 0x0300}); - table.put(0xFA7B, new int [] {0x55E2}); - table.put(0xFA7A, new int [] {0x5599}); - table.put(0x219B, new int [] {0x2192, 0x0338}); - table.put(0x219A, new int [] {0x2190, 0x0338}); - table.put(0x1FF9, new int [] {0x039F, 0x0301}); - table.put(0x1FF8, new int [] {0x039F, 0x0300}); - table.put(0xFA79, new int [] {0x5555}); - table.put(0x1FF7, new int [] {0x03C9, 0x0342, 0x0345}); - table.put(0xFA78, new int [] {0x559D}); - table.put(0x1FF6, new int [] {0x03C9, 0x0342}); - table.put(0xFA77, new int [] {0x52FA}); - table.put(0xFA76, new int [] {0x52C7}); - table.put(0x1FF4, new int [] {0x03C9, 0x0301, 0x0345}); - table.put(0xFA75, new int [] {0x5180}); - table.put(0x1FF3, new int [] {0x03C9, 0x0345}); - table.put(0xFA74, new int [] {0x5145}); - table.put(0x1FF2, new int [] {0x03C9, 0x0300, 0x0345}); - table.put(0xFA73, new int [] {0x4F80}); - table.put(0xFA72, new int [] {0x5168}); - table.put(0xFA71, new int [] {0x51B5}); - table.put(0xFA70, new int [] {0x4E26}); - table.put(0x1FEF, new int [] {0x0060}); - table.put(0x1FEE, new int [] {0x00A8, 0x0301}); - table.put(0x1FED, new int [] {0x00A8, 0x0300}); - table.put(0x1FEC, new int [] {0x03A1, 0x0314}); - 
table.put(0x1FEB, new int [] {0x03A5, 0x0301}); - table.put(0x1FEA, new int [] {0x03A5, 0x0300}); - table.put(0xFA6A, new int [] {0x983B}); - table.put(0x1FE9, new int [] {0x03A5, 0x0304}); - table.put(0x1FE8, new int [] {0x03A5, 0x0306}); - table.put(0xFA69, new int [] {0x97FF}); - table.put(0x1FE7, new int [] {0x03C5, 0x0308, 0x0342}); - table.put(0xFA68, new int [] {0x96E3}); - table.put(0x1FE6, new int [] {0x03C5, 0x0342}); - table.put(0xFA67, new int [] {0x9038}); - table.put(0x1FE5, new int [] {0x03C1, 0x0314}); - table.put(0xFA66, new int [] {0x8FB6}); - table.put(0x1FE4, new int [] {0x03C1, 0x0313}); - table.put(0xFA65, new int [] {0x8D08}); - table.put(0x1FE3, new int [] {0x03C5, 0x0308, 0x0301}); - table.put(0xFA64, new int [] {0x8CD3}); - table.put(0x1FE2, new int [] {0x03C5, 0x0308, 0x0300}); - table.put(0xFA63, new int [] {0x8B39}); - table.put(0x1FE1, new int [] {0x03C5, 0x0304}); - table.put(0xFA62, new int [] {0x8B01}); - table.put(0x1FE0, new int [] {0x03C5, 0x0306}); - table.put(0xFA61, new int [] {0x8996}); - table.put(0xFA60, new int [] {0x8910}); - table.put(0x1FDF, new int [] {0x1FFE, 0x0342}); - table.put(0x1FDE, new int [] {0x1FFE, 0x0301}); - table.put(0xFA5F, new int [] {0x8457}); - table.put(0x1FDD, new int [] {0x1FFE, 0x0300}); - table.put(0xFA5E, new int [] {0x8279}); - table.put(0xFA5D, new int [] {0x8279}); - table.put(0x1FDB, new int [] {0x0399, 0x0301}); - table.put(0xFA5C, new int [] {0x81ED}); - table.put(0x1FDA, new int [] {0x0399, 0x0300}); - table.put(0xFA5B, new int [] {0x8005}); - table.put(0xFA5A, new int [] {0x7F72}); - table.put(0x1FD9, new int [] {0x0399, 0x0304}); - table.put(0x1FD8, new int [] {0x0399, 0x0306}); - table.put(0xFA59, new int [] {0x7E41}); - table.put(0x1FD7, new int [] {0x03B9, 0x0308, 0x0342}); - table.put(0xFA58, new int [] {0x7E09}); - table.put(0x1FD6, new int [] {0x03B9, 0x0342}); - table.put(0xFA57, new int [] {0x7DF4}); - table.put(0xFA56, new int [] {0x7BC0}); - table.put(0xFA55, new int [] 
{0x7A81}); - table.put(0x1FD3, new int [] {0x03B9, 0x0308, 0x0301}); - table.put(0xFA54, new int [] {0x7A40}); - table.put(0x1FD2, new int [] {0x03B9, 0x0308, 0x0300}); - table.put(0xFA53, new int [] {0x798E}); - table.put(0x1FD1, new int [] {0x03B9, 0x0304}); - table.put(0xFA52, new int [] {0x798D}); - table.put(0x1FD0, new int [] {0x03B9, 0x0306}); - table.put(0xFA51, new int [] {0x795D}); - table.put(0xFA50, new int [] {0x7956}); - table.put(0x1FCF, new int [] {0x1FBF, 0x0342}); - table.put(0x1FCE, new int [] {0x1FBF, 0x0301}); - table.put(0xFA4F, new int [] {0x7950}); - table.put(0x1FCD, new int [] {0x1FBF, 0x0300}); - table.put(0xFA4E, new int [] {0x7948}); - table.put(0x1FCC, new int [] {0x0397, 0x0345}); - table.put(0xFA4D, new int [] {0x7949}); - table.put(0x1FCB, new int [] {0x0397, 0x0301}); - table.put(0xFA4C, new int [] {0x793E}); - table.put(0x1FCA, new int [] {0x0397, 0x0300}); - table.put(0xFA4B, new int [] {0x7891}); - table.put(0xFA4A, new int [] {0x7422}); - table.put(0x1FC9, new int [] {0x0395, 0x0301}); - table.put(0x1FC8, new int [] {0x0395, 0x0300}); - table.put(0xFA49, new int [] {0x722B}); - table.put(0x1FC7, new int [] {0x03B7, 0x0342, 0x0345}); - table.put(0xFA48, new int [] {0x716E}); - table.put(0x1FC6, new int [] {0x03B7, 0x0342}); - table.put(0xFA47, new int [] {0x6F22}); - table.put(0xFA46, new int [] {0x6E1A}); - table.put(0x1FC4, new int [] {0x03B7, 0x0301, 0x0345}); - table.put(0xFA45, new int [] {0x6D77}); - table.put(0x1FC3, new int [] {0x03B7, 0x0345}); - table.put(0xFA44, new int [] {0x6885}); - table.put(0x1FC2, new int [] {0x03B7, 0x0300, 0x0345}); - table.put(0xFA43, new int [] {0x6691}); - table.put(0x1FC1, new int [] {0x00A8, 0x0342}); - table.put(0xFA42, new int [] {0x65E2}); - table.put(0xFA41, new int [] {0x654F}); - table.put(0xFA40, new int [] {0x61F2}); - table.put(0x1FBE, new int [] {0x03B9}); - table.put(0xFA3F, new int [] {0x618E}); - table.put(0xFA3E, new int [] {0x6168}); - table.put(0x1FBC, new int [] {0x0391, 
0x0345}); - table.put(0xFA3D, new int [] {0x6094}); - table.put(0x1FBB, new int [] {0x0391, 0x0301}); - table.put(0xFA3C, new int [] {0x5C6E}); - table.put(0x1FBA, new int [] {0x0391, 0x0300}); - table.put(0xFA3B, new int [] {0x5C64}); - table.put(0xFA3A, new int [] {0x58A8}); - table.put(0x1FB9, new int [] {0x0391, 0x0304}); - table.put(0x1FB8, new int [] {0x0391, 0x0306}); - table.put(0xFA39, new int [] {0x5840}); - table.put(0x1FB7, new int [] {0x03B1, 0x0342, 0x0345}); - table.put(0xFA38, new int [] {0x5668}); - table.put(0x1FB6, new int [] {0x03B1, 0x0342}); - table.put(0xFA37, new int [] {0x5606}); - table.put(0xFA36, new int [] {0x559D}); - table.put(0x1FB4, new int [] {0x03B1, 0x0301, 0x0345}); - table.put(0xFA35, new int [] {0x5351}); - table.put(0x1FB3, new int [] {0x03B1, 0x0345}); - table.put(0xFA34, new int [] {0x52E4}); - table.put(0x1FB2, new int [] {0x03B1, 0x0300, 0x0345}); - table.put(0xFA33, new int [] {0x52C9}); - table.put(0x1FB1, new int [] {0x03B1, 0x0304}); - table.put(0xFA32, new int [] {0x514D}); - table.put(0x1FB0, new int [] {0x03B1, 0x0306}); - table.put(0xFA31, new int [] {0x50E7}); - table.put(0xFA30, new int [] {0x4FAE}); - table.put(0x1FAF, new int [] {0x03A9, 0x0314, 0x0342, 0x0345}); - table.put(0x1FAE, new int [] {0x03A9, 0x0313, 0x0342, 0x0345}); - table.put(0x1FAD, new int [] {0x03A9, 0x0314, 0x0301, 0x0345}); - table.put(0x1FAC, new int [] {0x03A9, 0x0313, 0x0301, 0x0345}); - table.put(0xFA2D, new int [] {0x9DB4}); - table.put(0x1FAB, new int [] {0x03A9, 0x0314, 0x0300, 0x0345}); - table.put(0xFA2C, new int [] {0x9928}); - table.put(0x1FAA, new int [] {0x03A9, 0x0313, 0x0300, 0x0345}); - table.put(0xFA2B, new int [] {0x98FC}); - table.put(0xFA2A, new int [] {0x98EF}); - table.put(0x1FA9, new int [] {0x03A9, 0x0314, 0x0345}); - table.put(0x1FA8, new int [] {0x03A9, 0x0313, 0x0345}); - table.put(0x1FA7, new int [] {0x03C9, 0x0314, 0x0342, 0x0345}); - table.put(0x1FA6, new int [] {0x03C9, 0x0313, 0x0342, 0x0345}); - 
table.put(0x1FA5, new int [] {0x03C9, 0x0314, 0x0301, 0x0345}); - table.put(0xFA26, new int [] {0x90FD}); - table.put(0x1FA4, new int [] {0x03C9, 0x0313, 0x0301, 0x0345}); - table.put(0xFA25, new int [] {0x9038}); - table.put(0x1FA3, new int [] {0x03C9, 0x0314, 0x0300, 0x0345}); - table.put(0x1FA2, new int [] {0x03C9, 0x0313, 0x0300, 0x0345}); - table.put(0x1FA1, new int [] {0x03C9, 0x0314, 0x0345}); - table.put(0xFA22, new int [] {0x8AF8}); - table.put(0x1FA0, new int [] {0x03C9, 0x0313, 0x0345}); - table.put(0xFA20, new int [] {0x8612}); - table.put(0xFA1E, new int [] {0x7FBD}); - table.put(0xFA1D, new int [] {0x7CBE}); - table.put(0xFA1C, new int [] {0x9756}); - table.put(0xFA1B, new int [] {0x798F}); - table.put(0xFA1A, new int [] {0x7965}); - table.put(0xFA19, new int [] {0x795E}); - table.put(0xFA18, new int [] {0x793C}); - table.put(0xFA17, new int [] {0x76CA}); - table.put(0xFA16, new int [] {0x732A}); - table.put(0xFA15, new int [] {0x51DE}); - table.put(0xFA12, new int [] {0x6674}); - table.put(0xFA10, new int [] {0x585A}); - table.put(0xFA0D, new int [] {0x55C0}); - table.put(0xFA0C, new int [] {0x5140}); - table.put(0xFA0B, new int [] {0x5ED3}); - table.put(0xFA0A, new int [] {0x898B}); - table.put(0x212B, new int [] {0x0041, 0x030A}); - table.put(0x212A, new int [] {0x004B}); - table.put(0xFA09, new int [] {0x964D}); - table.put(0xFA08, new int [] {0x884C}); - table.put(0xFA07, new int [] {0x8F3B}); - table.put(0xFA06, new int [] {0x66B4}); - table.put(0xFA05, new int [] {0x6D1E}); - table.put(0xFA04, new int [] {0x5B85}); - table.put(0xFA03, new int [] {0x7CD6}); - table.put(0xFA02, new int [] {0x62D3}); - table.put(0x2126, new int [] {0x03A9}); - table.put(0xFA01, new int [] {0x5EA6}); - table.put(0xFA00, new int [] {0x5207}); - table.put(0x0B94, new int [] {0x0B92, 0x0BD7}); - table.put(0x1026, new int [] {0x1025, 0x102E}); - table.put(0x0B5D, new int [] {0x0B22, 0x0B3C}); - table.put(0x0B5C, new int [] {0x0B21, 0x0B3C}); - table.put(0x1F9F, new int 
[] {0x0397, 0x0314, 0x0342, 0x0345}); - table.put(0x1F9E, new int [] {0x0397, 0x0313, 0x0342, 0x0345}); - table.put(0x1F9D, new int [] {0x0397, 0x0314, 0x0301, 0x0345}); - table.put(0x1F9C, new int [] {0x0397, 0x0313, 0x0301, 0x0345}); - table.put(0x1F9B, new int [] {0x0397, 0x0314, 0x0300, 0x0345}); - table.put(0x1F9A, new int [] {0x0397, 0x0313, 0x0300, 0x0345}); - table.put(0x0B4C, new int [] {0x0B47, 0x0B57}); - table.put(0x0B4B, new int [] {0x0B47, 0x0B3E}); - table.put(0x1F99, new int [] {0x0397, 0x0314, 0x0345}); - table.put(0x1F98, new int [] {0x0397, 0x0313, 0x0345}); - table.put(0x1F97, new int [] {0x03B7, 0x0314, 0x0342, 0x0345}); - table.put(0x1F96, new int [] {0x03B7, 0x0313, 0x0342, 0x0345}); - table.put(0x1F95, new int [] {0x03B7, 0x0314, 0x0301, 0x0345}); - table.put(0x1F94, new int [] {0x03B7, 0x0313, 0x0301, 0x0345}); - table.put(0x1F93, new int [] {0x03B7, 0x0314, 0x0300, 0x0345}); - table.put(0x1F92, new int [] {0x03B7, 0x0313, 0x0300, 0x0345}); - table.put(0x1F91, new int [] {0x03B7, 0x0314, 0x0345}); - table.put(0x1F90, new int [] {0x03B7, 0x0313, 0x0345}); - table.put(0x0B48, new int [] {0x0B47, 0x0B56}); - table.put(0x1F8F, new int [] {0x0391, 0x0314, 0x0342, 0x0345}); - table.put(0x1F8E, new int [] {0x0391, 0x0313, 0x0342, 0x0345}); - table.put(0x1F8D, new int [] {0x0391, 0x0314, 0x0301, 0x0345}); - table.put(0x1F8C, new int [] {0x0391, 0x0313, 0x0301, 0x0345}); - table.put(0x1F8B, new int [] {0x0391, 0x0314, 0x0300, 0x0345}); - table.put(0x1F8A, new int [] {0x0391, 0x0313, 0x0300, 0x0345}); - table.put(0x1F89, new int [] {0x0391, 0x0314, 0x0345}); - table.put(0x1F88, new int [] {0x0391, 0x0313, 0x0345}); - table.put(0x1F87, new int [] {0x03B1, 0x0314, 0x0342, 0x0345}); - table.put(0x1F86, new int [] {0x03B1, 0x0313, 0x0342, 0x0345}); - table.put(0x1F85, new int [] {0x03B1, 0x0314, 0x0301, 0x0345}); - table.put(0x1F84, new int [] {0x03B1, 0x0313, 0x0301, 0x0345}); - table.put(0x1F83, new int [] {0x03B1, 0x0314, 0x0300, 0x0345}); - 
table.put(0x1F82, new int [] {0x03B1, 0x0313, 0x0300, 0x0345}); - table.put(0x1F81, new int [] {0x03B1, 0x0314, 0x0345}); - table.put(0x1F80, new int [] {0x03B1, 0x0313, 0x0345}); - table.put(0x1F7D, new int [] {0x03C9, 0x0301}); - table.put(0x1F7C, new int [] {0x03C9, 0x0300}); - table.put(0x1F7B, new int [] {0x03C5, 0x0301}); - table.put(0x1F7A, new int [] {0x03C5, 0x0300}); - table.put(0x1F79, new int [] {0x03BF, 0x0301}); - table.put(0x1F78, new int [] {0x03BF, 0x0300}); - table.put(0x1F77, new int [] {0x03B9, 0x0301}); - table.put(0x1F76, new int [] {0x03B9, 0x0300}); - table.put(0x1F75, new int [] {0x03B7, 0x0301}); - table.put(0x0233, new int [] {0x0079, 0x0304}); - table.put(0x1F74, new int [] {0x03B7, 0x0300}); - table.put(0x0232, new int [] {0x0059, 0x0304}); - table.put(0x1F73, new int [] {0x03B5, 0x0301}); - table.put(0x0231, new int [] {0x006F, 0x0307, 0x0304}); - table.put(0x1F72, new int [] {0x03B5, 0x0300}); - table.put(0x0230, new int [] {0x004F, 0x0307, 0x0304}); - table.put(0x1F71, new int [] {0x03B1, 0x0301}); - table.put(0x1F70, new int [] {0x03B1, 0x0300}); - table.put(0x022F, new int [] {0x006F, 0x0307}); - table.put(0x022E, new int [] {0x004F, 0x0307}); - table.put(0x1F6F, new int [] {0x03A9, 0x0314, 0x0342}); - table.put(0x022D, new int [] {0x006F, 0x0303, 0x0304}); - table.put(0x1F6E, new int [] {0x03A9, 0x0313, 0x0342}); - table.put(0x022C, new int [] {0x004F, 0x0303, 0x0304}); - table.put(0x1F6D, new int [] {0x03A9, 0x0314, 0x0301}); - table.put(0x022B, new int [] {0x006F, 0x0308, 0x0304}); - table.put(0x1F6C, new int [] {0x03A9, 0x0313, 0x0301}); - table.put(0x022A, new int [] {0x004F, 0x0308, 0x0304}); - table.put(0x1F6B, new int [] {0x03A9, 0x0314, 0x0300}); - table.put(0x1F6A, new int [] {0x03A9, 0x0313, 0x0300}); - table.put(0x0229, new int [] {0x0065, 0x0327}); - table.put(0x0228, new int [] {0x0045, 0x0327}); - table.put(0x1F69, new int [] {0x03A9, 0x0314}); - table.put(0x0227, new int [] {0x0061, 0x0307}); - table.put(0x1F68, new 
int [] {0x03A9, 0x0313}); - table.put(0x0226, new int [] {0x0041, 0x0307}); - table.put(0x1F67, new int [] {0x03C9, 0x0314, 0x0342}); - table.put(0x1F66, new int [] {0x03C9, 0x0313, 0x0342}); - table.put(0x1F65, new int [] {0x03C9, 0x0314, 0x0301}); - table.put(0x1F64, new int [] {0x03C9, 0x0313, 0x0301}); - table.put(0x1F63, new int [] {0x03C9, 0x0314, 0x0300}); - table.put(0x1F62, new int [] {0x03C9, 0x0313, 0x0300}); - table.put(0x1F61, new int [] {0x03C9, 0x0314}); - table.put(0x1F60, new int [] {0x03C9, 0x0313}); - table.put(0x021F, new int [] {0x0068, 0x030C}); - table.put(0x021E, new int [] {0x0048, 0x030C}); - table.put(0x1F5F, new int [] {0x03A5, 0x0314, 0x0342}); - table.put(0x1F5D, new int [] {0x03A5, 0x0314, 0x0301}); - table.put(0x021B, new int [] {0x0074, 0x0326}); - table.put(0x021A, new int [] {0x0054, 0x0326}); - table.put(0x1F5B, new int [] {0x03A5, 0x0314, 0x0300}); - table.put(0x0219, new int [] {0x0073, 0x0326}); - table.put(0x0218, new int [] {0x0053, 0x0326}); - table.put(0x1F59, new int [] {0x03A5, 0x0314}); - table.put(0x0217, new int [] {0x0075, 0x0311}); - table.put(0x0216, new int [] {0x0055, 0x0311}); - table.put(0x1F57, new int [] {0x03C5, 0x0314, 0x0342}); - table.put(0x0215, new int [] {0x0075, 0x030F}); - table.put(0x1F56, new int [] {0x03C5, 0x0313, 0x0342}); - table.put(0x0214, new int [] {0x0055, 0x030F}); - table.put(0x1F55, new int [] {0x03C5, 0x0314, 0x0301}); - table.put(0x0213, new int [] {0x0072, 0x0311}); - table.put(0x1F54, new int [] {0x03C5, 0x0313, 0x0301}); - table.put(0x0212, new int [] {0x0052, 0x0311}); - table.put(0x1F53, new int [] {0x03C5, 0x0314, 0x0300}); - table.put(0x0211, new int [] {0x0072, 0x030F}); - table.put(0x1F52, new int [] {0x03C5, 0x0313, 0x0300}); - table.put(0x0210, new int [] {0x0052, 0x030F}); - table.put(0x1F51, new int [] {0x03C5, 0x0314}); - table.put(0x1F50, new int [] {0x03C5, 0x0313}); - table.put(0x020F, new int [] {0x006F, 0x0311}); - table.put(0x020E, new int [] {0x004F, 0x0311}); - 
table.put(0x020D, new int [] {0x006F, 0x030F}); - table.put(0x020C, new int [] {0x004F, 0x030F}); - table.put(0x1F4D, new int [] {0x039F, 0x0314, 0x0301}); - table.put(0x020B, new int [] {0x0069, 0x0311}); - table.put(0x1F4C, new int [] {0x039F, 0x0313, 0x0301}); - table.put(0x020A, new int [] {0x0049, 0x0311}); - table.put(0x1F4B, new int [] {0x039F, 0x0314, 0x0300}); - table.put(0x1F4A, new int [] {0x039F, 0x0313, 0x0300}); - table.put(0x0209, new int [] {0x0069, 0x030F}); - table.put(0x0208, new int [] {0x0049, 0x030F}); - table.put(0x1F49, new int [] {0x039F, 0x0314}); - table.put(0x0207, new int [] {0x0065, 0x0311}); - table.put(0x1F48, new int [] {0x039F, 0x0313}); - table.put(0x0206, new int [] {0x0045, 0x0311}); - table.put(0x0205, new int [] {0x0065, 0x030F}); - table.put(0x0204, new int [] {0x0045, 0x030F}); - table.put(0x1F45, new int [] {0x03BF, 0x0314, 0x0301}); - table.put(0x0203, new int [] {0x0061, 0x0311}); - table.put(0x1F44, new int [] {0x03BF, 0x0313, 0x0301}); - table.put(0x0202, new int [] {0x0041, 0x0311}); - table.put(0x1F43, new int [] {0x03BF, 0x0314, 0x0300}); - table.put(0x0201, new int [] {0x0061, 0x030F}); - table.put(0x1F42, new int [] {0x03BF, 0x0313, 0x0300}); - table.put(0x0200, new int [] {0x0041, 0x030F}); - table.put(0x1F41, new int [] {0x03BF, 0x0314}); - table.put(0x1F40, new int [] {0x03BF, 0x0313}); - table.put(0x2F9FF, new int [] {0x980B}); - table.put(0x2F9FE, new int [] {0x980B}); - table.put(0x1F3F, new int [] {0x0399, 0x0314, 0x0342}); - table.put(0x2F9FD, new int [] {0x29496}); - table.put(0x1F3E, new int [] {0x0399, 0x0313, 0x0342}); - table.put(0x2F9FC, new int [] {0x4AB2}); - table.put(0x1F3D, new int [] {0x0399, 0x0314, 0x0301}); - table.put(0x2F9FB, new int [] {0x2940A}); - table.put(0x1F3C, new int [] {0x0399, 0x0313, 0x0301}); - table.put(0x2F9FA, new int [] {0x97E0}); - table.put(0x1F3B, new int [] {0x0399, 0x0314, 0x0300}); - table.put(0x1F3A, new int [] {0x0399, 0x0313, 0x0300}); - table.put(0x2F9F9, new int 
[] {0x4A76}); - table.put(0x2F9F8, new int [] {0x4A6E}); - table.put(0x1F39, new int [] {0x0399, 0x0314}); - table.put(0x2F9F7, new int [] {0x2921A}); - table.put(0x1F38, new int [] {0x0399, 0x0313}); - table.put(0x2F9F6, new int [] {0x29145}); - table.put(0x1F37, new int [] {0x03B9, 0x0314, 0x0342}); - table.put(0x2F9F5, new int [] {0x9723}); - table.put(0x1F36, new int [] {0x03B9, 0x0313, 0x0342}); - table.put(0x2F9F4, new int [] {0x5DB2}); - table.put(0x1F35, new int [] {0x03B9, 0x0314, 0x0301}); - table.put(0x2F9F3, new int [] {0x96C3}); - table.put(0x1F34, new int [] {0x03B9, 0x0313, 0x0301}); - table.put(0x2F9F2, new int [] {0x49E6}); - table.put(0x1F33, new int [] {0x03B9, 0x0314, 0x0300}); - table.put(0x2F9F1, new int [] {0x28D77}); - table.put(0x1F32, new int [] {0x03B9, 0x0313, 0x0300}); - table.put(0x2F9F0, new int [] {0x95B7}); - table.put(0x1F31, new int [] {0x03B9, 0x0314}); - table.put(0x1F30, new int [] {0x03B9, 0x0313}); - table.put(0x2F9EF, new int [] {0x4995}); - table.put(0x2F9EE, new int [] {0x958B}); - table.put(0x1F2F, new int [] {0x0397, 0x0314, 0x0342}); - table.put(0x2F9ED, new int [] {0x28BFA}); - table.put(0x1F2E, new int [] {0x0397, 0x0313, 0x0342}); - table.put(0x2F9EC, new int [] {0x9415}); - table.put(0x1F2D, new int [] {0x0397, 0x0314, 0x0301}); - table.put(0x2F9EB, new int [] {0x93F9}); - table.put(0x1F2C, new int [] {0x0397, 0x0313, 0x0301}); - table.put(0x2F9EA, new int [] {0x927C}); - table.put(0x1F2B, new int [] {0x0397, 0x0314, 0x0300}); - table.put(0x1F2A, new int [] {0x0397, 0x0313, 0x0300}); - table.put(0x2F9E9, new int [] {0x92D8}); - table.put(0x2F9E8, new int [] {0x92D7}); - table.put(0x1F29, new int [] {0x0397, 0x0314}); - table.put(0x2F9E7, new int [] {0x9238}); - table.put(0x1F28, new int [] {0x0397, 0x0313}); - table.put(0x2F9E6, new int [] {0x911B}); - table.put(0x1F27, new int [] {0x03B7, 0x0314, 0x0342}); - table.put(0x2F9E5, new int [] {0x2872E}); - table.put(0x1F26, new int [] {0x03B7, 0x0313, 0x0342}); - 
table.put(0x2F9E4, new int [] {0x9111}); - table.put(0x1F25, new int [] {0x03B7, 0x0314, 0x0301}); - table.put(0x2F9E3, new int [] {0x90F1}); - table.put(0x1F24, new int [] {0x03B7, 0x0313, 0x0301}); - table.put(0x2F9E2, new int [] {0x9094}); - table.put(0x1F23, new int [] {0x03B7, 0x0314, 0x0300}); - table.put(0x2F9E1, new int [] {0x285ED}); - table.put(0x1F22, new int [] {0x03B7, 0x0313, 0x0300}); - table.put(0x2F9E0, new int [] {0x285D2}); - table.put(0x1F21, new int [] {0x03B7, 0x0314}); - table.put(0x1F20, new int [] {0x03B7, 0x0313}); - table.put(0x2F9DF, new int [] {0x8F38}); - table.put(0x2F9DE, new int [] {0x8ED4}); - table.put(0x2F9DD, new int [] {0x208DE}); - table.put(0x2F9DC, new int [] {0x8DF0}); - table.put(0x1F1D, new int [] {0x0395, 0x0314, 0x0301}); - table.put(0x2F9DB, new int [] {0x8DBC}); - table.put(0x1F1C, new int [] {0x0395, 0x0313, 0x0301}); - table.put(0x2F9DA, new int [] {0x8DCB}); - table.put(0x1F1B, new int [] {0x0395, 0x0314, 0x0300}); - table.put(0x1F1A, new int [] {0x0395, 0x0313, 0x0300}); - table.put(0x2F9D9, new int [] {0x20804}); - table.put(0x2F9D8, new int [] {0x27F2F}); - table.put(0x1F19, new int [] {0x0395, 0x0314}); - table.put(0x2F9D7, new int [] {0x8D77}); - table.put(0x1F18, new int [] {0x0395, 0x0313}); - table.put(0x2F9D6, new int [] {0x8D1B}); - table.put(0x2F9D5, new int [] {0x8CC1}); - table.put(0x2F9D4, new int [] {0x8CAB}); - table.put(0x1F15, new int [] {0x03B5, 0x0314, 0x0301}); - table.put(0x2F9D3, new int [] {0x27CA8}); - table.put(0x1F14, new int [] {0x03B5, 0x0313, 0x0301}); - table.put(0x2F9D2, new int [] {0x8C55}); - table.put(0x1F13, new int [] {0x03B5, 0x0314, 0x0300}); - table.put(0x2F9D1, new int [] {0x8B8A}); - table.put(0x1F12, new int [] {0x03B5, 0x0313, 0x0300}); - table.put(0x2F9D0, new int [] {0x8AED}); - table.put(0x1F11, new int [] {0x03B5, 0x0314}); - table.put(0x1F10, new int [] {0x03B5, 0x0313}); - table.put(0x2F9CF, new int [] {0x8AA0}); - table.put(0x2F9CE, new int [] {0x46C7}); - 
table.put(0x1F0F, new int [] {0x0391, 0x0314, 0x0342}); - table.put(0x2F9CD, new int [] {0x46BE}); - table.put(0x1F0E, new int [] {0x0391, 0x0313, 0x0342}); - table.put(0x2F9CC, new int [] {0x27966}); - table.put(0x1F0D, new int [] {0x0391, 0x0314, 0x0301}); - table.put(0x2F9CB, new int [] {0x278AE}); - table.put(0x1F0C, new int [] {0x0391, 0x0313, 0x0301}); - table.put(0x2F9CA, new int [] {0x34BB}); - table.put(0x1F0B, new int [] {0x0391, 0x0314, 0x0300}); - table.put(0x1F0A, new int [] {0x0391, 0x0313, 0x0300}); - table.put(0x2F9C9, new int [] {0x88FA}); - table.put(0x2F9C8, new int [] {0x4635}); - table.put(0x1F09, new int [] {0x0391, 0x0314}); - table.put(0x2F9C7, new int [] {0x88DE}); - table.put(0x1F08, new int [] {0x0391, 0x0313}); - table.put(0x2F9C6, new int [] {0x88D7}); - table.put(0x1F07, new int [] {0x03B1, 0x0314, 0x0342}); - table.put(0x2F9C5, new int [] {0x27667}); - table.put(0x1F06, new int [] {0x03B1, 0x0313, 0x0342}); - table.put(0x2F9C4, new int [] {0x8863}); - table.put(0x1F05, new int [] {0x03B1, 0x0314, 0x0301}); - table.put(0x2F9C3, new int [] {0x8860}); - table.put(0x1F04, new int [] {0x03B1, 0x0313, 0x0301}); - table.put(0x2F9C2, new int [] {0x45F9}); - table.put(0x1F03, new int [] {0x03B1, 0x0314, 0x0300}); - table.put(0x2F9C1, new int [] {0x8801}); - table.put(0x1F02, new int [] {0x03B1, 0x0313, 0x0300}); - table.put(0x2F9C0, new int [] {0x87E1}); - table.put(0x1F01, new int [] {0x03B1, 0x0314}); - table.put(0x1F00, new int [] {0x03B1, 0x0313}); - table.put(0x2F9BF, new int [] {0x45D7}); - table.put(0x2F9BE, new int [] {0x8786}); - table.put(0x2F9BD, new int [] {0x876B}); - table.put(0x2F9BC, new int [] {0x8728}); - table.put(0x2F9BB, new int [] {0x8779}); - table.put(0x2F9BA, new int [] {0x86E2}); - table.put(0x2F9B9, new int [] {0x870E}); - table.put(0x2F9B8, new int [] {0x8688}); - table.put(0x2F9B7, new int [] {0x86A9}); - table.put(0x2F9B6, new int [] {0x8669}); - table.put(0x2F9B5, new int [] {0x8667}); - table.put(0x2F9B4, new 
int [] {0x865C}); - table.put(0x2F9B3, new int [] {0x8650}); - table.put(0x2F9B2, new int [] {0x456B}); - table.put(0x2F9B1, new int [] {0x270D2}); - table.put(0x2F9B0, new int [] {0x26FB1}); - table.put(0x2F9AF, new int [] {0x4561}); - table.put(0x2F9AE, new int [] {0x455D}); - table.put(0x2F9AD, new int [] {0x26F2C}); - table.put(0x2F9AC, new int [] {0x8564}); - table.put(0x2F9AB, new int [] {0x273CA}); - table.put(0x2F9AA, new int [] {0x8516}); - table.put(0x2F9A9, new int [] {0x84F3}); - table.put(0x2F9A8, new int [] {0x84F1}); - table.put(0x2F9A7, new int [] {0x452B}); - table.put(0x2F9A6, new int [] {0x26CD5}); - table.put(0x2F9A5, new int [] {0x26D6B}); - table.put(0x2F9A4, new int [] {0x26C36}); - table.put(0x2F9A3, new int [] {0x83DC}); - table.put(0x2F9A2, new int [] {0x83CC}); - table.put(0x2F9A1, new int [] {0x83CA}); - table.put(0x2F9A0, new int [] {0x8353}); - table.put(0x01FF, new int [] {0x00F8, 0x0301}); - table.put(0x01FE, new int [] {0x00D8, 0x0301}); - table.put(0x01FD, new int [] {0x00E6, 0x0301}); - table.put(0x01FC, new int [] {0x00C6, 0x0301}); - table.put(0x01FB, new int [] {0x0061, 0x030A, 0x0301}); - table.put(0x01FA, new int [] {0x0041, 0x030A, 0x0301}); - table.put(0x01F9, new int [] {0x006E, 0x0300}); - table.put(0x01F8, new int [] {0x004E, 0x0300}); - table.put(0x01F5, new int [] {0x0067, 0x0301}); - table.put(0x01F4, new int [] {0x0047, 0x0301}); - table.put(0x01F0, new int [] {0x006A, 0x030C}); - table.put(0x01EF, new int [] {0x0292, 0x030C}); - table.put(0x01EE, new int [] {0x01B7, 0x030C}); - table.put(0x01ED, new int [] {0x006F, 0x0328, 0x0304}); - table.put(0x01EC, new int [] {0x004F, 0x0328, 0x0304}); - table.put(0x01EB, new int [] {0x006F, 0x0328}); - table.put(0x01EA, new int [] {0x004F, 0x0328}); - table.put(0x01E9, new int [] {0x006B, 0x030C}); - table.put(0x01E8, new int [] {0x004B, 0x030C}); - table.put(0x01E7, new int [] {0x0067, 0x030C}); - table.put(0x01E6, new int [] {0x0047, 0x030C}); - table.put(0x01E3, new int [] 
{0x00E6, 0x0304}); - table.put(0x01E2, new int [] {0x00C6, 0x0304}); - table.put(0x01E1, new int [] {0x0061, 0x0307, 0x0304}); - table.put(0x01E0, new int [] {0x0041, 0x0307, 0x0304}); - table.put(0x01DF, new int [] {0x0061, 0x0308, 0x0304}); - table.put(0x01DE, new int [] {0x0041, 0x0308, 0x0304}); - table.put(0x01DC, new int [] {0x0075, 0x0308, 0x0300}); - table.put(0x01DB, new int [] {0x0055, 0x0308, 0x0300}); - table.put(0x01DA, new int [] {0x0075, 0x0308, 0x030C}); - table.put(0x01D9, new int [] {0x0055, 0x0308, 0x030C}); - table.put(0x01D8, new int [] {0x0075, 0x0308, 0x0301}); - table.put(0x01D7, new int [] {0x0055, 0x0308, 0x0301}); - table.put(0x01D6, new int [] {0x0075, 0x0308, 0x0304}); - table.put(0x01D5, new int [] {0x0055, 0x0308, 0x0304}); - table.put(0x01D4, new int [] {0x0075, 0x030C}); - table.put(0x01D3, new int [] {0x0055, 0x030C}); - table.put(0x01D2, new int [] {0x006F, 0x030C}); - table.put(0x01D1, new int [] {0x004F, 0x030C}); - table.put(0x01D0, new int [] {0x0069, 0x030C}); - table.put(0x01CF, new int [] {0x0049, 0x030C}); - table.put(0x01CE, new int [] {0x0061, 0x030C}); - table.put(0x01CD, new int [] {0x0041, 0x030C}); - table.put(0x1EF9, new int [] {0x0079, 0x0303}); - table.put(0x1EF8, new int [] {0x0059, 0x0303}); - table.put(0x1EF7, new int [] {0x0079, 0x0309}); - table.put(0x1EF6, new int [] {0x0059, 0x0309}); - table.put(0x1EF5, new int [] {0x0079, 0x0323}); - table.put(0x1EF4, new int [] {0x0059, 0x0323}); - table.put(0x1EF3, new int [] {0x0079, 0x0300}); - table.put(0x1EF2, new int [] {0x0059, 0x0300}); - table.put(0x01B0, new int [] {0x0075, 0x031B}); - table.put(0x1EF1, new int [] {0x0075, 0x031B, 0x0323}); - table.put(0x1EF0, new int [] {0x0055, 0x031B, 0x0323}); - table.put(0x2F99F, new int [] {0x8457}); - table.put(0x01AF, new int [] {0x0055, 0x031B}); - table.put(0x2F99E, new int [] {0x83E7}); - table.put(0x1EEF, new int [] {0x0075, 0x031B, 0x0303}); - table.put(0x2F99D, new int [] {0x83BD}); - table.put(0x1EEE, new int [] 
{0x0055, 0x031B, 0x0303}); - table.put(0x2F99C, new int [] {0x8323}); - table.put(0x1EED, new int [] {0x0075, 0x031B, 0x0309}); - table.put(0x2F99B, new int [] {0x83AD}); - table.put(0x1EEC, new int [] {0x0055, 0x031B, 0x0309}); - table.put(0x2F99A, new int [] {0x8363}); - table.put(0x1EEB, new int [] {0x0075, 0x031B, 0x0300}); - table.put(0x1EEA, new int [] {0x0055, 0x031B, 0x0300}); - table.put(0x2F999, new int [] {0x831D}); - table.put(0x2F998, new int [] {0x82E5}); - table.put(0x1EE9, new int [] {0x0075, 0x031B, 0x0301}); - table.put(0x2F997, new int [] {0x26B3C}); - table.put(0x1EE8, new int [] {0x0055, 0x031B, 0x0301}); - table.put(0x2F996, new int [] {0x82E6}); - table.put(0x1EE7, new int [] {0x0075, 0x0309}); - table.put(0x2F995, new int [] {0x82BD}); - table.put(0x1EE6, new int [] {0x0055, 0x0309}); - table.put(0x2F994, new int [] {0x82B3}); - table.put(0x1EE5, new int [] {0x0075, 0x0323}); - table.put(0x2F993, new int [] {0x82B1}); - table.put(0x1EE4, new int [] {0x0055, 0x0323}); - table.put(0x2F992, new int [] {0x52B3}); - table.put(0x1EE3, new int [] {0x006F, 0x031B, 0x0323}); - table.put(0x2F991, new int [] {0x829D}); - table.put(0x01A1, new int [] {0x006F, 0x031B}); - table.put(0x1EE2, new int [] {0x004F, 0x031B, 0x0323}); - table.put(0x2F990, new int [] {0x828B}); - table.put(0x01A0, new int [] {0x004F, 0x031B}); - table.put(0x1EE1, new int [] {0x006F, 0x031B, 0x0303}); - table.put(0x1EE0, new int [] {0x004F, 0x031B, 0x0303}); - table.put(0x2F98F, new int [] {0x8291}); - table.put(0x2F98E, new int [] {0x446B}); - table.put(0x1EDF, new int [] {0x006F, 0x031B, 0x0309}); - table.put(0x2F98D, new int [] {0x8F9E}); - table.put(0x1EDE, new int [] {0x004F, 0x031B, 0x0309}); - table.put(0x2F98C, new int [] {0x8204}); - table.put(0x1EDD, new int [] {0x006F, 0x031B, 0x0300}); - table.put(0x2F98B, new int [] {0x8201}); - table.put(0x1EDC, new int [] {0x004F, 0x031B, 0x0300}); - table.put(0x2F98A, new int [] {0x2339C}); - table.put(0x1EDB, new int [] {0x006F, 
0x031B, 0x0301}); - table.put(0x1EDA, new int [] {0x004F, 0x031B, 0x0301}); - table.put(0x2F989, new int [] {0x23393}); - table.put(0x2F988, new int [] {0x267B5}); - table.put(0x1ED9, new int [] {0x006F, 0x0323, 0x0302}); - table.put(0x2F987, new int [] {0x267A7}); - table.put(0x04F9, new int [] {0x044B, 0x0308}); - table.put(0x1ED8, new int [] {0x004F, 0x0323, 0x0302}); - table.put(0x2F986, new int [] {0x5AB5}); - table.put(0x04F8, new int [] {0x042B, 0x0308}); - table.put(0x1ED7, new int [] {0x006F, 0x0302, 0x0303}); - table.put(0x2F985, new int [] {0x813E}); - table.put(0x1ED6, new int [] {0x004F, 0x0302, 0x0303}); - table.put(0x2F984, new int [] {0x440B}); - table.put(0x1ED5, new int [] {0x006F, 0x0302, 0x0309}); - table.put(0x2F983, new int [] {0x8103}); - table.put(0x04F5, new int [] {0x0447, 0x0308}); - table.put(0x1ED4, new int [] {0x004F, 0x0302, 0x0309}); - table.put(0x2F982, new int [] {0x80B2}); - table.put(0x04F4, new int [] {0x0427, 0x0308}); - table.put(0x1ED3, new int [] {0x006F, 0x0302, 0x0300}); - table.put(0x2F981, new int [] {0x43D5}); - table.put(0x04F3, new int [] {0x0443, 0x030B}); - table.put(0x1ED2, new int [] {0x004F, 0x0302, 0x0300}); - table.put(0x2F980, new int [] {0x2335F}); - table.put(0x04F2, new int [] {0x0423, 0x030B}); - table.put(0x1ED1, new int [] {0x006F, 0x0302, 0x0301}); - table.put(0x04F1, new int [] {0x0443, 0x0308}); - table.put(0x1ED0, new int [] {0x004F, 0x0302, 0x0301}); - table.put(0x04F0, new int [] {0x0423, 0x0308}); - table.put(0x2F97F, new int [] {0x8070}); - table.put(0x2F97E, new int [] {0x265A8}); - table.put(0x1ECF, new int [] {0x006F, 0x0309}); - table.put(0x2F97D, new int [] {0x8060}); - table.put(0x04EF, new int [] {0x0443, 0x0304}); - table.put(0x1ECE, new int [] {0x004F, 0x0309}); - table.put(0x2F97C, new int [] {0x26523}); - table.put(0x04EE, new int [] {0x0423, 0x0304}); - table.put(0x1ECD, new int [] {0x006F, 0x0323}); - table.put(0x2F97B, new int [] {0x264DA}); - table.put(0x04ED, new int [] {0x044D, 
0x0308}); - table.put(0x1ECC, new int [] {0x004F, 0x0323}); - table.put(0x2F97A, new int [] {0x8005}); - table.put(0x04EC, new int [] {0x042D, 0x0308}); - table.put(0x1ECB, new int [] {0x0069, 0x0323}); - table.put(0x04EB, new int [] {0x04E9, 0x0308}); - table.put(0x1ECA, new int [] {0x0049, 0x0323}); - table.put(0x04EA, new int [] {0x04E8, 0x0308}); - table.put(0x2F979, new int [] {0x7FFA}); - table.put(0x0DDE, new int [] {0x0DD9, 0x0DDF}); - table.put(0x2F978, new int [] {0x7F95}); - table.put(0x0DDD, new int [] {0x0DD9, 0x0DCF, 0x0DCA}); - table.put(0x1EC9, new int [] {0x0069, 0x0309}); - table.put(0x2F977, new int [] {0x2633E}); - table.put(0x0DDC, new int [] {0x0DD9, 0x0DCF}); - table.put(0x1EC8, new int [] {0x0049, 0x0309}); - table.put(0x2F976, new int [] {0x7F7A}); - table.put(0x1EC7, new int [] {0x0065, 0x0323, 0x0302}); - table.put(0x2F975, new int [] {0x262D9}); - table.put(0x04E7, new int [] {0x043E, 0x0308}); - table.put(0x0DDA, new int [] {0x0DD9, 0x0DCA}); - table.put(0x1EC6, new int [] {0x0045, 0x0323, 0x0302}); - table.put(0x2F974, new int [] {0x4359}); - table.put(0x04E6, new int [] {0x041E, 0x0308}); - table.put(0x1EC5, new int [] {0x0065, 0x0302, 0x0303}); - table.put(0x2F973, new int [] {0x26247}); - table.put(0x04E5, new int [] {0x0438, 0x0308}); - table.put(0x1EC4, new int [] {0x0045, 0x0302, 0x0303}); - table.put(0x2F972, new int [] {0x26228}); - table.put(0x04E4, new int [] {0x0418, 0x0308}); - table.put(0x1EC3, new int [] {0x0065, 0x0302, 0x0309}); - table.put(0x2F971, new int [] {0x4334}); - table.put(0x04E3, new int [] {0x0438, 0x0304}); - table.put(0x1EC2, new int [] {0x0045, 0x0302, 0x0309}); - table.put(0x2F970, new int [] {0x7E45}); - table.put(0x04E2, new int [] {0x0418, 0x0304}); - table.put(0x1EC1, new int [] {0x0065, 0x0302, 0x0300}); - table.put(0x1EC0, new int [] {0x0045, 0x0302, 0x0300}); - table.put(0xF9FF, new int [] {0x523A}); - table.put(0xF9FE, new int [] {0x8336}); - table.put(0x2F96F, new int [] {0x7E02}); - 
table.put(0xF9FD, new int [] {0x4EC0}); - table.put(0x2F96E, new int [] {0x7DC7}); - table.put(0x1EBF, new int [] {0x0065, 0x0302, 0x0301}); - table.put(0xF9FC, new int [] {0x8B58}); - table.put(0x2F96D, new int [] {0x4301}); - table.put(0x04DF, new int [] {0x0437, 0x0308}); - table.put(0x1EBE, new int [] {0x0045, 0x0302, 0x0301}); - table.put(0xF9FB, new int [] {0x7099}); - table.put(0x2F96C, new int [] {0x7D63}); - table.put(0x04DE, new int [] {0x0417, 0x0308}); - table.put(0x1EBD, new int [] {0x0065, 0x0303}); - table.put(0xF9FA, new int [] {0x72C0}); - table.put(0x2F96B, new int [] {0x25F86}); - table.put(0x04DD, new int [] {0x0436, 0x0308}); - table.put(0x1EBC, new int [] {0x0045, 0x0303}); - table.put(0x2F96A, new int [] {0x7D00}); - table.put(0x04DC, new int [] {0x0416, 0x0308}); - table.put(0x1EBB, new int [] {0x0065, 0x0309}); - table.put(0x04DB, new int [] {0x04D9, 0x0308}); - table.put(0x1EBA, new int [] {0x0045, 0x0309}); - table.put(0x04DA, new int [] {0x04D8, 0x0308}); - table.put(0xF9F9, new int [] {0x7C92}); - table.put(0xF9F8, new int [] {0x7B20}); - table.put(0x2F969, new int [] {0x7CE3}); - table.put(0xF9F7, new int [] {0x7ACB}); - table.put(0x2F968, new int [] {0x7CE8}); - table.put(0x1EB9, new int [] {0x0065, 0x0323}); - table.put(0xF9F6, new int [] {0x81E8}); - table.put(0x2F967, new int [] {0x42A0}); - table.put(0x1EB8, new int [] {0x0045, 0x0323}); - table.put(0xF9F5, new int [] {0x6DCB}); - table.put(0x2F966, new int [] {0x7CD2}); - table.put(0x1EB7, new int [] {0x0061, 0x0323, 0x0306}); - table.put(0xF9F4, new int [] {0x6797}); - table.put(0x2F965, new int [] {0x25C80}); - table.put(0x04D7, new int [] {0x0435, 0x0306}); - table.put(0x1EB6, new int [] {0x0041, 0x0323, 0x0306}); - table.put(0xF9F3, new int [] {0x9E9F}); - table.put(0x2F964, new int [] {0x4227}); - table.put(0x04D6, new int [] {0x0415, 0x0306}); - table.put(0x1EB5, new int [] {0x0061, 0x0306, 0x0303}); - table.put(0xF9F2, new int [] {0x9C57}); - table.put(0x2F963, new int [] 
{0x7BC9}); - table.put(0x1EB4, new int [] {0x0041, 0x0306, 0x0303}); - table.put(0xF9F1, new int [] {0x96A3}); - table.put(0x2F962, new int [] {0x7BC6}); - table.put(0x1EB3, new int [] {0x0061, 0x0306, 0x0309}); - table.put(0xF9F0, new int [] {0x85FA}); - table.put(0x2F961, new int [] {0x25BAB}); - table.put(0x04D3, new int [] {0x0430, 0x0308}); - table.put(0x1EB2, new int [] {0x0041, 0x0306, 0x0309}); - table.put(0x2F960, new int [] {0x4202}); - table.put(0x04D2, new int [] {0x0410, 0x0308}); - table.put(0x1EB1, new int [] {0x0061, 0x0306, 0x0300}); - table.put(0x04D1, new int [] {0x0430, 0x0306}); - table.put(0x1EB0, new int [] {0x0041, 0x0306, 0x0300}); - table.put(0x04D0, new int [] {0x0410, 0x0306}); - table.put(0xF9EF, new int [] {0x7498}); - table.put(0xF9EE, new int [] {0x71D0}); - table.put(0x2F95F, new int [] {0x7AEE}); - table.put(0xF9ED, new int [] {0x541D}); - table.put(0x2F95E, new int [] {0x25AA7}); - table.put(0x1EAF, new int [] {0x0061, 0x0306, 0x0301}); - table.put(0xF9EC, new int [] {0x6EBA}); - table.put(0x2F95D, new int [] {0x25AA7}); - table.put(0x1EAE, new int [] {0x0041, 0x0306, 0x0301}); - table.put(0xF9EB, new int [] {0x533F}); - table.put(0x2F95C, new int [] {0x2597C}); - table.put(0x1EAD, new int [] {0x0061, 0x0323, 0x0302}); - table.put(0xF9EA, new int [] {0x96E2}); - table.put(0x2F95B, new int [] {0x7A4F}); - table.put(0x1EAC, new int [] {0x0041, 0x0323, 0x0302}); - table.put(0x2F95A, new int [] {0x7A4A}); - table.put(0x1EAB, new int [] {0x0061, 0x0302, 0x0303}); - table.put(0x1EAA, new int [] {0x0041, 0x0302, 0x0303}); - table.put(0xF9E9, new int [] {0x91CC}); - table.put(0xF9E8, new int [] {0x88E1}); - table.put(0x2F959, new int [] {0x7A40}); - table.put(0xF9E7, new int [] {0x88CF}); - table.put(0x2F958, new int [] {0x412F}); - table.put(0x1EA9, new int [] {0x0061, 0x0302, 0x0309}); - table.put(0xF9E6, new int [] {0x7F79}); - table.put(0x2F957, new int [] {0x79EB}); - table.put(0x1EA8, new int [] {0x0041, 0x0302, 0x0309}); - 
table.put(0xF9E5, new int [] {0x75E2}); - table.put(0x2F956, new int [] {0x798F}); - table.put(0x1EA7, new int [] {0x0061, 0x0302, 0x0300}); - table.put(0xF9E4, new int [] {0x7406}); - table.put(0x2F955, new int [] {0x256C5}); - table.put(0x1EA6, new int [] {0x0041, 0x0302, 0x0300}); - table.put(0xF9E3, new int [] {0x6CE5}); - table.put(0x2F954, new int [] {0x2569A}); - table.put(0x1EA5, new int [] {0x0061, 0x0302, 0x0301}); - table.put(0xF9E2, new int [] {0x68A8}); - table.put(0x2F953, new int [] {0x7956}); - table.put(0x1EA4, new int [] {0x0041, 0x0302, 0x0301}); - table.put(0xF9E1, new int [] {0x674E}); - table.put(0x2F952, new int [] {0x25626}); - table.put(0x1EA3, new int [] {0x0061, 0x0309}); - table.put(0xF9E0, new int [] {0x6613}); - table.put(0x2F951, new int [] {0x40E3}); - table.put(0x1EA2, new int [] {0x0041, 0x0309}); - table.put(0x2F950, new int [] {0x78CC}); - table.put(0x04C2, new int [] {0x0436, 0x0306}); - table.put(0x1EA1, new int [] {0x0061, 0x0323}); - table.put(0x04C1, new int [] {0x0416, 0x0306}); - table.put(0x1EA0, new int [] {0x0041, 0x0323}); - table.put(0xF9DF, new int [] {0x5C65}); - table.put(0xF9DE, new int [] {0x540F}); - table.put(0x2F94F, new int [] {0x788C}); - table.put(0xF9DD, new int [] {0x5229}); - table.put(0x2F94E, new int [] {0x784E}); - table.put(0xF9DC, new int [] {0x9686}); - table.put(0x2F94D, new int [] {0x2541D}); - table.put(0xF9DB, new int [] {0x7387}); - table.put(0x2F94C, new int [] {0x4096}); - table.put(0xF9DA, new int [] {0x6817}); - table.put(0x2F94B, new int [] {0x4046}); - table.put(0x2F94A, new int [] {0x778B}); - table.put(0xF9D9, new int [] {0x6144}); - table.put(0xF9D8, new int [] {0x5F8B}); - table.put(0x2F949, new int [] {0x4039}); - table.put(0xF9D7, new int [] {0x8F2A}); - table.put(0x2F948, new int [] {0x774A}); - table.put(0xF9D6, new int [] {0x6DEA}); - table.put(0x2F947, new int [] {0x771F}); - table.put(0xF9D5, new int [] {0x5D19}); - table.put(0x2F946, new int [] {0x771F}); - table.put(0xF9D4, 
new int [] {0x502B}); - table.put(0x2F945, new int [] {0x771E}); - table.put(0xF9D3, new int [] {0x9678}); - table.put(0x2F944, new int [] {0x25133}); - table.put(0xF9D2, new int [] {0x622E}); - table.put(0x2F943, new int [] {0x25119}); - table.put(0xF9D1, new int [] {0x516D}); - table.put(0x2F942, new int [] {0x250F2}); - table.put(0xF9D0, new int [] {0x985E}); - table.put(0x2F941, new int [] {0x250F3}); - table.put(0x2F940, new int [] {0x76F4}); - table.put(0xF9CF, new int [] {0x7D10}); - table.put(0xF9CE, new int [] {0x786B}); - table.put(0x2F93F, new int [] {0x4008}); - table.put(0xF9CD, new int [] {0x7559}); - table.put(0x2F93E, new int [] {0x3FFC}); - table.put(0xF9CC, new int [] {0x7409}); - table.put(0x2F93D, new int [] {0x25044}); - table.put(0xF9CB, new int [] {0x6E9C}); - table.put(0x2F93C, new int [] {0x24FB8}); - table.put(0xF9CA, new int [] {0x6D41}); - table.put(0x2F93B, new int [] {0x24FA1}); - table.put(0x2F93A, new int [] {0x7610}); - table.put(0xF9C9, new int [] {0x67F3}); - table.put(0xF9C8, new int [] {0x677B}); - table.put(0x2F939, new int [] {0x2219F}); - table.put(0xF9C7, new int [] {0x5289}); - table.put(0x2F938, new int [] {0x7570}); - table.put(0xF9C6, new int [] {0x962E}); - table.put(0x2F937, new int [] {0x24C92}); - table.put(0xF9C5, new int [] {0x6688}); - table.put(0x2F936, new int [] {0x753E}); - table.put(0xF9C4, new int [] {0x9F8D}); - table.put(0x2F935, new int [] {0x24C36}); - table.put(0xF9C3, new int [] {0x907C}); - table.put(0x2F934, new int [] {0x7524}); - table.put(0xF9C2, new int [] {0x84FC}); - table.put(0x2F933, new int [] {0x3F1B}); - table.put(0xF9C1, new int [] {0x7642}); - table.put(0x2F932, new int [] {0x74CA}); - table.put(0xF9C0, new int [] {0x71CE}); - table.put(0x2F931, new int [] {0x7485}); - table.put(0x2F930, new int [] {0x7471}); - table.put(0xF9BF, new int [] {0x6A02}); - table.put(0xF9BE, new int [] {0x6599}); - table.put(0x2F92F, new int [] {0x745C}); - table.put(0xF9BD, new int [] {0x5C3F}); - 
table.put(0x2F92E, new int [] {0x7447}); - table.put(0xF9BC, new int [] {0x5BEE}); - table.put(0x2F92D, new int [] {0x3EB8}); - table.put(0xF9BB, new int [] {0x50DA}); - table.put(0x2F92C, new int [] {0x3EB8}); - table.put(0xF9BA, new int [] {0x4E86}); - table.put(0x2F92B, new int [] {0x73A5}); - table.put(0x2F92A, new int [] {0x3EAC}); - table.put(0xF9B9, new int [] {0x60E1}); - table.put(0xF9B8, new int [] {0x96B8}); - table.put(0x2F929, new int [] {0x738B}); - table.put(0xF9B7, new int [] {0x91B4}); - table.put(0x2F928, new int [] {0x737A}); - table.put(0xF9B6, new int [] {0x79AE}); - table.put(0x2F927, new int [] {0x24814}); - table.put(0xF9B5, new int [] {0x4F8B}); - table.put(0x2F926, new int [] {0x24735}); - table.put(0xF9B4, new int [] {0x9818}); - table.put(0x2F925, new int [] {0x7295}); - table.put(0xF9B3, new int [] {0x9748}); - table.put(0x2F924, new int [] {0x7280}); - table.put(0xF9B2, new int [] {0x96F6}); - table.put(0x2F923, new int [] {0x24608}); - table.put(0xF9B1, new int [] {0x9234}); - table.put(0x2F922, new int [] {0x7250}); - table.put(0xF9B0, new int [] {0x8046}); - table.put(0x2F921, new int [] {0x7235}); - table.put(0x2F920, new int [] {0x7228}); - table.put(0xF9AF, new int [] {0x7F9A}); - table.put(0xF9AE, new int [] {0x7469}); - table.put(0x2F91F, new int [] {0x243AB}); - table.put(0xF9AD, new int [] {0x73B2}); - table.put(0x2F91E, new int [] {0x719C}); - table.put(0xF9AC, new int [] {0x601C}); - table.put(0x2F91D, new int [] {0x24263}); - table.put(0xF9AB, new int [] {0x5DBA}); - table.put(0x2F91C, new int [] {0x7145}); - table.put(0xF9AA, new int [] {0x5BE7}); - table.put(0x2F91B, new int [] {0x20525}); - table.put(0x2F91A, new int [] {0x70AD}); - table.put(0xF9A9, new int [] {0x56F9}); - table.put(0xF9A8, new int [] {0x4EE4}); - table.put(0x2F919, new int [] {0x7077}); - table.put(0xF9A7, new int [] {0x7375}); - table.put(0x2F918, new int [] {0x707D}); - table.put(0xF9A6, new int [] {0x7C3E}); - table.put(0x2F917, new int [] 
{0x704A}); - table.put(0xF9A5, new int [] {0x6BAE}); - table.put(0x2F916, new int [] {0x3D96}); - table.put(0xF9A4, new int [] {0x637B}); - table.put(0x2F915, new int [] {0x701B}); - table.put(0xF9A3, new int [] {0x5FF5}); - table.put(0x2F914, new int [] {0x701E}); - table.put(0xF9A2, new int [] {0x5EC9}); - table.put(0x2F913, new int [] {0x7039}); - table.put(0xF9A1, new int [] {0x8AAA}); - table.put(0x2F912, new int [] {0x6FC6}); - table.put(0xF9A0, new int [] {0x88C2}); - table.put(0x2F911, new int [] {0x23F8E}); - table.put(0x2F910, new int [] {0x23F5E}); - table.put(0x2001, new int [] {0x2003}); - table.put(0x2000, new int [] {0x2002}); - table.put(0x2F90F, new int [] {0x6F6E}); - table.put(0x2F90E, new int [] {0x6DF9}); - table.put(0x2F90D, new int [] {0x23ED1}); - table.put(0x2F90C, new int [] {0x6EC7}); - table.put(0x2F90B, new int [] {0x6ECB}); - table.put(0x2F90A, new int [] {0x3D33}); - table.put(0x2F909, new int [] {0x6E6E}); - table.put(0x2F908, new int [] {0x6E2F}); - table.put(0x2F907, new int [] {0x6D34}); - table.put(0x2F906, new int [] {0x23D1E}); - table.put(0x2F905, new int [] {0x6D85}); - table.put(0x2F904, new int [] {0x6D78}); - table.put(0x2F903, new int [] {0x6D69}); - table.put(0x2F902, new int [] {0x6D41}); - table.put(0x2F901, new int [] {0x6D77}); - table.put(0x2F900, new int [] {0x6D3E}); - table.put(0x017E, new int [] {0x007A, 0x030C}); - table.put(0x017D, new int [] {0x005A, 0x030C}); - table.put(0x017C, new int [] {0x007A, 0x0307}); - table.put(0x017B, new int [] {0x005A, 0x0307}); - table.put(0x017A, new int [] {0x007A, 0x0301}); - table.put(0x0179, new int [] {0x005A, 0x0301}); - table.put(0x0178, new int [] {0x0059, 0x0308}); - table.put(0x0177, new int [] {0x0079, 0x0302}); - table.put(0x0176, new int [] {0x0059, 0x0302}); - table.put(0x0175, new int [] {0x0077, 0x0302}); - table.put(0x0174, new int [] {0x0057, 0x0302}); - table.put(0x0173, new int [] {0x0075, 0x0328}); - table.put(0x0172, new int [] {0x0055, 0x0328}); - 
table.put(0x0171, new int [] {0x0075, 0x030B}); - table.put(0x0170, new int [] {0x0055, 0x030B}); - table.put(0x016F, new int [] {0x0075, 0x030A}); - table.put(0x016E, new int [] {0x0055, 0x030A}); - table.put(0x016D, new int [] {0x0075, 0x0306}); - table.put(0x016C, new int [] {0x0055, 0x0306}); - table.put(0x016B, new int [] {0x0075, 0x0304}); - table.put(0x016A, new int [] {0x0055, 0x0304}); - table.put(0x0A5E, new int [] {0x0A2B, 0x0A3C}); - table.put(0x0169, new int [] {0x0075, 0x0303}); - table.put(0x0168, new int [] {0x0055, 0x0303}); - table.put(0x0A5B, new int [] {0x0A1C, 0x0A3C}); - table.put(0x0A5A, new int [] {0x0A17, 0x0A3C}); - table.put(0x0165, new int [] {0x0074, 0x030C}); - table.put(0x0164, new int [] {0x0054, 0x030C}); - table.put(0x0163, new int [] {0x0074, 0x0327}); - table.put(0x0162, new int [] {0x0054, 0x0327}); - table.put(0x0161, new int [] {0x0073, 0x030C}); - table.put(0x0160, new int [] {0x0053, 0x030C}); - table.put(0x0A59, new int [] {0x0A16, 0x0A3C}); - table.put(0x015F, new int [] {0x0073, 0x0327}); - table.put(0x015E, new int [] {0x0053, 0x0327}); - table.put(0x015D, new int [] {0x0073, 0x0302}); - table.put(0x015C, new int [] {0x0053, 0x0302}); - table.put(0x015B, new int [] {0x0073, 0x0301}); - table.put(0x015A, new int [] {0x0053, 0x0301}); - table.put(0x1E9B, new int [] {0x017F, 0x0307}); - table.put(0x0159, new int [] {0x0072, 0x030C}); - table.put(0x0158, new int [] {0x0052, 0x030C}); - table.put(0x1E99, new int [] {0x0079, 0x030A}); - table.put(0x232A, new int [] {0x3009}); - table.put(0x0157, new int [] {0x0072, 0x0327}); - table.put(0x1E98, new int [] {0x0077, 0x030A}); - table.put(0x0156, new int [] {0x0052, 0x0327}); - table.put(0x1E97, new int [] {0x0074, 0x0308}); - table.put(0x0155, new int [] {0x0072, 0x0301}); - table.put(0x1E96, new int [] {0x0068, 0x0331}); - table.put(0x0154, new int [] {0x0052, 0x0301}); - table.put(0x1E95, new int [] {0x007A, 0x0331}); - table.put(0x1E94, new int [] {0x005A, 0x0331}); - 
table.put(0x1E93, new int [] {0x007A, 0x0323}); - table.put(0x0151, new int [] {0x006F, 0x030B}); - table.put(0x1E92, new int [] {0x005A, 0x0323}); - table.put(0x0150, new int [] {0x004F, 0x030B}); - table.put(0x1E91, new int [] {0x007A, 0x0302}); - table.put(0x2329, new int [] {0x3008}); - table.put(0x1E90, new int [] {0x005A, 0x0302}); - table.put(0x014F, new int [] {0x006F, 0x0306}); - table.put(0x014E, new int [] {0x004F, 0x0306}); - table.put(0x1E8F, new int [] {0x0079, 0x0307}); - table.put(0x014D, new int [] {0x006F, 0x0304}); - table.put(0x1E8E, new int [] {0x0059, 0x0307}); - table.put(0x014C, new int [] {0x004F, 0x0304}); - table.put(0x1E8D, new int [] {0x0078, 0x0308}); - table.put(0x1E8C, new int [] {0x0058, 0x0308}); - table.put(0x1E8B, new int [] {0x0078, 0x0307}); - table.put(0x1E8A, new int [] {0x0058, 0x0307}); - table.put(0x0148, new int [] {0x006E, 0x030C}); - table.put(0x1E89, new int [] {0x0077, 0x0323}); - table.put(0x0147, new int [] {0x004E, 0x030C}); - table.put(0x1E88, new int [] {0x0057, 0x0323}); - table.put(0x0146, new int [] {0x006E, 0x0327}); - table.put(0x1E87, new int [] {0x0077, 0x0307}); - table.put(0x0145, new int [] {0x004E, 0x0327}); - table.put(0x1E86, new int [] {0x0057, 0x0307}); - table.put(0x0144, new int [] {0x006E, 0x0301}); - table.put(0x1E85, new int [] {0x0077, 0x0308}); - table.put(0x0143, new int [] {0x004E, 0x0301}); - table.put(0x1E84, new int [] {0x0057, 0x0308}); - table.put(0x1E83, new int [] {0x0077, 0x0301}); - table.put(0x1E82, new int [] {0x0057, 0x0301}); - table.put(0x1E81, new int [] {0x0077, 0x0300}); - table.put(0x1E80, new int [] {0x0057, 0x0300}); - table.put(0x0A36, new int [] {0x0A38, 0x0A3C}); - table.put(0x0A33, new int [] {0x0A32, 0x0A3C}); - table.put(0x013E, new int [] {0x006C, 0x030C}); - table.put(0x1E7F, new int [] {0x0076, 0x0323}); - table.put(0x013D, new int [] {0x004C, 0x030C}); - table.put(0x1E7E, new int [] {0x0056, 0x0323}); - table.put(0x013C, new int [] {0x006C, 0x0327}); - 
table.put(0x1E7D, new int [] {0x0076, 0x0303}); - table.put(0x013B, new int [] {0x004C, 0x0327}); - table.put(0x1E7C, new int [] {0x0056, 0x0303}); - table.put(0x013A, new int [] {0x006C, 0x0301}); - table.put(0x1E7B, new int [] {0x0075, 0x0304, 0x0308}); - table.put(0x1E7A, new int [] {0x0055, 0x0304, 0x0308}); - table.put(0x0139, new int [] {0x004C, 0x0301}); - table.put(0x1E79, new int [] {0x0075, 0x0303, 0x0301}); - table.put(0x0137, new int [] {0x006B, 0x0327}); - table.put(0x1E78, new int [] {0x0055, 0x0303, 0x0301}); - table.put(0x0136, new int [] {0x004B, 0x0327}); - table.put(0x1E77, new int [] {0x0075, 0x032D}); - table.put(0x0135, new int [] {0x006A, 0x0302}); - table.put(0x1E76, new int [] {0x0055, 0x032D}); - table.put(0x0134, new int [] {0x004A, 0x0302}); - table.put(0x1E75, new int [] {0x0075, 0x0330}); - table.put(0x1E74, new int [] {0x0055, 0x0330}); - table.put(0x1E73, new int [] {0x0075, 0x0324}); - table.put(0x1E72, new int [] {0x0055, 0x0324}); - table.put(0x0130, new int [] {0x0049, 0x0307}); - table.put(0x1E71, new int [] {0x0074, 0x032D}); - table.put(0x1E70, new int [] {0x0054, 0x032D}); - table.put(0x012F, new int [] {0x0069, 0x0328}); - table.put(0x012E, new int [] {0x0049, 0x0328}); - table.put(0x1E6F, new int [] {0x0074, 0x0331}); - table.put(0x012D, new int [] {0x0069, 0x0306}); - table.put(0x1E6E, new int [] {0x0054, 0x0331}); - table.put(0x012C, new int [] {0x0049, 0x0306}); - table.put(0x1E6D, new int [] {0x0074, 0x0323}); - table.put(0x012B, new int [] {0x0069, 0x0304}); - table.put(0x1E6C, new int [] {0x0054, 0x0323}); - table.put(0x012A, new int [] {0x0049, 0x0304}); - table.put(0x1E6B, new int [] {0x0074, 0x0307}); - table.put(0x1E6A, new int [] {0x0054, 0x0307}); - table.put(0x0129, new int [] {0x0069, 0x0303}); - table.put(0x0128, new int [] {0x0049, 0x0303}); - table.put(0x1E69, new int [] {0x0073, 0x0323, 0x0307}); - table.put(0x1E68, new int [] {0x0053, 0x0323, 0x0307}); - table.put(0x1E67, new int [] {0x0073, 0x030C, 
0x0307}); - table.put(0x0125, new int [] {0x0068, 0x0302}); - table.put(0x1E66, new int [] {0x0053, 0x030C, 0x0307}); - table.put(0x0124, new int [] {0x0048, 0x0302}); - table.put(0x1E65, new int [] {0x0073, 0x0301, 0x0307}); - table.put(0x0123, new int [] {0x0067, 0x0327}); - table.put(0x1E64, new int [] {0x0053, 0x0301, 0x0307}); - table.put(0x0122, new int [] {0x0047, 0x0327}); - table.put(0x1E63, new int [] {0x0073, 0x0323}); - table.put(0x0121, new int [] {0x0067, 0x0307}); - table.put(0x1E62, new int [] {0x0053, 0x0323}); - table.put(0x0120, new int [] {0x0047, 0x0307}); - table.put(0x1E61, new int [] {0x0073, 0x0307}); - table.put(0x1E60, new int [] {0x0053, 0x0307}); - table.put(0x30FE, new int [] {0x30FD, 0x3099}); - table.put(0xF99F, new int [] {0x70C8}); - table.put(0xF99E, new int [] {0x54BD}); - table.put(0x011F, new int [] {0x0067, 0x0306}); - table.put(0xF99D, new int [] {0x52A3}); - table.put(0x011E, new int [] {0x0047, 0x0306}); - table.put(0x1E5F, new int [] {0x0072, 0x0331}); - table.put(0x30FA, new int [] {0x30F2, 0x3099}); - table.put(0xF99C, new int [] {0x5217}); - table.put(0x011D, new int [] {0x0067, 0x0302}); - table.put(0x1E5E, new int [] {0x0052, 0x0331}); - table.put(0xF99B, new int [] {0x934A}); - table.put(0x011C, new int [] {0x0047, 0x0302}); - table.put(0x1E5D, new int [] {0x0072, 0x0323, 0x0304}); - table.put(0xF99A, new int [] {0x9023}); - table.put(0x011B, new int [] {0x0065, 0x030C}); - table.put(0x1E5C, new int [] {0x0052, 0x0323, 0x0304}); - table.put(0x011A, new int [] {0x0045, 0x030C}); - table.put(0x1E5B, new int [] {0x0072, 0x0323}); - table.put(0x1E5A, new int [] {0x0052, 0x0323}); - table.put(0x30F9, new int [] {0x30F1, 0x3099}); - table.put(0x30F8, new int [] {0x30F0, 0x3099}); - table.put(0x30F7, new int [] {0x30EF, 0x3099}); - table.put(0xF999, new int [] {0x84EE}); - table.put(0xF998, new int [] {0x8F26}); - table.put(0x0119, new int [] {0x0065, 0x0328}); - table.put(0xF997, new int [] {0x806F}); - table.put(0x0118, 
new int [] {0x0045, 0x0328}); - table.put(0x1E59, new int [] {0x0072, 0x0307}); - table.put(0x30F4, new int [] {0x30A6, 0x3099}); - table.put(0xF996, new int [] {0x7DF4}); - table.put(0x0117, new int [] {0x0065, 0x0307}); - table.put(0x1E58, new int [] {0x0052, 0x0307}); - table.put(0xF995, new int [] {0x79CA}); - table.put(0x0116, new int [] {0x0045, 0x0307}); - table.put(0x1E57, new int [] {0x0070, 0x0307}); - table.put(0xF994, new int [] {0x7489}); - table.put(0xF993, new int [] {0x7149}); - table.put(0x0115, new int [] {0x0065, 0x0306}); - table.put(0x0477, new int [] {0x0475, 0x030F}); - table.put(0x1E56, new int [] {0x0050, 0x0307}); - table.put(0xF992, new int [] {0x6F23}); - table.put(0x1E55, new int [] {0x0070, 0x0301}); - table.put(0x0476, new int [] {0x0474, 0x030F}); - table.put(0x0114, new int [] {0x0045, 0x0306}); - table.put(0xF991, new int [] {0x649A}); - table.put(0x1E54, new int [] {0x0050, 0x0301}); - table.put(0x0113, new int [] {0x0065, 0x0304}); - table.put(0xF990, new int [] {0x6200}); - table.put(0x1E53, new int [] {0x006F, 0x0304, 0x0301}); - table.put(0x0112, new int [] {0x0045, 0x0304}); - table.put(0x1E52, new int [] {0x004F, 0x0304, 0x0301}); - table.put(0x1E51, new int [] {0x006F, 0x0304, 0x0300}); - table.put(0x1E50, new int [] {0x004F, 0x0304, 0x0300}); - table.put(0xF98F, new int [] {0x6190}); - table.put(0xF98E, new int [] {0x5E74}); - table.put(0xF98D, new int [] {0x8F62}); - table.put(0x010F, new int [] {0x0064, 0x030C}); - table.put(0xF98C, new int [] {0x6B77}); - table.put(0x1E4F, new int [] {0x006F, 0x0303, 0x0308}); - table.put(0x010E, new int [] {0x0044, 0x030C}); - table.put(0xF98B, new int [] {0x66C6}); - table.put(0x1E4E, new int [] {0x004F, 0x0303, 0x0308}); - table.put(0x010D, new int [] {0x0063, 0x030C}); - table.put(0xF98A, new int [] {0x529B}); - table.put(0x1E4D, new int [] {0x006F, 0x0303, 0x0301}); - table.put(0x010C, new int [] {0x0043, 0x030C}); - table.put(0x1E4C, new int [] {0x004F, 0x0303, 0x0301}); - 
table.put(0x010B, new int [] {0x0063, 0x0307}); - table.put(0x1E4B, new int [] {0x006E, 0x032D}); - table.put(0x010A, new int [] {0x0043, 0x0307}); - table.put(0x1E4A, new int [] {0x004E, 0x032D}); - table.put(0xF989, new int [] {0x9ECE}); - table.put(0xF988, new int [] {0x9E97}); - table.put(0xF987, new int [] {0x9A6A}); - table.put(0x0109, new int [] {0x0063, 0x0302}); - table.put(0xF986, new int [] {0x95AD}); - table.put(0x1E49, new int [] {0x006E, 0x0331}); - table.put(0x0108, new int [] {0x0043, 0x0302}); - table.put(0xF985, new int [] {0x792A}); - table.put(0x1E48, new int [] {0x004E, 0x0331}); - table.put(0x0107, new int [] {0x0063, 0x0301}); - table.put(0xF984, new int [] {0x6FFE}); - table.put(0x1E47, new int [] {0x006E, 0x0323}); - table.put(0x0106, new int [] {0x0043, 0x0301}); - table.put(0xF983, new int [] {0x65C5}); - table.put(0x1E46, new int [] {0x004E, 0x0323}); - table.put(0x0105, new int [] {0x0061, 0x0328}); - table.put(0xF982, new int [] {0x5EEC}); - table.put(0x1E45, new int [] {0x006E, 0x0307}); - table.put(0x0104, new int [] {0x0041, 0x0328}); - table.put(0xF981, new int [] {0x5973}); - table.put(0x1E44, new int [] {0x004E, 0x0307}); - table.put(0x0103, new int [] {0x0061, 0x0306}); - table.put(0xF980, new int [] {0x5442}); - table.put(0x1E43, new int [] {0x006D, 0x0323}); - table.put(0x0102, new int [] {0x0041, 0x0306}); - table.put(0x1E42, new int [] {0x004D, 0x0323}); - table.put(0x0101, new int [] {0x0061, 0x0304}); - table.put(0x1E41, new int [] {0x006D, 0x0307}); - table.put(0x0100, new int [] {0x0041, 0x0304}); - table.put(0x1E40, new int [] {0x004D, 0x0307}); - table.put(0xF97F, new int [] {0x52F5}); - table.put(0x30DD, new int [] {0x30DB, 0x309A}); - table.put(0xF97E, new int [] {0x91CF}); - table.put(0x30DC, new int [] {0x30DB, 0x3099}); - table.put(0xF97D, new int [] {0x8AD2}); - table.put(0x2F8FF, new int [] {0x6D16}); - table.put(0xF97C, new int [] {0x826F}); - table.put(0x30DA, new int [] {0x30D8, 0x309A}); - table.put(0x1E3F, 
new int [] {0x006D, 0x0301}); - table.put(0x2F8FE, new int [] {0x6C67}); - table.put(0xF97B, new int [] {0x7CE7}); - table.put(0x1E3E, new int [] {0x004D, 0x0301}); - table.put(0x2F8FD, new int [] {0x6CCD}); - table.put(0xF97A, new int [] {0x6881}); - table.put(0x1E3D, new int [] {0x006C, 0x032D}); - table.put(0x045E, new int [] {0x0443, 0x0306}); - table.put(0x2F8FC, new int [] {0x6CBF}); - table.put(0x1E3C, new int [] {0x004C, 0x032D}); - table.put(0x045D, new int [] {0x0438, 0x0300}); - table.put(0x2F8FB, new int [] {0x23CBC}); - table.put(0x1E3B, new int [] {0x006C, 0x0331}); - table.put(0x045C, new int [] {0x043A, 0x0301}); - table.put(0x2F8FA, new int [] {0x6C4E}); - table.put(0x1E3A, new int [] {0x004C, 0x0331}); - table.put(0x30D9, new int [] {0x30D8, 0x3099}); - table.put(0xF979, new int [] {0x51C9}); - table.put(0x30D7, new int [] {0x30D5, 0x309A}); - table.put(0xF978, new int [] {0x5169}); - table.put(0x30D6, new int [] {0x30D5, 0x3099}); - table.put(0xF977, new int [] {0x4EAE}); - table.put(0x2F8F9, new int [] {0x23AFA}); - table.put(0xF976, new int [] {0x7565}); - table.put(0x30D4, new int [] {0x30D2, 0x309A}); - table.put(0x1E39, new int [] {0x006C, 0x0323, 0x0304}); - table.put(0x2F8F8, new int [] {0x21D0B}); - table.put(0xF975, new int [] {0x63A0}); - table.put(0x30D3, new int [] {0x30D2, 0x3099}); - table.put(0x1E38, new int [] {0x004C, 0x0323, 0x0304}); - table.put(0x0D4C, new int [] {0x0D46, 0x0D57}); - table.put(0x2F8F7, new int [] {0x23A8D}); - table.put(0xF974, new int [] {0x82E5}); - table.put(0x1E37, new int [] {0x006C, 0x0323}); - table.put(0x0D4B, new int [] {0x0D47, 0x0D3E}); - table.put(0x2F8F6, new int [] {0x6BBB}); - table.put(0xF973, new int [] {0x62FE}); - table.put(0x30D1, new int [] {0x30CF, 0x309A}); - table.put(0x1E36, new int [] {0x004C, 0x0323}); - table.put(0x0D4A, new int [] {0x0D46, 0x0D3E}); - table.put(0x0457, new int [] {0x0456, 0x0308}); - table.put(0x2F8F5, new int [] {0x6BBA}); - table.put(0xF972, new int [] {0x6C88}); 
- table.put(0x30D0, new int [] {0x30CF, 0x3099}); - table.put(0x1E35, new int [] {0x006B, 0x0331}); - table.put(0x2F8F4, new int [] {0x6B9F}); - table.put(0xF971, new int [] {0x8FB0}); - table.put(0x1E34, new int [] {0x004B, 0x0331}); - table.put(0x2F8F3, new int [] {0x6B72}); - table.put(0xF970, new int [] {0x6BBA}); - table.put(0x1E33, new int [] {0x006B, 0x0323}); - table.put(0x2F8F2, new int [] {0x3C4E}); - table.put(0x1E32, new int [] {0x004B, 0x0323}); - table.put(0x0453, new int [] {0x0433, 0x0301}); - table.put(0x2F8F1, new int [] {0x6B54}); - table.put(0x1E31, new int [] {0x006B, 0x0301}); - table.put(0x2F8F0, new int [] {0x238A7}); - table.put(0x1E30, new int [] {0x004B, 0x0301}); - table.put(0x0451, new int [] {0x0435, 0x0308}); - table.put(0x0450, new int [] {0x0435, 0x0300}); - table.put(0xF96F, new int [] {0x8AAA}); - table.put(0xF96E, new int [] {0x8449}); - table.put(0xF96D, new int [] {0x7701}); - table.put(0x2F8EF, new int [] {0x6B21}); - table.put(0xF96C, new int [] {0x585E}); - table.put(0x1E2F, new int [] {0x0069, 0x0308, 0x0301}); - table.put(0x2F8EE, new int [] {0x3C18}); - table.put(0xF96B, new int [] {0x53C3}); - table.put(0x1E2E, new int [] {0x0049, 0x0308, 0x0301}); - table.put(0x2F8ED, new int [] {0x6ADB}); - table.put(0xF96A, new int [] {0x7D22}); - table.put(0x1E2D, new int [] {0x0069, 0x0330}); - table.put(0x2F8EC, new int [] {0x236A3}); - table.put(0x1E2C, new int [] {0x0049, 0x0330}); - table.put(0x2F8EB, new int [] {0x6AA8}); - table.put(0x1E2B, new int [] {0x0068, 0x032E}); - table.put(0x2F8EA, new int [] {0x69EA}); - table.put(0x1E2A, new int [] {0x0048, 0x032E}); - table.put(0x30C9, new int [] {0x30C8, 0x3099}); - table.put(0xF969, new int [] {0x6578}); - table.put(0x30C7, new int [] {0x30C6, 0x3099}); - table.put(0xF968, new int [] {0x6CCC}); - table.put(0xF967, new int [] {0x4E0D}); - table.put(0x30C5, new int [] {0x30C4, 0x3099}); - table.put(0x2F8E9, new int [] {0x69A3}); - table.put(0xF966, new int [] {0x5FA9}); - 
table.put(0x1E29, new int [] {0x0068, 0x0327}); - table.put(0x2F8E8, new int [] {0x6942}); - table.put(0xF965, new int [] {0x4FBF}); - table.put(0x1E28, new int [] {0x0048, 0x0327}); - table.put(0x2F8E7, new int [] {0x3B9D}); - table.put(0xF964, new int [] {0x78FB}); - table.put(0x30C2, new int [] {0x30C1, 0x3099}); - table.put(0x1E27, new int [] {0x0068, 0x0308}); - table.put(0x2F8E6, new int [] {0x6914}); - table.put(0xF963, new int [] {0x5317}); - table.put(0x1E26, new int [] {0x0048, 0x0308}); - table.put(0x2F8E5, new int [] {0x681F}); - table.put(0xF962, new int [] {0x7570}); - table.put(0x30C0, new int [] {0x30BF, 0x3099}); - table.put(0x1E25, new int [] {0x0068, 0x0323}); - table.put(0x2F8E4, new int [] {0x688E}); - table.put(0xF961, new int [] {0x7387}); - table.put(0x1E24, new int [] {0x0048, 0x0323}); - table.put(0x2F8E3, new int [] {0x2346D}); - table.put(0xF960, new int [] {0x6012}); - table.put(0x1E23, new int [] {0x0068, 0x0307}); - table.put(0x2F8E2, new int [] {0x6885}); - table.put(0x1E22, new int [] {0x0048, 0x0307}); - table.put(0x2F8E1, new int [] {0x6852}); - table.put(0x1E21, new int [] {0x0067, 0x0304}); - table.put(0x2F8E0, new int [] {0x6785}); - table.put(0x1E20, new int [] {0x0047, 0x0304}); - table.put(0x30BE, new int [] {0x30BD, 0x3099}); - table.put(0xF95F, new int [] {0x5BE7}); - table.put(0xF95E, new int [] {0x4E39}); - table.put(0x30BC, new int [] {0x30BB, 0x3099}); - table.put(0xF95D, new int [] {0x8AFE}); - table.put(0x2F8DF, new int [] {0x67FA}); - table.put(0xF95C, new int [] {0x6A02}); - table.put(0x30BA, new int [] {0x30B9, 0x3099}); - table.put(0x1E1F, new int [] {0x0066, 0x0307}); - table.put(0x2F8DE, new int [] {0x3B49}); - table.put(0xF95B, new int [] {0x62CF}); - table.put(0x1E1E, new int [] {0x0046, 0x0307}); - table.put(0x2F8DD, new int [] {0x233C3}); - table.put(0xF95A, new int [] {0x8B80}); - table.put(0x1E1D, new int [] {0x0065, 0x0327, 0x0306}); - table.put(0x2F8DC, new int [] {0x6753}); - table.put(0x1E1C, new int 
[] {0x0045, 0x0327, 0x0306}); - table.put(0x2F8DB, new int [] {0x675E}); - table.put(0x1E1B, new int [] {0x0065, 0x0330}); - table.put(0x2F8DA, new int [] {0x6721}); - table.put(0x1E1A, new int [] {0x0045, 0x0330}); - table.put(0x30B8, new int [] {0x30B7, 0x3099}); - table.put(0xF959, new int [] {0x9675}); - table.put(0xF958, new int [] {0x83F1}); - table.put(0x30B6, new int [] {0x30B5, 0x3099}); - table.put(0xF957, new int [] {0x7DBE}); - table.put(0x2F8D9, new int [] {0x671B}); - table.put(0xF956, new int [] {0x7A1C}); - table.put(0x30B4, new int [] {0x30B3, 0x3099}); - table.put(0x1E19, new int [] {0x0065, 0x032D}); - table.put(0x2F8D8, new int [] {0x6717}); - table.put(0xF955, new int [] {0x51CC}); - table.put(0x1E18, new int [] {0x0045, 0x032D}); - table.put(0x0439, new int [] {0x0438, 0x0306}); - table.put(0x2F8D7, new int [] {0x43D9}); - table.put(0xF954, new int [] {0x51DC}); - table.put(0x30B2, new int [] {0x30B1, 0x3099}); - table.put(0x1E17, new int [] {0x0065, 0x0304, 0x0301}); - table.put(0x2F8D6, new int [] {0x80AD}); - table.put(0xF953, new int [] {0x808B}); - table.put(0x1E16, new int [] {0x0045, 0x0304, 0x0301}); - table.put(0x2F8D5, new int [] {0x669C}); - table.put(0xF952, new int [] {0x52D2}); - table.put(0x30B0, new int [] {0x30AF, 0x3099}); - table.put(0x1E15, new int [] {0x0065, 0x0304, 0x0300}); - table.put(0x2F8D4, new int [] {0x6700}); - table.put(0xF951, new int [] {0x964B}); - table.put(0x1E14, new int [] {0x0045, 0x0304, 0x0300}); - table.put(0x2F8D3, new int [] {0x5195}); - table.put(0xF950, new int [] {0x7E37}); - table.put(0x1E13, new int [] {0x0064, 0x032D}); - table.put(0x2F8D2, new int [] {0x5192}); - table.put(0x1E12, new int [] {0x0044, 0x032D}); - table.put(0x2F8D1, new int [] {0x3AE4}); - table.put(0x1E11, new int [] {0x0064, 0x0327}); - table.put(0x2F8D0, new int [] {0x3B08}); - table.put(0x1E10, new int [] {0x0044, 0x0327}); - table.put(0x30AE, new int [] {0x30AD, 0x3099}); - table.put(0xF94F, new int [] {0x7D2F}); - 
table.put(0xF94E, new int [] {0x6F0F}); - table.put(0x30AC, new int [] {0x30AB, 0x3099}); - table.put(0xF94D, new int [] {0x6DDA}); - table.put(0x2F8CF, new int [] {0x6691}); - table.put(0xF94C, new int [] {0x6A13}); - table.put(0x1E0F, new int [] {0x0064, 0x0331}); - table.put(0x2F8CE, new int [] {0x3B19}); - table.put(0xF94B, new int [] {0x5C62}); - table.put(0x1E0E, new int [] {0x0044, 0x0331}); - table.put(0x2F8CD, new int [] {0x6649}); - table.put(0xF94A, new int [] {0x58D8}); - table.put(0x1E0D, new int [] {0x0064, 0x0323}); - table.put(0x2F8CC, new int [] {0x66F8}); - table.put(0x1E0C, new int [] {0x0044, 0x0323}); - table.put(0x2F8CB, new int [] {0x65E3}); - table.put(0x1E0B, new int [] {0x0064, 0x0307}); - table.put(0x2F8CA, new int [] {0x2300A}); - table.put(0x1E0A, new int [] {0x0044, 0x0307}); - table.put(0xF949, new int [] {0x96F7}); - table.put(0xF948, new int [] {0x8CC2}); - table.put(0xF947, new int [] {0x78CA}); - table.put(0x2F8C9, new int [] {0x656C}); - table.put(0xF946, new int [] {0x7262}); - table.put(0x1E09, new int [] {0x0063, 0x0327, 0x0301}); - table.put(0x2F8C8, new int [] {0x654F}); - table.put(0xF945, new int [] {0x807E}); - table.put(0x1E08, new int [] {0x0043, 0x0327, 0x0301}); - table.put(0x2F8C7, new int [] {0x3A6C}); - table.put(0xF944, new int [] {0x7C60}); - table.put(0x1E07, new int [] {0x0062, 0x0331}); - table.put(0x2F8C6, new int [] {0x6477}); - table.put(0xF943, new int [] {0x5F04}); - table.put(0x1E06, new int [] {0x0042, 0x0331}); - table.put(0x2F8C5, new int [] {0x649D}); - table.put(0xF942, new int [] {0x58DF}); - table.put(0x1E05, new int [] {0x0062, 0x0323}); - table.put(0x2F8C4, new int [] {0x647E}); - table.put(0xF941, new int [] {0x8AD6}); - table.put(0x1E04, new int [] {0x0042, 0x0323}); - table.put(0x2F8C3, new int [] {0x6469}); - table.put(0xF940, new int [] {0x9E7F}); - table.put(0x1E03, new int [] {0x0062, 0x0307}); - table.put(0x2F8C2, new int [] {0x3A2E}); - table.put(0x1E02, new int [] {0x0042, 0x0307}); - 
table.put(0x2F8C1, new int [] {0x63A9}); - table.put(0x1E01, new int [] {0x0061, 0x0325}); - table.put(0x2F8C0, new int [] {0x63C5}); - table.put(0x1E00, new int [] {0x0041, 0x0325}); - table.put(0xF93F, new int [] {0x9304}); - table.put(0xF93E, new int [] {0x83C9}); - table.put(0xF93D, new int [] {0x7DA0}); - table.put(0x2F8BF, new int [] {0x6422}); - table.put(0xF93C, new int [] {0x797F}); - table.put(0x2F8BE, new int [] {0x22BF1}); - table.put(0xF93B, new int [] {0x788C}); - table.put(0x2F8BD, new int [] {0x63E4}); - table.put(0xF93A, new int [] {0x9DFA}); - table.put(0x2F8BC, new int [] {0x6383}); - table.put(0x2F8BB, new int [] {0x6368}); - table.put(0x2F8BA, new int [] {0x62FC}); - table.put(0xF939, new int [] {0x9B6F}); - table.put(0xF938, new int [] {0x9732}); - table.put(0xF937, new int [] {0x8DEF}); - table.put(0x2F8B9, new int [] {0x633D}); - table.put(0xF936, new int [] {0x865C}); - table.put(0x2F8B8, new int [] {0x22B0C}); - table.put(0xF935, new int [] {0x8606}); - table.put(0x0419, new int [] {0x0418, 0x0306}); - table.put(0x2F8B7, new int [] {0x6350}); - table.put(0xF934, new int [] {0x8001}); - table.put(0x2F8B6, new int [] {0x62D4}); - table.put(0xF933, new int [] {0x76E7}); - table.put(0x2F8B5, new int [] {0x62B1}); - table.put(0xF932, new int [] {0x7210}); - table.put(0x2F8B4, new int [] {0x625D}); - table.put(0xF931, new int [] {0x6AD3}); - table.put(0x2F8B3, new int [] {0x621B}); - table.put(0xF930, new int [] {0x64C4}); - table.put(0x2F8B2, new int [] {0x6210}); - table.put(0x2F8B1, new int [] {0x61F6}); - table.put(0x2F8B0, new int [] {0x61F2}); - table.put(0xF92F, new int [] {0x52DE}); - table.put(0xF92E, new int [] {0x51B7}); - table.put(0xF92D, new int [] {0x4F86}); - table.put(0x2F8AF, new int [] {0x61DE}); - table.put(0xF92C, new int [] {0x90CE}); - table.put(0x2F8AE, new int [] {0x61AF}); - table.put(0xF92B, new int [] {0x72FC}); - table.put(0x2F8AD, new int [] {0x61A4}); - table.put(0xF92A, new int [] {0x6D6A}); - table.put(0x040E, 
new int [] {0x0423, 0x0306}); - table.put(0x2F8AC, new int [] {0x61B2}); - table.put(0x040D, new int [] {0x0418, 0x0300}); - table.put(0x2F8AB, new int [] {0x618E}); - table.put(0x040C, new int [] {0x041A, 0x0301}); - table.put(0x2F8AA, new int [] {0x617A}); - table.put(0xF929, new int [] {0x6717}); - table.put(0xF928, new int [] {0x5ECA}); - table.put(0xF927, new int [] {0x881F}); - table.put(0x2F8A9, new int [] {0x614C}); - table.put(0xF926, new int [] {0x81D8}); - table.put(0x2F8A8, new int [] {0x614E}); - table.put(0xF925, new int [] {0x62C9}); - table.put(0x2F8A7, new int [] {0x614C}); - table.put(0xF924, new int [] {0x8964}); - table.put(0x2F8A6, new int [] {0x6148}); - table.put(0xF923, new int [] {0x85CD}); - table.put(0x0407, new int [] {0x0406, 0x0308}); - table.put(0x2F8A5, new int [] {0x60C7}); - table.put(0xF922, new int [] {0x6FEB}); - table.put(0x2F8A4, new int [] {0x226D4}); - table.put(0xF921, new int [] {0x5D50}); - table.put(0x2F8A3, new int [] {0x6094}); - table.put(0xF920, new int [] {0x9E1E}); - table.put(0x2F8A2, new int [] {0x391C}); - table.put(0x0403, new int [] {0x0413, 0x0301}); - table.put(0x2F8A1, new int [] {0x393A}); - table.put(0x2F8A0, new int [] {0x6081}); - table.put(0x0401, new int [] {0x0415, 0x0308}); - table.put(0x0400, new int [] {0x0415, 0x0300}); - table.put(0xF91F, new int [] {0x862D}); - table.put(0xF91E, new int [] {0x721B}); - table.put(0xF91D, new int [] {0x6B04}); - table.put(0xF91C, new int [] {0x5375}); - table.put(0xF91B, new int [] {0x4E82}); - table.put(0xF91A, new int [] {0x99F1}); - table.put(0xF919, new int [] {0x916A}); - table.put(0x22ED, new int [] {0x22B5, 0x0338}); - table.put(0xF918, new int [] {0x843D}); - table.put(0x22EC, new int [] {0x22B4, 0x0338}); - table.put(0xF917, new int [] {0x73DE}); - table.put(0x22EB, new int [] {0x22B3, 0x0338}); - table.put(0xF916, new int [] {0x70D9}); - table.put(0x22EA, new int [] {0x22B2, 0x0338}); - table.put(0xF915, new int [] {0x6D1B}); - table.put(0xF914, new int 
[] {0x6A02}); - table.put(0xF913, new int [] {0x908F}); - table.put(0xF912, new int [] {0x88F8}); - table.put(0xF911, new int [] {0x87BA}); - table.put(0xF910, new int [] {0x863F}); - table.put(0x22E3, new int [] {0x2292, 0x0338}); - table.put(0xF90F, new int [] {0x7F85}); - table.put(0x22E2, new int [] {0x2291, 0x0338}); - table.put(0xF90E, new int [] {0x7669}); - table.put(0x22E1, new int [] {0x227D, 0x0338}); - table.put(0xF90D, new int [] {0x61F6}); - table.put(0x22E0, new int [] {0x227C, 0x0338}); - table.put(0xF90C, new int [] {0x5948}); - table.put(0xF90B, new int [] {0x5587}); - table.put(0xF90A, new int [] {0x91D1}); - table.put(0xF909, new int [] {0x5951}); - table.put(0xF908, new int [] {0x9F9C}); - table.put(0xF907, new int [] {0x9F9C}); - table.put(0xF906, new int [] {0x53E5}); - table.put(0xF905, new int [] {0x4E32}); - table.put(0xF904, new int [] {0x6ED1}); - table.put(0xF903, new int [] {0x8CC8}); - table.put(0xF902, new int [] {0x8ECA}); - table.put(0xF901, new int [] {0x66F4}); - table.put(0xF900, new int [] {0x8C48}); - table.put(0x00FF, new int [] {0x0079, 0x0308}); - table.put(0x00FD, new int [] {0x0079, 0x0301}); - table.put(0x00FC, new int [] {0x0075, 0x0308}); - table.put(0x00FB, new int [] {0x0075, 0x0302}); - table.put(0x00FA, new int [] {0x0075, 0x0301}); - table.put(0x00F9, new int [] {0x0075, 0x0300}); - table.put(0x00F6, new int [] {0x006F, 0x0308}); - table.put(0x00F5, new int [] {0x006F, 0x0303}); - table.put(0x00F4, new int [] {0x006F, 0x0302}); - table.put(0x00F3, new int [] {0x006F, 0x0301}); - table.put(0x00F2, new int [] {0x006F, 0x0300}); - table.put(0x00F1, new int [] {0x006E, 0x0303}); - table.put(0x00EF, new int [] {0x0069, 0x0308}); - table.put(0x00EE, new int [] {0x0069, 0x0302}); - table.put(0x00ED, new int [] {0x0069, 0x0301}); - table.put(0x00EC, new int [] {0x0069, 0x0300}); - table.put(0x00EB, new int [] {0x0065, 0x0308}); - table.put(0x00EA, new int [] {0x0065, 0x0302}); - table.put(0x00E9, new int [] {0x0065, 
0x0301}); - table.put(0x00E8, new int [] {0x0065, 0x0300}); - table.put(0x00E7, new int [] {0x0063, 0x0327}); - table.put(0x00E5, new int [] {0x0061, 0x030A}); - table.put(0x00E4, new int [] {0x0061, 0x0308}); - table.put(0x00E3, new int [] {0x0061, 0x0303}); - table.put(0x00E2, new int [] {0x0061, 0x0302}); - table.put(0x00E1, new int [] {0x0061, 0x0301}); - table.put(0x00E0, new int [] {0x0061, 0x0300}); - table.put(0x00DD, new int [] {0x0059, 0x0301}); - table.put(0x00DC, new int [] {0x0055, 0x0308}); - table.put(0x00DB, new int [] {0x0055, 0x0302}); - table.put(0x00DA, new int [] {0x0055, 0x0301}); - table.put(0x22AF, new int [] {0x22AB, 0x0338}); - table.put(0x22AE, new int [] {0x22A9, 0x0338}); - table.put(0x22AD, new int [] {0x22A8, 0x0338}); - table.put(0x22AC, new int [] {0x22A2, 0x0338}); - table.put(0x00D9, new int [] {0x0055, 0x0300}); - table.put(0x00D6, new int [] {0x004F, 0x0308}); - table.put(0x00D5, new int [] {0x004F, 0x0303}); - table.put(0x00D4, new int [] {0x004F, 0x0302}); - table.put(0x00D3, new int [] {0x004F, 0x0301}); - table.put(0x00D2, new int [] {0x004F, 0x0300}); - table.put(0x00D1, new int [] {0x004E, 0x0303}); - table.put(0x309E, new int [] {0x309D, 0x3099}); - table.put(0x00CF, new int [] {0x0049, 0x0308}); - table.put(0x00CE, new int [] {0x0049, 0x0302}); - table.put(0x00CD, new int [] {0x0049, 0x0301}); - table.put(0x00CC, new int [] {0x0049, 0x0300}); - table.put(0x00CB, new int [] {0x0045, 0x0308}); - table.put(0x00CA, new int [] {0x0045, 0x0302}); - table.put(0x00C9, new int [] {0x0045, 0x0301}); - table.put(0x3094, new int [] {0x3046, 0x3099}); - table.put(0x00C8, new int [] {0x0045, 0x0300}); - table.put(0x00C7, new int [] {0x0043, 0x0327}); - table.put(0x00C5, new int [] {0x0041, 0x030A}); - table.put(0x00C4, new int [] {0x0041, 0x0308}); - table.put(0x00C3, new int [] {0x0041, 0x0303}); - table.put(0x00C2, new int [] {0x0041, 0x0302}); - table.put(0x00C1, new int [] {0x0041, 0x0301}); - table.put(0x00C0, new int [] {0x0041, 
0x0300}); - table.put(0x307D, new int [] {0x307B, 0x309A}); - table.put(0x307C, new int [] {0x307B, 0x3099}); - table.put(0x2F89F, new int [] {0x5FF9}); - table.put(0x307A, new int [] {0x3078, 0x309A}); - table.put(0x2F89E, new int [] {0x5FD7}); - table.put(0x2F89D, new int [] {0x5FCD}); - table.put(0x2F89C, new int [] {0x5F9A}); - table.put(0x2F89B, new int [] {0x38E3}); - table.put(0x2F89A, new int [] {0x5F6B}); - table.put(0x3079, new int [] {0x3078, 0x3099}); - table.put(0x3077, new int [] {0x3075, 0x309A}); - table.put(0x3076, new int [] {0x3075, 0x3099}); - table.put(0x2F899, new int [] {0x5F62}); - table.put(0x3074, new int [] {0x3072, 0x309A}); - table.put(0x2F898, new int [] {0x261DA}); - table.put(0x3073, new int [] {0x3072, 0x3099}); - table.put(0x2F897, new int [] {0x232B8}); - table.put(0x2F896, new int [] {0x38C7}); - table.put(0x3071, new int [] {0x306F, 0x309A}); - table.put(0x2F895, new int [] {0x5F22}); - table.put(0x3070, new int [] {0x306F, 0x3099}); - table.put(0x2F894, new int [] {0x5F22}); - table.put(0x2F893, new int [] {0x8201}); - table.put(0x2F892, new int [] {0x22331}); - table.put(0x2F891, new int [] {0x22331}); - table.put(0x2F890, new int [] {0x5EFE}); - table.put(0x2F88F, new int [] {0x2A392}); - table.put(0x2F88E, new int [] {0x5ECA}); - table.put(0x2F88D, new int [] {0x5EB6}); - table.put(0x2F88C, new int [] {0x5EB3}); - table.put(0x2F88B, new int [] {0x5EB0}); - table.put(0x2F88A, new int [] {0x387C}); - table.put(0x3069, new int [] {0x3068, 0x3099}); - table.put(0x3067, new int [] {0x3066, 0x3099}); - table.put(0x3065, new int [] {0x3064, 0x3099}); - table.put(0x2F889, new int [] {0x22183}); - table.put(0x2F888, new int [] {0x3862}); - table.put(0x2F887, new int [] {0x5E69}); - table.put(0x3062, new int [] {0x3061, 0x3099}); - table.put(0x2F886, new int [] {0x5E3D}); - table.put(0x2F885, new int [] {0x5E28}); - table.put(0x3060, new int [] {0x305F, 0x3099}); - table.put(0x2F884, new int [] {0x5DFD}); - table.put(0x2F883, new int 
[] {0x382F}); - table.put(0x2F882, new int [] {0x5DE2}); - table.put(0x2F881, new int [] {0x5DE1}); - table.put(0x2F880, new int [] {0x5D7C}); - table.put(0x305E, new int [] {0x305D, 0x3099}); - table.put(0x305C, new int [] {0x305B, 0x3099}); - table.put(0x2F87F, new int [] {0x5D6B}); - table.put(0x305A, new int [] {0x3059, 0x3099}); - table.put(0x2F87E, new int [] {0x5D6E}); - table.put(0x2F87D, new int [] {0x21DE6}); - table.put(0x2F87C, new int [] {0x5D43}); - table.put(0x2F87B, new int [] {0x21DE4}); - table.put(0x2F87A, new int [] {0x5C8D}); - table.put(0x3058, new int [] {0x3057, 0x3099}); - table.put(0x3056, new int [] {0x3055, 0x3099}); - table.put(0x2F879, new int [] {0x5CC0}); - table.put(0x3054, new int [] {0x3053, 0x3099}); - table.put(0x2F878, new int [] {0x5C6E}); - table.put(0x2F877, new int [] {0x5C60}); - table.put(0x3052, new int [] {0x3051, 0x3099}); - table.put(0x2F876, new int [] {0x3781}); - table.put(0x2F875, new int [] {0x5C22}); - table.put(0x3050, new int [] {0x304F, 0x3099}); - table.put(0x2F874, new int [] {0x5F53}); - table.put(0x2F873, new int [] {0x5C06}); - table.put(0x2F872, new int [] {0x5BFF}); - table.put(0x2F871, new int [] {0x21B18}); - table.put(0x2F870, new int [] {0x5BF3}); - table.put(0x304E, new int [] {0x304D, 0x3099}); - table.put(0x304C, new int [] {0x304B, 0x3099}); - table.put(0x2F86F, new int [] {0x5BE7}); - table.put(0x2F86E, new int [] {0x5BD8}); - table.put(0x2F86D, new int [] {0x5BC3}); - table.put(0x2F86C, new int [] {0x219C8}); - table.put(0x2F86B, new int [] {0x5B3E}); - table.put(0x2F86A, new int [] {0x5B3E}); - table.put(0x2F869, new int [] {0x5B08}); - table.put(0x2F868, new int [] {0x36FC}); - table.put(0x2F867, new int [] {0x36EE}); - table.put(0x0CCB, new int [] {0x0CC6, 0x0CC2, 0x0CD5}); - table.put(0x2F866, new int [] {0x5A66}); - table.put(0x0CCA, new int [] {0x0CC6, 0x0CC2}); - table.put(0x2F865, new int [] {0x59D8}); - table.put(0x2F864, new int [] {0x5A27}); - table.put(0x2F863, new int [] 
{0x5A1B}); - table.put(0x03D4, new int [] {0x03D2, 0x0308}); - table.put(0x2F862, new int [] {0x59EC}); - table.put(0x03D3, new int [] {0x03D2, 0x0301}); - table.put(0x2F861, new int [] {0x216EA}); - table.put(0x2F860, new int [] {0x216A8}); - table.put(0x0CC8, new int [] {0x0CC6, 0x0CD6}); - table.put(0x0CC7, new int [] {0x0CC6, 0x0CD5}); - table.put(0x2F85F, new int [] {0x5962}); - table.put(0x2F85E, new int [] {0x5922}); - table.put(0x2F85D, new int [] {0x591A}); - table.put(0x0CC0, new int [] {0x0CBF, 0x0CD5}); - table.put(0x03CE, new int [] {0x03C9, 0x0301}); - table.put(0x2F85C, new int [] {0x5906}); - table.put(0x03CD, new int [] {0x03C5, 0x0301}); - table.put(0x2F85B, new int [] {0x58F7}); - table.put(0x03CC, new int [] {0x03BF, 0x0301}); - table.put(0x2F85A, new int [] {0x58F2}); - table.put(0x03CB, new int [] {0x03C5, 0x0308}); - table.put(0x03CA, new int [] {0x03B9, 0x0308}); - table.put(0x2F859, new int [] {0x214E4}); - table.put(0x2F858, new int [] {0x58AC}); - table.put(0x2F857, new int [] {0x5831}); - table.put(0x2F856, new int [] {0x5832}); - table.put(0x2F855, new int [] {0x578B}); - table.put(0x2F854, new int [] {0x580D}); - table.put(0x2F853, new int [] {0x57F4}); - table.put(0x2F852, new int [] {0x57CE}); - table.put(0x2F851, new int [] {0x58EE}); - table.put(0x2F850, new int [] {0x5207}); - table.put(0x2F84F, new int [] {0x5674}); - table.put(0x2F84E, new int [] {0x5651}); - table.put(0x2F84D, new int [] {0x5717}); - table.put(0x2F84C, new int [] {0x5606}); - table.put(0x2F84B, new int [] {0x5716}); - table.put(0x2F84A, new int [] {0x55C2}); - table.put(0x2F849, new int [] {0x55B3}); - table.put(0x2F848, new int [] {0x55AB}); - table.put(0x2F847, new int [] {0x5599}); - table.put(0x2F846, new int [] {0x5584}); - table.put(0x2F845, new int [] {0x5584}); - table.put(0x2F844, new int [] {0x5563}); - table.put(0x2F843, new int [] {0x5553}); - table.put(0x2F842, new int [] {0x5510}); - table.put(0x2F841, new int [] {0x54F6}); - table.put(0x2F840, 
new int [] {0x54A2}); - table.put(0x03B0, new int [] {0x03C5, 0x0308, 0x0301}); - table.put(0x2F83F, new int [] {0x5468}); - table.put(0x2F83E, new int [] {0x5448}); - table.put(0x03AF, new int [] {0x03B9, 0x0301}); - table.put(0x2F83D, new int [] {0x5438}); - table.put(0x03AE, new int [] {0x03B7, 0x0301}); - table.put(0x2F83C, new int [] {0x549E}); - table.put(0x03AD, new int [] {0x03B5, 0x0301}); - table.put(0x2F83B, new int [] {0x5406}); - table.put(0x03AC, new int [] {0x03B1, 0x0301}); - table.put(0x2F83A, new int [] {0x53F1}); - table.put(0x03AB, new int [] {0x03A5, 0x0308}); - table.put(0x03AA, new int [] {0x0399, 0x0308}); - table.put(0x2F839, new int [] {0x53EB}); - table.put(0x2F838, new int [] {0x20B63}); - table.put(0x2F837, new int [] {0x53DF}); - table.put(0x2F836, new int [] {0x53CA}); - table.put(0x2F835, new int [] {0x7070}); - table.put(0x2F834, new int [] {0x20A2C}); - table.put(0x2F833, new int [] {0x537F}); - table.put(0x2F832, new int [] {0x537F}); - table.put(0x2F831, new int [] {0x537F}); - table.put(0x2289, new int [] {0x2287, 0x0338}); - table.put(0x2F830, new int [] {0x537D}); - table.put(0x2288, new int [] {0x2286, 0x0338}); - table.put(0x2285, new int [] {0x2283, 0x0338}); - table.put(0x2284, new int [] {0x2282, 0x0338}); - table.put(0x2281, new int [] {0x227B, 0x0338}); - table.put(0x2280, new int [] {0x227A, 0x0338}); - table.put(0x2F82F, new int [] {0x5373}); - table.put(0x2F82E, new int [] {0x535A}); - table.put(0x2F82D, new int [] {0x5351}); - table.put(0x2F82C, new int [] {0x5349}); - table.put(0x2F82B, new int [] {0x5317}); - table.put(0x2F82A, new int [] {0x5306}); - table.put(0x2F829, new int [] {0x5305}); - table.put(0x2F828, new int [] {0x52FA}); - table.put(0x2F827, new int [] {0x52E4}); - table.put(0x2F826, new int [] {0x52C9}); - table.put(0x2F825, new int [] {0x52C7}); - table.put(0x2F824, new int [] {0x3515}); - table.put(0x2F823, new int [] {0x5277}); - table.put(0x2F822, new int [] {0x5272}); - table.put(0x2F821, new 
int [] {0x5246}); - table.put(0x2279, new int [] {0x2277, 0x0338}); - table.put(0x2F820, new int [] {0x523B}); - table.put(0x2278, new int [] {0x2276, 0x0338}); - table.put(0x2275, new int [] {0x2273, 0x0338}); - table.put(0x2274, new int [] {0x2272, 0x0338}); - table.put(0x2271, new int [] {0x2265, 0x0338}); - table.put(0x2270, new int [] {0x2264, 0x0338}); - table.put(0x2F81F, new int [] {0x34DF}); - table.put(0x2F81E, new int [] {0x5203}); - table.put(0x2F81D, new int [] {0x51F5}); - table.put(0x2F81C, new int [] {0x291DF}); - table.put(0x2F81B, new int [] {0x51B5}); - table.put(0xFB4E, new int [] {0x05E4, 0x05BF}); - table.put(0x2F81A, new int [] {0x51AC}); - table.put(0xFB4D, new int [] {0x05DB, 0x05BF}); - table.put(0xFB4C, new int [] {0x05D1, 0x05BF}); - table.put(0xFB4B, new int [] {0x05D5, 0x05B9}); - table.put(0x226F, new int [] {0x003E, 0x0338}); - table.put(0xFB4A, new int [] {0x05EA, 0x05BC}); - table.put(0x226E, new int [] {0x003C, 0x0338}); - table.put(0x226D, new int [] {0x224D, 0x0338}); - table.put(0x2F819, new int [] {0x4ECC}); - table.put(0x2F818, new int [] {0x51A4}); - table.put(0x2F817, new int [] {0x5197}); - table.put(0x2F816, new int [] {0x2054B}); - table.put(0xFB49, new int [] {0x05E9, 0x05BC}); - table.put(0x2F815, new int [] {0x518D}); - table.put(0xFB48, new int [] {0x05E8, 0x05BC}); - table.put(0x2F814, new int [] {0x5167}); - table.put(0xFB47, new int [] {0x05E7, 0x05BC}); - table.put(0x2F813, new int [] {0x34B9}); - table.put(0xFB46, new int [] {0x05E6, 0x05BC}); - table.put(0x2F812, new int [] {0x2051C}); - table.put(0x2F811, new int [] {0x5177}); - table.put(0xFB44, new int [] {0x05E4, 0x05BC}); - table.put(0x2F810, new int [] {0x5164}); - table.put(0xFB43, new int [] {0x05E3, 0x05BC}); - table.put(0xFB41, new int [] {0x05E1, 0x05BC}); - table.put(0xFB40, new int [] {0x05E0, 0x05BC}); - table.put(0x2262, new int [] {0x2261, 0x0338}); - table.put(0x2260, new int [] {0x003D, 0x0338}); - table.put(0x2F80F, new int [] {0x5154}); - 
table.put(0x2F80E, new int [] {0x514D}); - table.put(0x2F80D, new int [] {0x2063A}); - table.put(0x2F80C, new int [] {0x349E}); - table.put(0x2F80B, new int [] {0x50CF}); - table.put(0xFB3E, new int [] {0x05DE, 0x05BC}); - table.put(0x2F80A, new int [] {0x50E7}); - table.put(0xFB3C, new int [] {0x05DC, 0x05BC}); - table.put(0xFB3B, new int [] {0x05DB, 0x05BC}); - table.put(0xFB3A, new int [] {0x05DA, 0x05BC}); - table.put(0x2F809, new int [] {0x5099}); - table.put(0x2F808, new int [] {0x507A}); - table.put(0x2F807, new int [] {0x5002}); - table.put(0x2F806, new int [] {0x4FBB}); - table.put(0xFB39, new int [] {0x05D9, 0x05BC}); - table.put(0x2F805, new int [] {0x4FAE}); - table.put(0xFB38, new int [] {0x05D8, 0x05BC}); - table.put(0x2F804, new int [] {0x4F60}); - table.put(0x2F803, new int [] {0x20122}); - table.put(0xFB36, new int [] {0x05D6, 0x05BC}); - table.put(0x2F802, new int [] {0x4E41}); - table.put(0xFB35, new int [] {0x05D5, 0x05BC}); - table.put(0x2F801, new int [] {0x4E38}); - table.put(0xFB34, new int [] {0x05D4, 0x05BC}); - table.put(0x2F800, new int [] {0x4E3D}); - table.put(0x06D3, new int [] {0x06D2, 0x0654}); - table.put(0xFB33, new int [] {0x05D3, 0x05BC}); - table.put(0xFB32, new int [] {0x05D2, 0x05BC}); - table.put(0xFB31, new int [] {0x05D1, 0x05BC}); - table.put(0xFB30, new int [] {0x05D0, 0x05BC}); - table.put(0xFB2F, new int [] {0x05D0, 0x05B8}); - table.put(0xFB2E, new int [] {0x05D0, 0x05B7}); - table.put(0xFB2D, new int [] {0x05E9, 0x05BC, 0x05C2}); - table.put(0xFB2C, new int [] {0x05E9, 0x05BC, 0x05C1}); - table.put(0xFB2B, new int [] {0x05E9, 0x05C2}); - table.put(0xFB2A, new int [] {0x05E9, 0x05C1}); - table.put(0x2249, new int [] {0x2248, 0x0338}); - table.put(0x2247, new int [] {0x2245, 0x0338}); - table.put(0x06C2, new int [] {0x06C1, 0x0654}); - table.put(0x06C0, new int [] {0x06D5, 0x0654}); - table.put(0x2244, new int [] {0x2243, 0x0338}); - table.put(0x0FB9, new int [] {0x0F90, 0x0FB5}); - table.put(0x2241, new int [] 
{0x223C, 0x0338}); - table.put(0xFB1F, new int [] {0x05F2, 0x05B7}); - table.put(0xFB1D, new int [] {0x05D9, 0x05B4}); - table.put(0x0FAC, new int [] {0x0FAB, 0x0FB7}); - table.put(0x0FA7, new int [] {0x0FA6, 0x0FB7}); - table.put(0x0FA2, new int [] {0x0FA1, 0x0FB7}); - table.put(0x2226, new int [] {0x2225, 0x0338}); - table.put(0x2224, new int [] {0x2223, 0x0338}); - table.put(0x220C, new int [] {0x220B, 0x0338}); - table.put(0x2209, new int [] {0x2208, 0x0338}); - table.put(0x0390, new int [] {0x03B9, 0x0308, 0x0301}); - table.put(0x2204, new int [] {0x2203, 0x0338}); - table.put(0x038F, new int [] {0x03A9, 0x0301}); - table.put(0x038E, new int [] {0x03A5, 0x0301}); - table.put(0x038C, new int [] {0x039F, 0x0301}); - table.put(0x09DF, new int [] {0x09AF, 0x09BC}); - table.put(0x038A, new int [] {0x0399, 0x0301}); - table.put(0x09DD, new int [] {0x09A2, 0x09BC}); - table.put(0x09DC, new int [] {0x09A1, 0x09BC}); - table.put(0x0389, new int [] {0x0397, 0x0301}); - table.put(0x0388, new int [] {0x0395, 0x0301}); - table.put(0x0387, new int [] {0x00B7}); - table.put(0x0386, new int [] {0x0391, 0x0301}); - table.put(0x0385, new int [] {0x00A8, 0x0301}); - table.put(0x037E, new int [] {0x003B}); - table.put(0x09CC, new int [] {0x09C7, 0x09D7}); - table.put(0x09CB, new int [] {0x09C7, 0x09BE}); - table.put(0x0374, new int [] {0x02B9}); - table.put(0x0C48, new int [] {0x0C46, 0x0C56}); - table.put(0x0F9D, new int [] {0x0F9C, 0x0FB7}); - table.put(0x0344, new int [] {0x0308, 0x0301}); - table.put(0x0343, new int [] {0x0313}); - table.put(0x0341, new int [] {0x0301}); - table.put(0x0340, new int [] {0x0300}); - table.put(0x0F93, new int [] {0x0F92, 0x0FB7}); - table.put(0x0F81, new int [] {0x0F71, 0x0F80}); - table.put(0x0F78, new int [] {0x0FB3, 0x0F80}); - table.put(0x1D1C0, new int [] {0x1D1BA, 0x1D165, 0x1D16F}); - table.put(0x0F76, new int [] {0x0FB2, 0x0F80}); - table.put(0x0F75, new int [] {0x0F71, 0x0F74}); - table.put(0x0F73, new int [] {0x0F71, 0x0F72}); - 
table.put(0x1D1BF, new int [] {0x1D1B9, 0x1D165, 0x1D16F}); - table.put(0x1D1BE, new int [] {0x1D1BA, 0x1D165, 0x1D16E}); - table.put(0x1D1BD, new int [] {0x1D1B9, 0x1D165, 0x1D16E}); - table.put(0x1D1BC, new int [] {0x1D1BA, 0x1D165}); - table.put(0x1D1BB, new int [] {0x1D1B9, 0x1D165}); - table.put(0x0F69, new int [] {0x0F40, 0x0FB5}); - table.put(0x0F5C, new int [] {0x0F5B, 0x0FB7}); - table.put(0xFAD9, new int [] {0x9F8E}); - table.put(0xFAD8, new int [] {0x9F43}); - table.put(0xFAD7, new int [] {0x27ED3}); - table.put(0xFAD6, new int [] {0x25CD0}); - table.put(0xFAD5, new int [] {0x25249}); - table.put(0xFAD4, new int [] {0x4039}); - table.put(0xFAD3, new int [] {0x4018}); - table.put(0xFAD2, new int [] {0x3B9D}); - table.put(0xFAD1, new int [] {0x233D5}); - table.put(0xFAD0, new int [] {0x22844}); - table.put(0x0F57, new int [] {0x0F56, 0x0FB7}); - table.put(0x0F52, new int [] {0x0F51, 0x0FB7}); - table.put(0xFACF, new int [] {0x2284A}); - table.put(0xFACE, new int [] {0x9F9C}); - table.put(0xFACD, new int [] {0x9B12}); - table.put(0xFACC, new int [] {0x983B}); - table.put(0xFACB, new int [] {0x980B}); - table.put(0xFACA, new int [] {0x97FF}); - table.put(0x0F4D, new int [] {0x0F4C, 0x0FB7}); - table.put(0xFAC9, new int [] {0x97DB}); - table.put(0xFAC8, new int [] {0x9756}); - table.put(0xFAC7, new int [] {0x96E3}); - table.put(0xFAC6, new int [] {0x967C}); - table.put(0xFAC5, new int [] {0x9276}); - table.put(0x2ADC, new int [] {0x2ADD, 0x0338}); - table.put(0xFAC4, new int [] {0x9199}); - table.put(0xFAC3, new int [] {0x9072}); - table.put(0xFAC2, new int [] {0x8F38}); - table.put(0xFAC1, new int [] {0x8D08}); - table.put(0xFAC0, new int [] {0x8B8A}); - table.put(0x0F43, new int [] {0x0F42, 0x0FB7}); - table.put(0xFABF, new int [] {0x8B39}); - table.put(0xFABE, new int [] {0x8AED}); - table.put(0xFABD, new int [] {0x8AFE}); - table.put(0xFABC, new int [] {0x8B01}); - table.put(0xFABB, new int [] {0x8ACB}); - table.put(0xFABA, new int [] {0x8AF8}); - 
table.put(0xFAB9, new int [] {0x8ABF}); - table.put(0xFAB8, new int [] {0x8996}); - table.put(0xFAB7, new int [] {0x8986}); - table.put(0xFAB6, new int [] {0x8941}); - table.put(0xFAB5, new int [] {0x8779}); - table.put(0xFAB4, new int [] {0x83EF}); - table.put(0xFAB3, new int [] {0x8352}); - table.put(0xFAB2, new int [] {0x8005}); - table.put(0xFAB1, new int [] {0x7F3E}); - table.put(0xFAB0, new int [] {0x7DF4}); - table.put(0xFAAF, new int [] {0x7D5B}); - table.put(0xFAAE, new int [] {0x7C7B}); - table.put(0xFAAD, new int [] {0x7BC0}); - table.put(0xFAAC, new int [] {0x7AB1}); - table.put(0xFAAB, new int [] {0x78CC}); - table.put(0x2FA1D, new int [] {0x2A600}); - table.put(0x21CF, new int [] {0x21D2, 0x0338}); - table.put(0xFAAA, new int [] {0x7740}); - table.put(0x2FA1C, new int [] {0x9F3B}); - table.put(0x21CE, new int [] {0x21D4, 0x0338}); - table.put(0x2FA1B, new int [] {0x9F16}); - table.put(0x21CD, new int [] {0x21D0, 0x0338}); - table.put(0x2FA1A, new int [] {0x9F0F}); - table.put(0xFAA9, new int [] {0x774A}); - table.put(0xFAA8, new int [] {0x76F4}); - table.put(0xFAA7, new int [] {0x76DB}); - table.put(0x2FA19, new int [] {0x9F05}); - table.put(0xFAA6, new int [] {0x76CA}); - table.put(0x2FA18, new int [] {0x9EFE}); - table.put(0xFAA5, new int [] {0x761F}); - table.put(0x2FA17, new int [] {0x9EF9}); - table.put(0xFAA4, new int [] {0x761D}); - table.put(0x2FA16, new int [] {0x4D56}); - table.put(0xFAA3, new int [] {0x753B}); - table.put(0x2FA15, new int [] {0x9EBB}); - table.put(0xFAA2, new int [] {0x7506}); - table.put(0x2FA14, new int [] {0x2A291}); - table.put(0xFAA1, new int [] {0x7471}); - table.put(0x2FA13, new int [] {0x2A20E}); - table.put(0xFAA0, new int [] {0x732A}); - table.put(0x2FA12, new int [] {0x2A105}); - table.put(0x2FA11, new int [] {0x4CF8}); - table.put(0x2FA10, new int [] {0x2A0CE}); - table.put(0x2FA0F, new int [] {0x9D67}); - table.put(0x2FA0E, new int [] {0x4CED}); - table.put(0x2FA0D, new int [] {0x4CCE}); - table.put(0x2FA0C, 
new int [] {0x9CFD}); - table.put(0x2FA0B, new int [] {0x9C40}); - table.put(0x2FA0A, new int [] {0x9B12}); - table.put(0x2FA09, new int [] {0x29B30}); - table.put(0x0626, new int [] {0x064A, 0x0654}); - table.put(0x2FA08, new int [] {0x4BCE}); - table.put(0x0625, new int [] {0x0627, 0x0655}); - table.put(0x2FA07, new int [] {0x99FE}); - table.put(0x0624, new int [] {0x0648, 0x0654}); - table.put(0x2FA06, new int [] {0x99C2}); - table.put(0x0623, new int [] {0x0627, 0x0654}); - table.put(0x2FA05, new int [] {0x99A7}); - table.put(0x0622, new int [] {0x0627, 0x0653}); - table.put(0x2FA04, new int [] {0x9929}); - table.put(0x2FA03, new int [] {0x4B33}); - table.put(0x2FA02, new int [] {0x98E2}); - table.put(0x2FA01, new int [] {0x295B6}); - table.put(0x2FA00, new int [] {0x9829}); - table.put(0x21AE, new int [] {0x2194, 0x0338}); - table.put(0x095F, new int [] {0x092F, 0x093C}); - table.put(0x095E, new int [] {0x092B, 0x093C}); - table.put(0x095D, new int [] {0x0922, 0x093C}); - table.put(0x095C, new int [] {0x0921, 0x093C}); - table.put(0x095B, new int [] {0x091C, 0x093C}); - table.put(0x095A, new int [] {0x0917, 0x093C}); - table.put(0x0959, new int [] {0x0916, 0x093C}); - table.put(0x0958, new int [] {0x0915, 0x093C}); - table.put(0x0934, new int [] {0x0933, 0x093C}); - table.put(0x0931, new int [] {0x0930, 0x093C}); - return table; - } - } -} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HighSurrogateCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HighSurrogateCharSet.java index 012f6c9fd..911df1c88 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HighSurrogateCharSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/HighSurrogateCharSet.java @@ -85,12 +85,12 @@ package org.teavm.classlib.java.util.regex; /** * This class represents high surrogate character. 
*/ -class HighSurrogateCharSet extends JointSet{ +class HighSurrogateCharSet extends JointSet { /* - * Note that we can use high and low surrogate characters - * that don't combine into supplementary code point. - * See http://www.unicode.org/reports/tr18/#Supplementary_Characters + * Note that we can use high and low surrogate characters that don't combine + * into supplementary code point. See + * http://www.unicode.org/reports/tr18/#Supplementary_Characters */ private char high; @@ -102,21 +102,24 @@ class HighSurrogateCharSet extends JointSet{ /** * Returns the next. */ + @Override public AbstractSet getNext() { return this.next; } /** * Sets next abstract set. + * * @param next * The next to set. */ + @Override public void setNext(AbstractSet next) { this.next = next; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int strLength = matchResult.getRightBound(); if (stringIndex + 1 > strLength) { @@ -130,8 +133,8 @@ class HighSurrogateCharSet extends JointSet{ char low = testString.charAt(stringIndex + 1); /* - * we consider high surrogate followed by - * low surrogate as a codepoint + * we consider high surrogate followed by low surrogate as a + * codepoint */ if (Character.isLowSurrogate(low)) { return -1; @@ -139,17 +142,16 @@ class HighSurrogateCharSet extends JointSet{ } if (this.high == high) { - return next.matches(stringIndex + 1, testString, - matchResult); + return next.matches(stringIndex + 1, testString, matchResult); } return -1; } - public int find(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) { if (testString instanceof String) { - String testStr = (String) testString; + String testStr = (String)testString; int strLength = matchResult.getRightBound(); while (strIndex < 
strLength) { @@ -158,11 +160,11 @@ class HighSurrogateCharSet extends JointSet{ if (strIndex < 0) return -1; - if (strIndex + 1 < strLength ) { + if (strIndex + 1 < strLength) { /* - * we consider high surrogate followed by - * low surrogate as a codepoint + * we consider high surrogate followed by low surrogate as a + * codepoint */ if (Character.isLowSurrogate(testStr.charAt(strIndex + 1))) { strIndex += 2; @@ -182,10 +184,10 @@ class HighSurrogateCharSet extends JointSet{ return super.find(strIndex, testString, matchResult); } - public int findBack(int strIndex, int lastIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) { if (testString instanceof String) { - String testStr = (String) testString; + String testStr = (String)testString; int strLength = matchResult.getRightBound(); while (lastIndex >= strIndex) { @@ -197,8 +199,8 @@ class HighSurrogateCharSet extends JointSet{ if (lastIndex + 1 < strLength) { /* - * we consider high surrogate followed by - * low surrogate as a codepoint + * we consider high surrogate followed by low surrogate as a + * codepoint */ if (Character.isLowSurrogate(testStr.charAt(lastIndex + 1))) { lastIndex--; @@ -219,6 +221,7 @@ class HighSurrogateCharSet extends JointSet{ return super.findBack(strIndex, lastIndex, testString, matchResult); } + @Override protected String getName() { return "" + high; } @@ -227,6 +230,7 @@ class HighSurrogateCharSet extends JointSet{ return high; } + @Override public boolean first(AbstractSet set) { if (set instanceof CharSet) { return false; @@ -239,12 +243,13 @@ class HighSurrogateCharSet extends JointSet{ } else if (set instanceof LowSurrogateCharSet) { return false; } else if (set instanceof HighSurrogateCharSet) { - return ((HighSurrogateCharSet) set).high == this.high; + return ((HighSurrogateCharSet)set).high == this.high; } return true; } + @Override public boolean 
hasConsumed(MatchResultImpl matchResult) { return true; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/JointSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/JointSet.java index 8b85df35b..aeaf1ec1f 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/JointSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/JointSet.java @@ -24,13 +24,13 @@ import java.util.ArrayList; import java.util.Iterator; /** - * Represents group, which is alternation of other subexpression. - * One should think about "group" in this model as JointSet opening - * group and corresponding FSet closing group. + * Represents group, which is alternation of other subexpression. One should + * think about "group" in this model as JointSet opening group and corresponding + * FSet closing group. */ class JointSet extends AbstractSet { - protected ArrayList children; + protected ArrayList children; protected AbstractSet fSet; @@ -39,7 +39,7 @@ class JointSet extends AbstractSet { protected JointSet() { } - public JointSet(ArrayList children, FSet fSet) { + public JointSet(ArrayList children, FSet fSet) { this.children = children; this.fSet = fSet; this.groupIndex = fSet.getGroupIndex(); @@ -48,8 +48,8 @@ class JointSet extends AbstractSet { /** * Returns stringIndex+shift, the next position to match */ - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { if (children == null) { return -1; } @@ -57,7 +57,7 @@ class JointSet extends AbstractSet { matchResult.setStart(groupIndex, stringIndex); int size = children.size(); for (int i = 0; i < size; i++) { - AbstractSet e = (AbstractSet) children.get(i); + AbstractSet e = children.get(i); int shift = e.matches(stringIndex, testString, matchResult); if (shift >= 0) { return shift; @@ -67,14 +67,17 @@ class 
JointSet extends AbstractSet { return -1; } + @Override public void setNext(AbstractSet next) { fSet.setNext(next); } + @Override public AbstractSet getNext() { return fSet.getNext(); } + @Override protected String getName() { return "JointSet"; //$NON-NLS-1$ } @@ -83,10 +86,11 @@ class JointSet extends AbstractSet { return groupIndex; } + @Override public boolean first(AbstractSet set) { if (children != null) { - for (Iterator i = children.iterator(); i.hasNext();) { - if (((AbstractSet) i.next()).first(set)) { + for (Iterator i = children.iterator(); i.hasNext();) { + if ((i.next()).first(set)) { return true; } } @@ -95,60 +99,46 @@ class JointSet extends AbstractSet { return false; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { - return !(matchResult.getEnd(groupIndex) >= 0 && matchResult - .getStart(groupIndex) == matchResult.getEnd(groupIndex)); + return !(matchResult.getEnd(groupIndex) >= 0 && matchResult.getStart(groupIndex) == matchResult + .getEnd(groupIndex)); } /** - * This method is used for traversing nodes after the - * first stage of compilation. + * This method is used for traversing nodes after the first stage of + * compilation. 
*/ + @Override public void processSecondPass() { - this.isSecondPassVisited = true; + this.isSecondPassVisited = true; - if (fSet != null && !fSet.isSecondPassVisited) { + if (fSet != null && !fSet.isSecondPassVisited) { + fSet.processSecondPass(); + } - /* - * Add here code to do during the pass - */ + if (children != null) { + int childrenSize = children.size(); - /* - * End code to do during the pass - */ - fSet.processSecondPass(); - } + for (int i = 0; i < childrenSize; i++) { + AbstractSet child = children.get(i); + JointSet set = child.processBackRefReplacement(); - if (children != null) { - int childrenSize = children.size(); + if (set != null) { + child.isSecondPassVisited = true; + children.remove(i); + children.add(i, set); + child = set; + } - for (int i = 0; i < childrenSize; i++) { - AbstractSet child = (AbstractSet) children.get(i); + if (!child.isSecondPassVisited) { + child.processSecondPass(); + } + } + } - /* - * Add here code to do during the pass - */ - - JointSet set = child.processBackRefReplacement(); - - if (set != null) { - child.isSecondPassVisited = true; - children.remove(i); - children.add(i, set); - child = (AbstractSet) set; - } - - /* - * End code to do during the pass - */ - if (!child.isSecondPassVisited) { - child.processSecondPass(); - } - } - } - - if (next != null) { - super.processSecondPass(); - } + if (next != null) { + super.processSecondPass(); + } } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafQuantifierSet.java index f56a6c4f4..f68476eea 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafQuantifierSet.java @@ -20,7 +20,6 @@ */ package org.teavm.classlib.java.util.regex; - /** * @author Nikolay A. 
Kuznetsov */ @@ -33,13 +32,13 @@ class LeafQuantifierSet extends QuantifierSet { this.leaf = innerSet; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int i = 0; int shift = 0; - while (stringIndex + leaf.charCount() <= matchResult.getRightBound() - && (shift = leaf.accepts(stringIndex, testString)) > 0) { + while (stringIndex + leaf.charCount() <= matchResult.getRightBound() && + (shift = leaf.accepts(stringIndex, testString)) > 0) { stringIndex += shift; i++; } @@ -55,19 +54,22 @@ class LeafQuantifierSet extends QuantifierSet { return -1; } + @Override protected String getName() { - return ""; //$NON-NLS-1$ + return ""; } /** * Sets an inner set. + * * @param innerSet * The innerSet to set. */ + @Override public void setInnerSet(AbstractSet innerSet) { if (!(innerSet instanceof LeafSet)) throw new RuntimeException(""); super.setInnerSet(innerSet); - this.leaf = (LeafSet) innerSet; + this.leaf = (LeafSet)innerSet; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafSet.java index 75a38c981..d517990b3 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LeafSet.java @@ -48,8 +48,8 @@ abstract class LeafSet extends AbstractSet { * Checks if we can enter this state and pass the control to the next one. * Return positive value if match succeeds, negative otherwise. 
*/ - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { if (stringIndex + charCount() > matchResult.getRightBound()) { matchResult.hitEnd = true; @@ -66,12 +66,14 @@ abstract class LeafSet extends AbstractSet { /** * Returns number of characters this node consumes. + * * @return number of characters this node consumes. */ public int charCount() { return charCount; } + @Override public boolean hasConsumed(MatchResultImpl mr) { return true; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Lexer.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Lexer.java index 78244f389..2b7d47a6a 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Lexer.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Lexer.java @@ -111,19 +111,19 @@ class Lexer { public static final int MODE_ESCAPE = 1 << 2; - //maximum length of decomposition + // maximum length of decomposition static final int MAX_DECOMPOSITION_LENGTH = 4; /* - * maximum length of Hangul decomposition - * note that MAX_HANGUL_DECOMPOSITION_LENGTH <= MAX_DECOMPOSITION_LENGTH + * maximum length of Hangul decomposition note that + * MAX_HANGUL_DECOMPOSITION_LENGTH <= MAX_DECOMPOSITION_LENGTH */ static final int MAX_HANGUL_DECOMPOSITION_LENGTH = 3; /* - * Following constants are needed for Hangul canonical decomposition. - * Hangul decomposition algorithm and constants are taken according - * to description at http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf + * Following constants are needed for Hangul canonical decomposition. 
Hangul + * decomposition algorithm and constants are taken according to description + * at http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf * "3.12 Conjoining Jamo Behavior" */ static final int SBase = 0xAC00; @@ -144,17 +144,17 @@ class Lexer { static final int NCount = 588; - //table that contains canonical decomposition mappings + // table that contains canonical decomposition mappings private static IntArrHash decompTable = null; - //table that contains canonical combining classes + // table that contains canonical combining classes private static IntHash canonClassesTable = null; private static int canonClassesTableSize; /* - * Table that contains information about Unicode codepoints with - * single codepoint decomposition + * Table that contains information about Unicode codepoints with single + * codepoint decomposition */ private static IntHash singleDecompTable = null; @@ -172,13 +172,13 @@ class Lexer { // previous char read private int lookBack; - //current character read + // current character read private int ch; - //next character + // next character private int lookAhead; - //index of last char in pattern plus one + // index of last char in pattern plus one private int patternFullLength = 0; // cur special token @@ -187,19 +187,19 @@ class Lexer { // next special token private SpecialToken lookAheadST = null; - // cur char being processed + // cur char being processed private int index = 0; - // previous non-whitespace character index; + // previous non-whitespace character index; private int prevNW = 0; - // cur token start index + // cur token start index private int curToc = 0; - // look ahead token index + // look ahead token index private int lookAheadToc = 0; - // original string representing pattern + // original string representing pattern private String orig = null; public Lexer(String pattern, int flags) { @@ -211,8 +211,7 @@ class Lexer { } this.pattern = new char[pattern.length() + 2]; - System.arraycopy(pattern.toCharArray(), 0, 
this.pattern, 0, - pattern.length()); + System.arraycopy(pattern.toCharArray(), 0, this.pattern, 0, pattern.length()); this.pattern[this.pattern.length - 1] = 0; this.pattern[this.pattern.length - 2] = 0; patternFullLength = this.pattern.length; @@ -257,13 +256,13 @@ class Lexer { */ public void restoreFlags(int flags) { this.flags = flags; - lookAhead = ch; - lookAheadST = curST; + lookAhead = ch; + lookAheadST = curST; - //curToc is an index of closing bracket ) - index = curToc + 1; + // curToc is an index of closing bracket ) + index = curToc + 1; lookAheadToc = curToc; - movePointer(); + movePointer(); } public SpecialToken peekSpecial() { @@ -323,128 +322,31 @@ class Lexer { /** * Normalize given expression. * - * @param input - expression to normalize + * @param input + * - expression to normalize * @return normalized expression. */ static String normalize(String input) { - char [] inputChars = input.toCharArray(); - int inputLength = inputChars.length; - int resCodePointsIndex = 0; - int inputCodePointsIndex = 0; - int decompHangulIndex = 0; - - //codePoints of input - int [] inputCodePoints = new int [inputLength]; - - //result of canonical decomposition of input - int [] resCodePoints = new int [inputLength * MAX_DECOMPOSITION_LENGTH]; - - //current symbol's codepoint - int ch; - - //current symbol's decomposition - int [] decomp; - - //result of canonical and Hangul decomposition of input - int [] decompHangul; - - //result of canonical decomposition of input in UTF-16 encoding - StringBuilder result = new StringBuilder(); - - decompTable = HashDecompositions.getHashDecompositions(); - canonClassesTable = CanClasses.getHashCanClasses(); - canonClassesTableSize = canonClassesTable.size; - singleDecompTable = SingleDecompositions.getHashSingleDecompositions(); - singleDecompTableSize = singleDecompTable.size; - - for (int i = 0; i < inputLength; i += Character.charCount(ch)) { - ch = Character.codePointAt(inputChars, i); - 
inputCodePoints[inputCodePointsIndex++] = ch; - } - - /* - * Canonical decomposition based on mappings in decompTable - */ - for (int i = 0; i < inputCodePointsIndex; i++) { - ch = inputCodePoints[i]; - - decomp = Lexer.getDecomposition(ch); - if (decomp == null) { - resCodePoints[resCodePointsIndex++] = ch; - } else { - int curSymbDecompLength = decomp.length; - - for (int j = 0; j < curSymbDecompLength; j++) { - resCodePoints[resCodePointsIndex++] = decomp[j]; - } - } - } - - /* - * Canonical ordering. - * See http://www.unicode.org/reports/tr15/#Decomposition for - * details - */ - resCodePoints = Lexer.getCanonicalOrder(resCodePoints, - resCodePointsIndex); - - /* - * Decomposition for Hangul syllables. - * See http://www.unicode.org/reports/tr15/#Hangul for - * details - */ - decompHangul = new int [resCodePoints.length]; - - for (int i = 0; i < resCodePointsIndex; i++) { - int curSymb = resCodePoints[i]; - - decomp = getHangulDecomposition(curSymb); - if (decomp == null) { - decompHangul[decompHangulIndex++] = curSymb; - } else{ - - /* - * Note that Hangul decompositions have length that is - * equal 2 or 3. - */ - decompHangul[decompHangulIndex++] = decomp[0]; - decompHangul[decompHangulIndex++] = decomp[1]; - if (decomp.length == 3) { - decompHangul[decompHangulIndex++] = decomp[2]; - } - } - } - - /* - * Translating into UTF-16 encoding - */ - for (int i = 0; i < decompHangulIndex; i++) { - result.append(Character.toChars(decompHangul[i])); - } - - return result.toString(); + return input; } /** - * Rearrange codepoints according - * to canonical order. + * Rearrange codepoints according to canonical order. * - * @param inputInts - array that contains Unicode codepoints - * @param length - index of last Unicode codepoint plus 1 + * @param inputInts + * - array that contains Unicode codepoints + * @param length + * - index of last Unicode codepoint plus 1 * * @return array that contains rearranged codepoints. 
*/ - static int [] getCanonicalOrder(int [] inputInts, int length) { - int inputLength = (length < inputInts.length) - ? length - : inputInts.length; + static int[] getCanonicalOrder(int[] inputInts, int length) { + int inputLength = (length < inputInts.length) ? length : inputInts.length; /* - * Simple bubble-sort algorithm. - * Note that many codepoints have 0 - * canonical class, so this algorithm works - * almost lineary in overwhelming majority - * of cases. This is due to specific of Unicode + * Simple bubble-sort algorithm. Note that many codepoints have 0 + * canonical class, so this algorithm works almost lineary in + * overwhelming majority of cases. This is due to specific of Unicode * combining classes and codepoints. */ for (int i = 1; i < inputLength; i++) { @@ -464,9 +366,9 @@ class Lexer { } } - ch = inputInts [i]; + ch = inputInts[i]; for (int k = i; k > j + 1; k--) { - inputInts[k] = inputInts [k - 1]; + inputInts[k] = inputInts[k - 1]; } inputInts[j + 1] = ch; } @@ -510,25 +412,22 @@ class Lexer { if (mode == Lexer.MODE_ESCAPE) { if (lookAhead == '\\') { - //need not care about supplementary codepoints here - lookAhead = (index < pattern.length) ? pattern[nextIndex()] - : 0; + // need not care about supplementary codepoints here + lookAhead = (index < pattern.length) ? pattern[nextIndex()] : 0; switch (lookAhead) { - case 'E': { - mode = saved_mode; + case 'E': { + mode = saved_mode; - lookAhead = (index <= pattern.length - 2) - ? nextCodePoint() - : 0; - break; - } + lookAhead = (index <= pattern.length - 2) ? nextCodePoint() : 0; + break; + } - default: { - lookAhead = '\\'; - index = prevNW; - return; - } + default: { + lookAhead = '\\'; + index = prevNW; + return; + } } } else { return; @@ -537,302 +436,298 @@ class Lexer { if (lookAhead == '\\') { - lookAhead = (index < pattern.length - 2) ? nextCodePoint() - : -1; + lookAhead = (index < pattern.length - 2) ? 
nextCodePoint() : -1; switch (lookAhead) { - case -1: - throw new TPatternSyntaxException("", this.toString(), index); - case 'P': - case 'p': { - String cs = parseCharClassName(); - boolean negative = false; - - if (lookAhead == 'P') - negative = true; - try { - lookAheadST = AbstractCharClass.getPredefinedClass(cs, - negative); - } catch (MissingResourceException mre) { + case -1: throw new TPatternSyntaxException("", this.toString(), index); - } - lookAhead = 0; - break; - } + case 'P': + case 'p': { + String cs = parseCharClassName(); + boolean negative = false; - case 'w': - case 's': - case 'd': - case 'W': - case 'S': - case 'D': { - lookAheadST = CharClass.getPredefinedClass(new String( - pattern, prevNW, 1), false); - lookAhead = 0; - break; - } - - case 'Q': { - saved_mode = mode; - mode = Lexer.MODE_ESCAPE; - reread = true; - break; - } - - case 't': - lookAhead = '\t'; - break; - case 'n': - lookAhead = '\n'; - break; - case 'r': - lookAhead = '\r'; - break; - case 'f': - lookAhead = '\f'; - break; - case 'a': - lookAhead = '\u0007'; - break; - case 'e': - lookAhead = '\u001B'; - break; - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - if (mode == Lexer.MODE_PATTERN) { - lookAhead = 0x80000000 | lookAhead; - } - break; - } - - case '0': - lookAhead = readOctals(); - break; - case 'x': - lookAhead = readHex("hexadecimal", 2); //$NON-NLS-1$ - break; - case 'u': - lookAhead = readHex("Unicode", 4); //$NON-NLS-1$ - break; - - case 'b': - lookAhead = CHAR_WORD_BOUND; - break; - case 'B': - lookAhead = CHAR_NONWORD_BOUND; - break; - case 'A': - lookAhead = CHAR_START_OF_INPUT; - break; - case 'G': - lookAhead = CHAR_PREVIOUS_MATCH; - break; - case 'Z': - lookAhead = CHAR_END_OF_LINE; - break; - case 'z': - lookAhead = CHAR_END_OF_INPUT; - break; - case 'c': { - if (index < pattern.length - 2) { - - //need not care about supplementary codepoints here - lookAhead = (pattern[nextIndex()] & 0x1f); + 
if (lookAhead == 'P') + negative = true; + try { + lookAheadST = AbstractCharClass.getPredefinedClass(cs, negative); + } catch (MissingResourceException mre) { + throw new TPatternSyntaxException("", this.toString(), index); + } + lookAhead = 0; break; - } else { - throw new TPatternSyntaxException("", this.toString(), index); } - } - case 'C': - case 'E': - case 'F': - case 'H': - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - case 'O': - case 'R': - case 'T': - case 'U': - case 'V': - case 'X': - case 'Y': - case 'g': - case 'h': - case 'i': - case 'j': - case 'k': - case 'l': - case 'm': - case 'o': - case 'q': - case 'y': - throw new TPatternSyntaxException("", this.toString(), index); - default: - break; + case 'w': + case 's': + case 'd': + case 'W': + case 'S': + case 'D': { + lookAheadST = CharClass.getPredefinedClass(new String(pattern, prevNW, 1), false); + lookAhead = 0; + break; + } + + case 'Q': { + saved_mode = mode; + mode = Lexer.MODE_ESCAPE; + reread = true; + break; + } + + case 't': + lookAhead = '\t'; + break; + case 'n': + lookAhead = '\n'; + break; + case 'r': + lookAhead = '\r'; + break; + case 'f': + lookAhead = '\f'; + break; + case 'a': + lookAhead = '\u0007'; + break; + case 'e': + lookAhead = '\u001B'; + break; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (mode == Lexer.MODE_PATTERN) { + lookAhead = 0x80000000 | lookAhead; + } + break; + } + + case '0': + lookAhead = readOctals(); + break; + case 'x': + lookAhead = readHex(2); + break; + case 'u': + lookAhead = readHex(4); + break; + + case 'b': + lookAhead = CHAR_WORD_BOUND; + break; + case 'B': + lookAhead = CHAR_NONWORD_BOUND; + break; + case 'A': + lookAhead = CHAR_START_OF_INPUT; + break; + case 'G': + lookAhead = CHAR_PREVIOUS_MATCH; + break; + case 'Z': + lookAhead = CHAR_END_OF_LINE; + break; + case 'z': + lookAhead = CHAR_END_OF_INPUT; + break; + case 'c': { + if (index < 
pattern.length - 2) { + + // need not care about supplementary codepoints here + lookAhead = (pattern[nextIndex()] & 0x1f); + break; + } else { + throw new TPatternSyntaxException("", this.toString(), index); + } + } + case 'C': + case 'E': + case 'F': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'R': + case 'T': + case 'U': + case 'V': + case 'X': + case 'Y': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'o': + case 'q': + case 'y': + throw new TPatternSyntaxException("", this.toString(), index); + + default: + break; } } else if (mode == Lexer.MODE_PATTERN) { switch (lookAhead) { - case '+': - case '*': - case '?': { - char mod = (index < pattern.length) ? pattern[index] : '*'; - switch (mod) { - case '+': { - lookAhead = lookAhead | Lexer.QMOD_POSSESSIVE; - nextIndex(); - break; - } + case '+': + case '*': case '?': { - lookAhead = lookAhead | Lexer.QMOD_RELUCTANT; - nextIndex(); - break; - } - default: { - lookAhead = lookAhead | Lexer.QMOD_GREEDY; - break; - } - } - - break; - } - - case '{': { - lookAheadST = processQuantifier(lookAhead); - break; - } - - case '$': - lookAhead = CHAR_DOLLAR; - break; - case '(': { - if (pattern[index] == '?') { - nextIndex(); - char nonCap = pattern[index]; - boolean behind = false; - do { - if (!behind) { - switch (nonCap) { - case '!': - lookAhead = CHAR_NEG_LOOKAHEAD; - nextIndex(); - break; - case '=': - lookAhead = CHAR_POS_LOOKAHEAD; - nextIndex(); - break; - case '>': - lookAhead = CHAR_ATOMIC_GROUP; - nextIndex(); - break; - case '<': { - nextIndex(); - nonCap = pattern[index]; - behind = true; - break; - } - default: { - lookAhead = readFlags(); - - /* - * We return res = res | 1 << 8 - * from readFlags() if we read - * (?idmsux-idmsux) - */ - if (lookAhead >= 256) { - - //Erase auxiliary bit - lookAhead = (lookAhead & 0xff); - flags = lookAhead; - lookAhead = lookAhead << 16; - lookAhead = CHAR_FLAGS | lookAhead; - 
} else { - flags = lookAhead; - lookAhead = lookAhead << 16; - lookAhead = CHAR_NONCAP_GROUP - | lookAhead; - } - break; - } - } - } else { - behind = false; - switch (nonCap) { - case '!': - lookAhead = CHAR_NEG_LOOKBEHIND; - nextIndex(); - break; - case '=': - lookAhead = CHAR_POS_LOOKBEHIND; - nextIndex(); - break; - default: - throw new TPatternSyntaxException("", this.toString(), index); - } + char mod = (index < pattern.length) ? pattern[index] : '*'; + switch (mod) { + case '+': { + lookAhead = lookAhead | Lexer.QMOD_POSSESSIVE; + nextIndex(); + break; } - } while (behind); - } else { - lookAhead = CHAR_LEFT_PARENTHESIS; - } - break; - } + case '?': { + lookAhead = lookAhead | Lexer.QMOD_RELUCTANT; + nextIndex(); + break; + } + default: { + lookAhead = lookAhead | Lexer.QMOD_GREEDY; + break; + } + } - case ')': - lookAhead = CHAR_RIGHT_PARENTHESIS; - break; - case '[': { - lookAhead = CHAR_LEFT_SQUARE_BRACKET; - setMode(Lexer.MODE_RANGE); - break; - } - case ']': { - if (mode == Lexer.MODE_RANGE) { - lookAhead = CHAR_RIGHT_SQUARE_BRACKET; + break; } - break; - } - case '^': - lookAhead = CHAR_CARET; - break; - case '|': - lookAhead = CHAR_VERTICAL_BAR; - break; - case '.': - lookAhead = CHAR_DOT; - break; - default: - break; + + case '{': { + lookAheadST = processQuantifier(lookAhead); + break; + } + + case '$': + lookAhead = CHAR_DOLLAR; + break; + case '(': { + if (pattern[index] == '?') { + nextIndex(); + char nonCap = pattern[index]; + boolean behind = false; + do { + if (!behind) { + switch (nonCap) { + case '!': + lookAhead = CHAR_NEG_LOOKAHEAD; + nextIndex(); + break; + case '=': + lookAhead = CHAR_POS_LOOKAHEAD; + nextIndex(); + break; + case '>': + lookAhead = CHAR_ATOMIC_GROUP; + nextIndex(); + break; + case '<': { + nextIndex(); + nonCap = pattern[index]; + behind = true; + break; + } + default: { + lookAhead = readFlags(); + + /* + * We return res = res | 1 << 8 from + * readFlags() if we read + * (?idmsux-idmsux) + */ + if (lookAhead >= 256) { + 
+ // Erase auxiliary bit + lookAhead = (lookAhead & 0xff); + flags = lookAhead; + lookAhead = lookAhead << 16; + lookAhead = CHAR_FLAGS | lookAhead; + } else { + flags = lookAhead; + lookAhead = lookAhead << 16; + lookAhead = CHAR_NONCAP_GROUP | lookAhead; + } + break; + } + } + } else { + behind = false; + switch (nonCap) { + case '!': + lookAhead = CHAR_NEG_LOOKBEHIND; + nextIndex(); + break; + case '=': + lookAhead = CHAR_POS_LOOKBEHIND; + nextIndex(); + break; + default: + throw new TPatternSyntaxException("", this.toString(), index); + } + } + } while (behind); + } else { + lookAhead = CHAR_LEFT_PARENTHESIS; + } + break; + } + + case ')': + lookAhead = CHAR_RIGHT_PARENTHESIS; + break; + case '[': { + lookAhead = CHAR_LEFT_SQUARE_BRACKET; + setMode(Lexer.MODE_RANGE); + break; + } + case ']': { + if (mode == Lexer.MODE_RANGE) { + lookAhead = CHAR_RIGHT_SQUARE_BRACKET; + } + break; + } + case '^': + lookAhead = CHAR_CARET; + break; + case '|': + lookAhead = CHAR_VERTICAL_BAR; + break; + case '.': + lookAhead = CHAR_DOT; + break; + default: + break; } } else if (mode == Lexer.MODE_RANGE) { switch (lookAhead) { - case '[': - lookAhead = CHAR_LEFT_SQUARE_BRACKET; - break; - case ']': - lookAhead = CHAR_RIGHT_SQUARE_BRACKET; - break; - case '^': - lookAhead = CHAR_CARET; - break; - case '&': - lookAhead = CHAR_AMPERSAND; - break; - case '-': - lookAhead = CHAR_HYPHEN; - break; - default: - break; + case '[': + lookAhead = CHAR_LEFT_SQUARE_BRACKET; + break; + case ']': + lookAhead = CHAR_RIGHT_SQUARE_BRACKET; + break; + case '^': + lookAhead = CHAR_CARET; + break; + case '&': + lookAhead = CHAR_AMPERSAND; + break; + case '-': + lookAhead = CHAR_HYPHEN; + break; + default: + break; } } } while (reread); @@ -851,8 +746,7 @@ class Lexer { nextIndex(); char ch = 0; - while (index < pattern.length - 2 - && (ch = pattern[nextIndex()]) != '}') { + while (index < pattern.length - 2 && (ch = pattern[nextIndex()]) != '}') { sb.append(ch); } if (ch != '}') @@ -860,7 +754,7 @@ 
class Lexer { } if (sb.length() == 0) - throw new TPatternSyntaxException("", this.toString(), index); + throw new TPatternSyntaxException("", this.toString(), index); String res = sb.toString(); if (res.length() == 1) @@ -884,7 +778,7 @@ class Lexer { throw new TPatternSyntaxException("", this.toString(), index); } } else { - sb.append((char) ch); + sb.append((char)ch); } } if (ch != '}') { @@ -908,21 +802,22 @@ class Lexer { char mod = (index < pattern.length) ? pattern[index] : '*'; switch (mod) { - case '+': - lookAhead = Lexer.QUANT_COMP_P; - nextIndex(); - break; - case '?': - lookAhead = Lexer.QUANT_COMP_R; - nextIndex(); - break; - default: - lookAhead = Lexer.QUANT_COMP; - break; + case '+': + lookAhead = Lexer.QUANT_COMP_P; + nextIndex(); + break; + case '?': + lookAhead = Lexer.QUANT_COMP_R; + nextIndex(); + break; + default: + lookAhead = Lexer.QUANT_COMP; + break; } return new Quantifier(min, max); } + @Override public String toString() { return orig; } @@ -933,7 +828,7 @@ class Lexer { * @return true if there are no more characters in the pattern. 
*/ public boolean isEmpty() { - return ch == 0 && lookAhead == 0 && index == patternFullLength && !isSpecial(); + return ch == 0 && lookAhead == 0 && index == patternFullLength && !isSpecial(); } /** @@ -941,7 +836,7 @@ class Lexer { */ public static boolean isLetter(int ch) { - //all supplementary codepoints have integer value that is >= 0; + // all supplementary codepoints have integer value that is >= 0; return ch >= 0; } @@ -956,10 +851,9 @@ class Lexer { } /* - * Note that Character class methods - * isHighSurrogate(), isLowSurrogate() - * take char parameter while we need an int - * parameter without truncation to char value + * Note that Character class methods isHighSurrogate(), isLowSurrogate() + * take char parameter while we need an int parameter without truncation to + * char value */ public boolean isHighSurrogate() { return (ch <= 0xDBFF) && (ch >= 0xD800); @@ -980,7 +874,7 @@ class Lexer { /** * Process hexadecimal integer. */ - private int readHex(String radixName, int max) { + private int readHex(int max) { StringBuilder st = new StringBuilder(max); int length = pattern.length - 2; int i; @@ -1008,18 +902,17 @@ class Lexer { int length = pattern.length - 2; switch (first = Character.digit(pattern[index], 8)) { - case -1: - throw new TPatternSyntaxException("", this.toString(), index); - default: { - if (first > 3) - max--; - nextIndex(); - res = first; - } + case -1: + throw new TPatternSyntaxException("", this.toString(), index); + default: { + if (first > 3) + max--; + nextIndex(); + res = first; + } } - while (i < max && index < length - && (first = Character.digit(pattern[index], 8)) >= 0) { + while (i < max && index < length && (first = Character.digit(pattern[index], 8)) >= 0) { res = res * 8 + first; nextIndex(); i++; @@ -1039,66 +932,53 @@ class Lexer { while (index < pattern.length) { ch = pattern[index]; switch (ch) { - case '-': - if (!pos) { - throw new TPatternSyntaxException("", this.toString(), index); - } - pos = false; - break; + 
case '-': + if (!pos) { + throw new TPatternSyntaxException("", this.toString(), index); + } + pos = false; + break; - case 'i': - res = pos - ? res | TPattern.CASE_INSENSITIVE - : (res ^ TPattern.CASE_INSENSITIVE) & res; - break; + case 'i': + res = pos ? res | TPattern.CASE_INSENSITIVE : (res ^ TPattern.CASE_INSENSITIVE) & res; + break; - case 'd': - res = pos - ? res | TPattern.UNIX_LINES - : (res ^ TPattern.UNIX_LINES) & res; - break; + case 'd': + res = pos ? res | TPattern.UNIX_LINES : (res ^ TPattern.UNIX_LINES) & res; + break; - case 'm': - res = pos - ? res | TPattern.MULTILINE - : (res ^ TPattern.MULTILINE) & res; - break; + case 'm': + res = pos ? res | TPattern.MULTILINE : (res ^ TPattern.MULTILINE) & res; + break; - case 's': - res = pos - ? res | TPattern.DOTALL - : (res ^ TPattern.DOTALL) & res; - break; + case 's': + res = pos ? res | TPattern.DOTALL : (res ^ TPattern.DOTALL) & res; + break; - case 'u': - res = pos - ? res | TPattern.UNICODE_CASE - : (res ^ TPattern.UNICODE_CASE) & res; - break; + case 'u': + res = pos ? res | TPattern.UNICODE_CASE : (res ^ TPattern.UNICODE_CASE) & res; + break; - case 'x': - res = pos - ? res | TPattern.COMMENTS - : (res ^ TPattern.COMMENTS) & res; - break; + case 'x': + res = pos ? res | TPattern.COMMENTS : (res ^ TPattern.COMMENTS) & res; + break; - case ':': - nextIndex(); - return res; + case ':': + nextIndex(); + return res; - case ')': - nextIndex(); - return res | (1 << 8); + case ')': + nextIndex(); + return res | (1 << 8); - default: - // ignore invalid flags (HARMONY-2127) + default: + // ignore invalid flags (HARMONY-2127) } nextIndex(); } throw new TPatternSyntaxException("", this.toString(), index); } - /** * Returns next character index to read and moves pointer to the next one. * If comments flag is on this method will skip comments and whitespaces. @@ -1141,27 +1021,27 @@ class Lexer { } /** - * Gets decomposition for given codepoint from - * decomposition mappings table. 
+ * Gets decomposition for given codepoint from decomposition mappings table. * - * @param ch - Unicode codepoint - * @return array of codepoints that is a canonical - * decomposition of ch. + * @param ch + * - Unicode codepoint + * @return array of codepoints that is a canonical decomposition of ch. */ - static int [] getDecomposition(int ch) { + static int[] getDecomposition(int ch) { return decompTable.get(ch); } /** - * Gets decomposition for given Hangul syllable. - * This is an implementation of Hangul decomposition algorithm - * according to http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf + * Gets decomposition for given Hangul syllable. This is an implementation + * of Hangul decomposition algorithm according to + * http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf * "3.12 Conjoining Jamo Behavior". * - * @param ch - given Hangul syllable + * @param ch + * - given Hangul syllable * @return canonical decomposition of ch. */ - static int [] getHangulDecomposition(int ch) { + static int[] getHangulDecomposition(int ch) { int SIndex = ch - SBase; if (SIndex < 0 || SIndex >= SCount) { @@ -1170,58 +1050,56 @@ class Lexer { int L = LBase + SIndex / NCount; int V = VBase + (SIndex % NCount) / TCount; int T = SIndex % TCount; - int decomp []; + int decomp[]; if (T == 0) { - decomp = new int [] {L, V}; + decomp = new int[] { L, V }; } else { T = TBase + T; - decomp = new int [] {L, V, T}; + decomp = new int[] { L, V, T }; } return decomp; } } /** - * Gets canonical class for given codepoint from - * decomposition mappings table. + * Gets canonical class for given codepoint from decomposition mappings + * table. * * @param - ch Unicode codepoint - * @return canonical class for given Unicode codepoint - * that is represented by ch. + * @return canonical class for given Unicode codepoint that is represented + * by ch. */ static int getCanonicalClass(int ch) { int canClass = canonClassesTable.get(ch); - return (canClass == canonClassesTableSize) - ? 
0 - : canClass; + return (canClass == canonClassesTableSize) ? 0 : canClass; } /** * Tests if given codepoint is a canonical decomposition of another * codepoint. * - * @param ch - codepoint to test + * @param ch + * - codepoint to test * @return true if ch is a decomposition. */ static boolean hasSingleCodepointDecomposition(int ch) { int hasSingleDecomp = singleDecompTable.get(ch); /* - * singleDecompTable doesn't contain ch - * == (hasSingleDecomp == singleDecompTableSize) + * singleDecompTable doesn't contain ch == (hasSingleDecomp == + * singleDecompTableSize) */ - return (hasSingleDecomp == singleDecompTableSize) - ? false - : true; + return (hasSingleDecomp == singleDecompTableSize) ? false : true; } /** - * Tests if given codepoint has canonical decomposition - * and given codepoint's canonical class is not 0. + * Tests if given codepoint has canonical decomposition and given + * codepoint's canonical class is not 0. * - * @param ch - codepoint to test + * @param ch + * - codepoint to test * @return true if canonical class is not 0 and ch has a decomposition. */ static boolean hasDecompositionNonNullCanClass(int ch) { @@ -1233,7 +1111,7 @@ class Lexer { if (Character.isHighSurrogate(high)) { - //low and high char may be delimited by spaces + // low and high char may be delimited by spaces int lowExpectedIndex = prevNW + 1; if (lowExpectedIndex < pattern.length) { @@ -1245,23 +1123,24 @@ class Lexer { } } - return (int) high; + return high; } /** - * Tests Unicode codepoint if it is a boundary - * of decomposed Unicode codepoint. + * Tests Unicode codepoint if it is a boundary of decomposed Unicode + * codepoint. * - * @param ch - Unicode codepoint to test + * @param ch + * - Unicode codepoint to test * @return true if given codepoint is a boundary. 
*/ - static boolean isDecomposedCharBoundary(int ch) { - int canClass = canonClassesTable.get(ch); + static boolean isDecomposedCharBoundary(int ch) { + int canClass = canonClassesTable.get(ch); - //Lexer.getCanonicalClass(ch) == 0 - boolean isBoundary = (canClass == canonClassesTableSize); + // Lexer.getCanonicalClass(ch) == 0 + boolean isBoundary = (canClass == canonClassesTableSize); - return isBoundary; + return isBoundary; } /** diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowHighSurrogateRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowHighSurrogateRangeSet.java index f6c103b9e..b9db75eb0 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowHighSurrogateRangeSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowHighSurrogateRangeSet.java @@ -105,15 +105,18 @@ class LowHighSurrogateRangeSet extends JointSet { /** * Returns the next. */ + @Override public AbstractSet getNext() { return this.next; } /** * Sets next abstract set. + * * @param next * The next to set. */ + @Override public void setNext(AbstractSet next) { this.next = next; } @@ -121,8 +124,8 @@ class LowHighSurrogateRangeSet extends JointSet { /** * Returns stringIndex+shift, the next position to match */ - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int startStr = matchResult.getLeftBound(); int strLength = matchResult.getRightBound(); @@ -160,10 +163,12 @@ class LowHighSurrogateRangeSet extends JointSet { return next.matches(stringIndex + 1, testString, matchResult); } + @Override protected String getName() { return "range:" + (alt ? 
"^ " : " ") + surrChars.toString(); } + @Override public boolean first(AbstractSet set) { if (set instanceof CharSet) { return false; @@ -182,6 +187,7 @@ class LowHighSurrogateRangeSet extends JointSet { return surrChars; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return true; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowSurrogateCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowSurrogateCharSet.java index 008c8fa08..5af40e9c0 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowSurrogateCharSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/LowSurrogateCharSet.java @@ -85,12 +85,12 @@ package org.teavm.classlib.java.util.regex; /** * This class represents low surrogate character. */ -class LowSurrogateCharSet extends JointSet{ +class LowSurrogateCharSet extends JointSet { /* - * Note that we can use high and low surrogate characters - * that don't combine into supplementary code point. - * See http://www.unicode.org/reports/tr18/#Supplementary_Characters + * Note that we can use high and low surrogate characters that don't combine + * into supplementary code point. See + * http://www.unicode.org/reports/tr18/#Supplementary_Characters */ private char low; @@ -101,21 +101,24 @@ class LowSurrogateCharSet extends JointSet{ /** * Returns the next. */ + @Override public AbstractSet getNext() { return this.next; } /** * Sets next abstract set. + * * @param next * The next to set. 
*/ + @Override public void setNext(AbstractSet next) { this.next = next; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { if (stringIndex + 1 > matchResult.getRightBound()) { matchResult.hitEnd = true; @@ -128,8 +131,8 @@ class LowSurrogateCharSet extends JointSet{ char high = testString.charAt(stringIndex - 1); /* - * we consider high surrogate followed by - * low surrogate as a codepoint + * we consider high surrogate followed by low surrogate as a + * codepoint */ if (Character.isHighSurrogate(high)) { return -1; @@ -137,17 +140,16 @@ class LowSurrogateCharSet extends JointSet{ } if (this.low == low) { - return next.matches(stringIndex + 1, testString, - matchResult); + return next.matches(stringIndex + 1, testString, matchResult); } return -1; } - public int find(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) { if (testString instanceof String) { - String testStr = (String) testString; + String testStr = (String)testString; int startStr = matchResult.getLeftBound(); int strLength = matchResult.getRightBound(); @@ -160,8 +162,8 @@ class LowSurrogateCharSet extends JointSet{ if (strIndex > startStr) { /* - * we consider high surrogate followed by - * low surrogate as a codepoint + * we consider high surrogate followed by low surrogate as a + * codepoint */ if (Character.isHighSurrogate(testStr.charAt(strIndex - 1))) { strIndex++; @@ -181,11 +183,11 @@ class LowSurrogateCharSet extends JointSet{ return super.find(strIndex, testString, matchResult); } - public int findBack(int strIndex, int lastIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) { if (testString 
instanceof String) { int startStr = matchResult.getLeftBound(); - String testStr = (String) testString; + String testStr = (String)testString; while (lastIndex >= strIndex) { lastIndex = testStr.lastIndexOf(low, lastIndex); @@ -196,8 +198,8 @@ class LowSurrogateCharSet extends JointSet{ if (lastIndex > startStr) { /* - * we consider high surrogate followed by - * low surrogate as a codepoint + * we consider high surrogate followed by low surrogate as a + * codepoint */ if (Character.isHighSurrogate(testStr.charAt(lastIndex - 1))) { lastIndex -= 2; @@ -218,6 +220,7 @@ class LowSurrogateCharSet extends JointSet{ return super.findBack(strIndex, lastIndex, testString, matchResult); } + @Override protected String getName() { return "" + low; } @@ -226,6 +229,7 @@ class LowSurrogateCharSet extends JointSet{ return low; } + @Override public boolean first(AbstractSet set) { if (set instanceof CharSet) { return false; @@ -238,12 +242,13 @@ class LowSurrogateCharSet extends JointSet{ } else if (set instanceof HighSurrogateCharSet) { return false; } else if (set instanceof LowSurrogateCharSet) { - return ((LowSurrogateCharSet) set).low == this.low; + return ((LowSurrogateCharSet)set).low == this.low; } return true; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return true; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MatchResultImpl.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MatchResultImpl.java index 0ebc0e218..52d136723 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MatchResultImpl.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MatchResultImpl.java @@ -22,10 +22,9 @@ package org.teavm.classlib.java.util.regex; import java.util.Arrays; - /** - * Match result implementation - * Note: probably it might make sense to combine this class with Matcher. 
+ * Match result implementation Note: probably it might make sense to combine + * this class with Matcher. * * @author Nikolay A. Kuznetsov */ @@ -61,8 +60,8 @@ class MatchResultImpl implements TMatchResult { private int mode; - MatchResultImpl(CharSequence string, int leftBound, int rightBound, - int groupCount, int compQuantCount, int consumersCount) { + MatchResultImpl(CharSequence string, int leftBound, int rightBound, int groupCount, int compQuantCount, + int consumersCount) { this.groupCount = ++groupCount; this.groupBounds = new int[groupCount * 2]; @@ -76,13 +75,12 @@ class MatchResultImpl implements TMatchResult { } TMatchResult cloneImpl() { - MatchResultImpl res = new MatchResultImpl(this.string, this.leftBound, - this.rightBound, this.groupCount - 1, 0, 0); + MatchResultImpl res = new MatchResultImpl(this.string, this.leftBound, this.rightBound, this.groupCount - 1, 0, + 0); res.valid = valid; if (valid) { - System.arraycopy(groupBounds, 0, res.groupBounds, 0, - this.groupBounds.length); + System.arraycopy(groupBounds, 0, res.groupBounds, 0, this.groupBounds.length); } return res; } @@ -95,10 +93,12 @@ class MatchResultImpl implements TMatchResult { return this.consumers[counter]; } + @Override public int end() { return end(0); } + @Override public int end(int group) { checkGroup(group); return groupBounds[group * 2 + 1]; @@ -120,10 +120,12 @@ class MatchResultImpl implements TMatchResult { return groupBounds[group * 2 + 1]; } + @Override public String group() { return group(0); } + @Override public String group(int group) { if (start(group) < 0) return null; @@ -139,14 +141,17 @@ class MatchResultImpl implements TMatchResult { return string.subSequence(st, end).toString(); } + @Override public int groupCount() { return groupCount - 1; } + @Override public int start() { return start(0); } + @Override public int start(int group) { checkGroup(group); return groupBounds[group * 2]; diff --git 
a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineEOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineEOLSet.java index fd611af76..19428b49b 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineEOLSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineEOLSet.java @@ -33,11 +33,10 @@ class MultiLineEOLSet extends AbstractSet { this.consCounter = counter; } - public int matches(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { - int strDif = matchResult.hasAnchoringBounds() ? matchResult - .getLeftBound() - - strIndex : testString.length() - strIndex; + @Override + public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) { + int strDif = matchResult.hasAnchoringBounds() ? matchResult.getLeftBound() - strIndex : testString.length() - + strIndex; char ch1; char ch2; if (strDif == 0) { @@ -52,28 +51,29 @@ class MultiLineEOLSet extends AbstractSet { } switch (ch1) { - case '\r': { - if (ch2 == '\n') { + case '\r': { + if (ch2 == '\n') { + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } matchResult.setConsumed(consCounter, 0); return next.matches(strIndex, testString, matchResult); } - matchResult.setConsumed(consCounter, 0); - return next.matches(strIndex, testString, matchResult); - } - case '\n': - case '\u0085': - case '\u2028': - case '\u2029': { - matchResult.setConsumed(consCounter, 0); - return next.matches(strIndex, testString, matchResult); - } + case '\n': + case '\u0085': + case '\u2028': + case '\u2029': { + matchResult.setConsumed(consCounter, 0); + return next.matches(strIndex, testString, matchResult); + } - default: - return -1; + default: + return -1; } } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { int cons; boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); @@ -81,6 +81,7 
@@ class MultiLineEOLSet extends AbstractSet { return res; } + @Override protected String getName() { return ""; //$NON-NLS-1$ } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineSOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineSOLSet.java index 175805672..ca49d0069 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineSOLSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/MultiLineSOLSet.java @@ -22,6 +22,7 @@ package org.teavm.classlib.java.util.regex; /** * Multiline version of the ^ sign. + * * @author Nikolay A. Kuznetsov */ class MultiLineSOLSet extends AbstractSet { @@ -32,25 +33,23 @@ class MultiLineSOLSet extends AbstractSet { this.lt = lt; } - public int matches(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { - if (strIndex != matchResult.getRightBound() - && ((strIndex == 0 || (matchResult.hasAnchoringBounds() && strIndex == matchResult - .getLeftBound())) || lt.isAfterLineTerminator( - testString.charAt(strIndex - 1), testString - .charAt(strIndex)))) { + @Override + public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) { + if (strIndex != matchResult.getRightBound() && + ((strIndex == 0 || (matchResult.hasAnchoringBounds() && strIndex == matchResult.getLeftBound())) || lt + .isAfterLineTerminator(testString.charAt(strIndex - 1), testString.charAt(strIndex)))) { return next.matches(strIndex, testString, matchResult); } - return -1; - } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return false; } + @Override protected String getName() { - return "^"; //$NON-NLS-1$ + return "^"; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookAhead.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookAhead.java index 63d17f927..a93dfd4ce 100644 --- 
a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookAhead.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookAhead.java @@ -29,19 +29,19 @@ import java.util.ArrayList; */ class NegativeLookAhead extends AtomicJointSet { - public NegativeLookAhead(ArrayList children, FSet fSet) { + public NegativeLookAhead(ArrayList children, FSet fSet) { super(children, fSet); } /** * Returns stringIndex+shift, the next position to match */ - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int size = children.size(); for (int i = 0; i < size; i++) { - AbstractSet e = (AbstractSet) children.get(i); + AbstractSet e = children.get(i); if (e.matches(stringIndex, testString, matchResult) >= 0) return -1; } @@ -49,11 +49,13 @@ class NegativeLookAhead extends AtomicJointSet { return next.matches(stringIndex, testString, matchResult); } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return false; } + @Override protected String getName() { - return "NegLookaheadJointSet"; //$NON-NLS-1$ + return "NegLookaheadJointSet"; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookBehind.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookBehind.java index 6dd0998f3..5eb334473 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookBehind.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NegativeLookBehind.java @@ -29,13 +29,14 @@ import java.util.ArrayList; */ class NegativeLookBehind extends AtomicJointSet { - public NegativeLookBehind(ArrayList children, FSet fSet) { + public NegativeLookBehind(ArrayList children, FSet fSet) { super(children, fSet); } /** * Returns stringIndex+shift, the next position to match */ + @Override public int 
matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { @@ -47,7 +48,7 @@ class NegativeLookBehind extends AtomicJointSet { matchResult.setConsumed(groupIndex, stringIndex); for (int i = 0; i < size; i++) { - AbstractSet e = (AbstractSet) children.get(i); + AbstractSet e = children.get(i); // find limits could be calculated though e.getCharCount() // fSet will return true only if string index at fSet equal // to stringIndex @@ -60,11 +61,13 @@ class NegativeLookBehind extends AtomicJointSet { return next.matches(stringIndex, testString, matchResult); } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return false; } + @Override protected String getName() { - return "NegBehindJointSet"; //$NON-NLS-1$ + return "NegBehindJointSet"; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapFSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapFSet.java index c88aa8d9d..5e07f81d7 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapFSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapFSet.java @@ -26,24 +26,24 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. 
Kuznetsov */ class NonCapFSet extends FSet { - public NonCapFSet(int groupIndex) { super(groupIndex); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int gr = getGroupIndex(); matchResult.setConsumed(gr, stringIndex - matchResult.getConsumed(gr)); return next.matches(stringIndex, testString, matchResult); } + @Override protected String getName() { - return "NonCapFSet"; //$NON-NLS-1$ + return "NonCapFSet"; } + @Override public boolean hasConsumed(MatchResultImpl mr) { return false; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapJointSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapJointSet.java index 29ee3393f..3a28acc93 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapJointSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/NonCapJointSet.java @@ -31,13 +31,14 @@ class NonCapJointSet extends JointSet { protected NonCapJointSet() { } - public NonCapJointSet(ArrayList children, FSet fSet) { + public NonCapJointSet(ArrayList children, FSet fSet) { super(children, fSet); } /** * Returns stringIndex+shift, the next position to match */ + @Override public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int start = matchResult.getConsumed(groupIndex); @@ -45,7 +46,7 @@ class NonCapJointSet extends JointSet { int size = children.size(); for (int i = 0; i < size; i++) { - AbstractSet e = (AbstractSet) children.get(i); + AbstractSet e = children.get(i); int shift = e.matches(stringIndex, testString, matchResult); if (shift >= 0) { return shift; @@ -55,10 +56,12 @@ class NonCapJointSet extends JointSet { return -1; } + @Override protected String getName() { return "NonCapJointSet"; //$NON-NLS-1$ } + @Override public boolean hasConsumed(MatchResultImpl 
matchResult) { int cons = matchResult.getConsumed(groupIndex); return cons != 0; diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosAltGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosAltGroupQuantifierSet.java index d9746191a..8f9e83767 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosAltGroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosAltGroupQuantifierSet.java @@ -21,29 +21,28 @@ package org.teavm.classlib.java.util.regex; /** - * Possessive quantifier over group, see java.util.regex.GroupQuantifierSet - * for more details. + * Possessive quantifier over group, see java.util.regex.GroupQuantifierSet for + * more details. * * @author Nikolay A. Kuznetsov */ class PosAltGroupQuantifierSet extends AltGroupQuantifierSet { - public PosAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, - int type) { + public PosAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) { super(innerSet, next, type); - ((JointSet) innerSet).setNext(FSet.posFSet); + ((JointSet)innerSet).setNext(FSet.posFSet); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int nextIndex = innerSet.matches(stringIndex, testString, matchResult); - if (nextIndex > 0) + if (nextIndex > 0) { stringIndex = nextIndex; - + } return next.matches(stringIndex, testString, matchResult); } + @Override public void setNext(AbstractSet next) { this.next = next; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosCompositeGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosCompositeGroupQuantifierSet.java index 64dc5f7e5..061bd2018 100644 --- 
a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosCompositeGroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosCompositeGroupQuantifierSet.java @@ -22,26 +22,24 @@ package org.teavm.classlib.java.util.regex; /** * Possessive composite (i.e. {n,m}) quantifier node over groups. + * * @author Nikolay A. Kuznetsov */ class PosCompositeGroupQuantifierSet extends CompositeGroupQuantifierSet { - public PosCompositeGroupQuantifierSet(Quantifier quant, - AbstractSet innerSet, AbstractSet next, int type, int setCounter) { + public PosCompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, AbstractSet next, int type, + int setCounter) { super(quant, innerSet, next, type, setCounter); innerSet.setNext(FSet.posFSet); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int nextIndex; int counter = 0; int max = quantifier.max(); - while ((nextIndex = innerSet.matches(stringIndex, testString, - matchResult)) > stringIndex - && counter < max) { + while ((nextIndex = innerSet.matches(stringIndex, testString, matchResult)) > stringIndex && counter < max) { counter++; stringIndex = nextIndex; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosPlusGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosPlusGroupQuantifierSet.java index 1f8ae74dc..265ea0e99 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosPlusGroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PosPlusGroupQuantifierSet.java @@ -27,23 +27,19 @@ package org.teavm.classlib.java.util.regex; */ class PosPlusGroupQuantifierSet extends GroupQuantifierSet { - public PosPlusGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, - int type) { + public 
PosPlusGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) { super(innerSet, next, type); - ((JointSet) innerSet).setNext(FSet.posFSet); - + ((JointSet)innerSet).setNext(FSet.posFSet); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int nextIndex; if ((nextIndex = innerSet.matches(stringIndex, testString, matchResult)) < 0) { return -1; } else if (nextIndex > stringIndex) { stringIndex = nextIndex; - while ((nextIndex = innerSet.matches(stringIndex, testString, - matchResult)) > stringIndex) { + while ((nextIndex = innerSet.matches(stringIndex, testString, matchResult)) > stringIndex) { stringIndex = nextIndex; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookAhead.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookAhead.java index 545572519..66e4ea841 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookAhead.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookAhead.java @@ -28,19 +28,18 @@ import java.util.ArrayList; * @author Nikolay A. 
Kuznetsov */ class PositiveLookAhead extends AtomicJointSet { - - public PositiveLookAhead(ArrayList children, FSet fSet) { + public PositiveLookAhead(ArrayList children, FSet fSet) { super(children, fSet); } /** * Returns stringIndex+shift, the next position to match */ - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int size = children.size(); for (int i = 0; i < size; i++) { - AbstractSet e = (AbstractSet) children.get(i); + AbstractSet e = children.get(i); int shift = e.matches(stringIndex, testString, matchResult); if (shift >= 0) { // PosLookaheadFset always returns true, position remains the @@ -53,11 +52,13 @@ class PositiveLookAhead extends AtomicJointSet { return -1; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return false; } + @Override protected String getName() { - return "PosLookaheadJointSet"; //$NON-NLS-1$ + return "PosLookaheadJointSet"; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookBehind.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookBehind.java index d3425bbfe..9392ddac9 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookBehind.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PositiveLookBehind.java @@ -29,31 +29,30 @@ import java.util.ArrayList; */ class PositiveLookBehind extends AtomicJointSet { - public PositiveLookBehind(ArrayList children, FSet fSet) { + public PositiveLookBehind(ArrayList children, FSet fSet) { super(children, fSet); } /** * Returns stringIndex+shift, the next position to match */ - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int size = 
children.size(); - int leftBound = matchResult.hasTransparentBounds()? - 0 : matchResult.getLeftBound(); + int leftBound = matchResult.hasTransparentBounds() ? 0 : matchResult.getLeftBound(); int shift = next.matches(stringIndex, testString, matchResult); if (shift >= 0) { - //fSet will take this index to check if we at the right bound + // fSet will take this index to check if we at the right bound // and return true if the current index equal to this one matchResult.setConsumed(groupIndex, stringIndex); for (int i = 0; i < size; i++) { - AbstractSet e = (AbstractSet) children.get(i); + AbstractSet e = children.get(i); // find limits could be calculated though e.getCharCount() // fSet will return true only if string index at fSet equal // to stringIndex - if (e.findBack(leftBound, stringIndex, testString, matchResult) >=0) { + if (e.findBack(leftBound, stringIndex, testString, matchResult) >= 0) { matchResult.setConsumed(groupIndex, -1); return shift; } @@ -63,11 +62,13 @@ class PositiveLookBehind extends AtomicJointSet { return -1; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return false; } + @Override protected String getName() { - return "PosBehindJointSet"; //$NON-NLS-1$ + return "PosBehindJointSet"; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveAltQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveAltQuantifierSet.java index 920189678..2c27aa3b4 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveAltQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveAltQuantifierSet.java @@ -26,18 +26,16 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. 
Kuznetsov */ class PossessiveAltQuantifierSet extends AltQuantifierSet { - - public PossessiveAltQuantifierSet(LeafSet innerSet, AbstractSet next, - int type) { + public PossessiveAltQuantifierSet(LeafSet innerSet, AbstractSet next, int type) { super(innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int shift = 0; - if (stringIndex + leaf.charCount() <= matchResult.getRightBound() - && (shift = leaf.accepts(stringIndex, testString)) >= 1) { + if (stringIndex + leaf.charCount() <= matchResult.getRightBound() && + (shift = leaf.accepts(stringIndex, testString)) >= 1) { stringIndex += shift; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveCompositeQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveCompositeQuantifierSet.java index bf3408373..525bdca12 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveCompositeQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveCompositeQuantifierSet.java @@ -26,14 +26,12 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. 
Kuznetsov */ class PossessiveCompositeQuantifierSet extends CompositeQuantifierSet { - - public PossessiveCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, - AbstractSet next, int type) { + public PossessiveCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, AbstractSet next, int type) { super(quant, innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int min = quantifier.min(); int max = quantifier.max(); int i = 0; @@ -52,8 +50,8 @@ class PossessiveCompositeQuantifierSet extends CompositeQuantifierSet { for (; i < max; i++) { int shift; - if (stringIndex + leaf.charCount() > matchResult.getRightBound() - || (shift = leaf.accepts(stringIndex, testString)) < 1) { + if (stringIndex + leaf.charCount() > matchResult.getRightBound() || + (shift = leaf.accepts(stringIndex, testString)) < 1) { break; } stringIndex += shift; diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveGroupQuantifierSet.java index 14671d03a..ce748b26a 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveGroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveGroupQuantifierSet.java @@ -26,20 +26,16 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. 
Kuznetsov */ class PossessiveGroupQuantifierSet extends GroupQuantifierSet { - - public PossessiveGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, - int type) { + public PossessiveGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) { super(innerSet, next, type); innerSet.setNext(FSet.posFSet); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int nextIndex; - while ((nextIndex = innerSet.matches(stringIndex, testString, - matchResult)) > 0) { + while ((nextIndex = innerSet.matches(stringIndex, testString, matchResult)) > 0) { stringIndex = nextIndex; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveQuantifierSet.java index cfdc1f2b9..5fd7c7072 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PossessiveQuantifierSet.java @@ -31,11 +31,11 @@ class PossessiveQuantifierSet extends LeafQuantifierSet { super(innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int shift = 0; - while (stringIndex + leaf.charCount() <= matchResult.getRightBound() - && (shift = leaf.accepts(stringIndex, testString)) >= 1) { + while (stringIndex + leaf.charCount() <= matchResult.getRightBound() && + (shift = leaf.accepts(stringIndex, testString)) >= 1) { stringIndex += shift; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PreviousMatch.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PreviousMatch.java index 90e0a502a..d6368da51 
100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PreviousMatch.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/PreviousMatch.java @@ -26,11 +26,7 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. Kuznetsov */ class PreviousMatch extends AbstractSet { - - /** - * @see java.util.regex.AbstractSet#matches(int, java.lang.CharSequence, - * java.util.regex.MatchResultImpl) - */ + @Override public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { if (stringIndex == matchResult.getPreviousMatchEnd()) { @@ -39,11 +35,13 @@ class PreviousMatch extends AbstractSet { return -1; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return false; } + @Override protected String getName() { - return "PreviousMatch"; //$NON-NLS-1$ + return "PreviousMatch"; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Quantifier.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Quantifier.java index f86de69dd..7b49303fb 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Quantifier.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/Quantifier.java @@ -64,18 +64,17 @@ class Quantifier extends SpecialToken implements Cloneable { return max; } + @Override public String toString() { - return "{" //$NON-NLS-1$ - + min - + "," //$NON-NLS-1$ - + ((max == Integer.MAX_VALUE) ? "" : new Integer(max) //$NON-NLS-1$ - .toString()) + "}"; //$NON-NLS-1$ + return "{" + min + "," + ((max == Integer.MAX_VALUE) ? 
"" : new Integer(max).toString()) + "}"; } + @Override public int getType() { return SpecialToken.TOK_QUANTIFIER; } + @Override public Object clone() { return new Quantifier(min, max); } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/QuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/QuantifierSet.java index 39dcbce1e..450e81057 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/QuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/QuantifierSet.java @@ -44,6 +44,7 @@ abstract class QuantifierSet extends AbstractSet { /** * Sets an inner set. + * * @param innerSet * The innerSet to set. */ @@ -51,82 +52,83 @@ abstract class QuantifierSet extends AbstractSet { this.innerSet = innerSet; } + @Override public boolean first(AbstractSet set) { return innerSet.first(set) || next.first(set); } + @Override public boolean hasConsumed(MatchResultImpl mr) { return true; } /** - * This method is used for traversing nodes after the - * first stage of compilation. + * This method is used for traversing nodes after the first stage of + * compilation. 
*/ + @Override public void processSecondPass() { - this.isSecondPassVisited = true; + this.isSecondPassVisited = true; - if (next != null) { + if (next != null) { - if (!next.isSecondPassVisited) { + if (!next.isSecondPassVisited) { - /* - * Add here code to do during the pass - */ - JointSet set = next.processBackRefReplacement(); + /* + * Add here code to do during the pass + */ + JointSet set = next.processBackRefReplacement(); - if (set != null) { - next.isSecondPassVisited = true; - next =(AbstractSet) set; - } + if (set != null) { + next.isSecondPassVisited = true; + next = set; + } - /* - * End code to do during the pass - */ - next.processSecondPass(); - } - } + /* + * End code to do during the pass + */ + next.processSecondPass(); + } + } - if (innerSet != null) { + if (innerSet != null) { - if (!innerSet.isSecondPassVisited) { + if (!innerSet.isSecondPassVisited) { - /* - * Add here code to do during the pass - */ - JointSet set = innerSet.processBackRefReplacement(); + /* + * Add here code to do during the pass + */ + JointSet set = innerSet.processBackRefReplacement(); - if (set != null) { - innerSet.isSecondPassVisited = true; - innerSet =(AbstractSet) set; - } + if (set != null) { + innerSet.isSecondPassVisited = true; + innerSet = set; + } - /* - * End code to do during the pass - */ - innerSet.processSecondPass(); - } else { + /* + * End code to do during the pass + */ + innerSet.processSecondPass(); + } else { - /* - * We reach node through innerSet but it is already traversed. - * You can see this situation for GroupQuantifierSet.innerset - * if we compile smth like "(a)+ when - * GroupQuantifierSet == GroupQuantifierSet.innerset.fSet.next - */ + /* + * We reach node through innerSet but it is already traversed. 
+ * You can see this situation for GroupQuantifierSet.innerset if + * we compile smth like "(a)+ when GroupQuantifierSet == + * GroupQuantifierSet.innerset.fSet.next + */ - /* - * Add here code to do during the pass - */ - if (innerSet instanceof SingleSet - && ((FSet) ((JointSet) innerSet).fSet) - .isBackReferenced) { - innerSet = innerSet.next; - } + /* + * Add here code to do during the pass + */ + if (innerSet instanceof SingleSet && ((FSet)((JointSet)innerSet).fSet).isBackReferenced) { + innerSet = innerSet.next; + } - /* - * End code to do during the pass - */ - } - } + /* + * End code to do during the pass + */ + } + } } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RangeSet.java index f17930efb..23cd8e742 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RangeSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RangeSet.java @@ -43,24 +43,24 @@ class RangeSet extends LeafSet { this.alt = cc.alt; } + @Override public int accepts(int strIndex, CharSequence testString) { return chars.contains(testString.charAt(strIndex)) ? 1 : -1; } + @Override protected String getName() { - return "range:" + (alt ? "^ " : " ") + chars.toString(); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + return "range:" + (alt ? 
"^ " : " ") + chars.toString(); } + @Override public boolean first(AbstractSet set) { if (set instanceof CharSet) { - return AbstractCharClass.intersects(chars, ((CharSet) set) - .getChar()); + return AbstractCharClass.intersects(chars, ((CharSet)set).getChar()); } else if (set instanceof RangeSet) { - return AbstractCharClass.intersects(chars, ((RangeSet) set) - .chars); + return AbstractCharClass.intersects(chars, ((RangeSet)set).chars); } else if (set instanceof SupplRangeSet) { - return AbstractCharClass.intersects(chars, ((SupplRangeSet) set) - .getChars()); + return AbstractCharClass.intersects(chars, ((SupplRangeSet)set).getChars()); } else if (set instanceof SupplCharSet) { return false; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelAltGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelAltGroupQuantifierSet.java index 204eba0b4..49f791174 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelAltGroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelAltGroupQuantifierSet.java @@ -22,17 +22,17 @@ package org.teavm.classlib.java.util.regex; /** * Reluctant version of "?" quantifier set over group. + * * @author Nikolay A. 
Kuznetsov */ class RelAltGroupQuantifierSet extends AltGroupQuantifierSet { - public RelAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, - int type) { + public RelAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) { super(innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { if (!innerSet.hasConsumed(matchResult)) return next.matches(stringIndex, testString, matchResult); diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelCompositeGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelCompositeGroupQuantifierSet.java index a697b91d0..b0196527c 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelCompositeGroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/RelCompositeGroupQuantifierSet.java @@ -21,20 +21,19 @@ package org.teavm.classlib.java.util.regex; /** - * Reluctant version of composite (i.e. {n,m}) quantifier node over - * group. + * Reluctant version of composite (i.e. {n,m}) quantifier node over group. * * @author Nikolay A. 
Kuznetsov */ class RelCompositeGroupQuantifierSet extends CompositeGroupQuantifierSet { - public RelCompositeGroupQuantifierSet(Quantifier quant, - AbstractSet innerSet, AbstractSet next, int type, int setCounter) { + public RelCompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, AbstractSet next, int type, + int setCounter) { super(quant, innerSet, next, type, setCounter); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int enterCounter = matchResult.getEnterCounter(setCounter); if (!innerSet.hasConsumed(matchResult)) @@ -52,8 +51,7 @@ class RelCompositeGroupQuantifierSet extends CompositeGroupQuantifierSet { nextIndex = next.matches(stringIndex, testString, matchResult); if (nextIndex < 0) { matchResult.setEnterCounter(setCounter, ++enterCounter); - nextIndex = innerSet.matches(stringIndex, testString, - matchResult); + nextIndex = innerSet.matches(stringIndex, testString, matchResult); } else { matchResult.setEnterCounter(setCounter, 0); return nextIndex; diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantAltQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantAltQuantifierSet.java index d30794de5..3548d9574 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantAltQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantAltQuantifierSet.java @@ -26,14 +26,12 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. 
Kuznetsov */ class ReluctantAltQuantifierSet extends AltQuantifierSet { - - public ReluctantAltQuantifierSet(LeafSet innerSet, AbstractSet next, - int type) { + public ReluctantAltQuantifierSet(LeafSet innerSet, AbstractSet next, int type) { super(innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int shift; if ((shift = next.matches(stringIndex, testString, matchResult)) >= 0) { diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantCompositeQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantCompositeQuantifierSet.java index 5e26b466f..084a24008 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantCompositeQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantCompositeQuantifierSet.java @@ -22,17 +22,16 @@ package org.teavm.classlib.java.util.regex; /** * Reluctant version of composite(i.e. {n,m}) quantifier set over leaf nodes. + * * @author Nikolay A. 
Kuznetsov */ class ReluctantCompositeQuantifierSet extends CompositeQuantifierSet { - - public ReluctantCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, - AbstractSet next, int type) { + public ReluctantCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, AbstractSet next, int type) { super(quant, innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int min = quantifier.min(); int max = quantifier.max(); int i = 0; diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantGroupQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantGroupQuantifierSet.java index 1d94564bb..497340f09 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantGroupQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantGroupQuantifierSet.java @@ -26,15 +26,12 @@ package org.teavm.classlib.java.util.regex; * @author Nikolay A. 
Kuznetsov */ class ReluctantGroupQuantifierSet extends GroupQuantifierSet { - - public ReluctantGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, - int type) { + public ReluctantGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) { super(innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { if (!innerSet.hasConsumed(matchResult)) return next.matches(stringIndex, testString, matchResult); diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantQuantifierSet.java index 44a789159..3dbaf5f3b 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/ReluctantQuantifierSet.java @@ -32,8 +32,8 @@ class ReluctantQuantifierSet extends LeafQuantifierSet { super(innerSet, next, type); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int shift = 0; do { diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SOLSet.java index ed08dd4eb..6e56ceb67 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SOLSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SOLSet.java @@ -27,6 +27,7 @@ package org.teavm.classlib.java.util.regex; */ final class SOLSet extends AbstractSet { + @Override public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) { if (strIndex == 0 @@ -37,11 +38,13 @@ final class SOLSet extends AbstractSet { 
return -1; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { return false; } + @Override protected String getName() { - return ""; //$NON-NLS-1$ + return ""; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SequenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SequenceSet.java index 2c17d8c08..b88fe34f7 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SequenceSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SequenceSet.java @@ -55,10 +55,12 @@ class SequenceSet extends LeafSet { } } + @Override public int accepts(int strIndex, CharSequence testString) { return startsWith(testString, strIndex) ? charCount : -1; } + @Override public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) { @@ -78,6 +80,7 @@ class SequenceSet extends LeafSet { return -1; } + @Override public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) { @@ -95,10 +98,12 @@ class SequenceSet extends LeafSet { return -1; } + @Override public String getName() { return "sequence: " + string; //$NON-NLS-1$ } + @Override public boolean first(AbstractSet set) { if (set instanceof CharSet) { return ((CharSet) set).getChar() == string.charAt(0); diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleDecompositions.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleDecompositions.java deleted file mode 100644 index e4f9719b7..000000000 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleDecompositions.java +++ /dev/null @@ -1,971 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.teavm.classlib.java.util.regex; - -/** - * This class gives us a hashtable that contains information about - * symbols that are one symbol decompositions that is - * generated from - * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt. - */ -class SingleDecompositions{ - - private static IntHash table = null; - - private SingleDecompositions() { - } - - public static IntHash getHashSingleDecompositions() { - if (table != null) { - return table; - } else { - table = new IntHash(1030); - table.put(0x6688, 0); - table.put(0x00B7, 0); - table.put(0x86A9, 0); - table.put(0x00B4, 0); - table.put(0x7409, 0); - table.put(0x559D, 0); - table.put(0x884C, 0); - table.put(0x523B, 0); - table.put(0x6DCB, 0); - table.put(0x8AFE, 0); - table.put(0x523A, 0); - table.put(0x7406, 0); - table.put(0x53F1, 0); - table.put(0x5599, 0); - table.put(0x6F6E, 0); - table.put(0x8AF8, 0); - table.put(0x3862, 0); - table.put(0x5B28, 0); - table.put(0x53EB, 0); - table.put(0x2300A, 0); - table.put(0x9929, 0); - table.put(0x9928, 0); - table.put(0x6674, 0); - table.put(0x8046, 0); - table.put(0x53E5, 0); - table.put(0x8AED, 0); - table.put(0x2054B, 0); - table.put(0x5229, 0); - table.put(0x64C4, 0); - table.put(0x278AE, 0); - table.put(0x53DF, 0); - table.put(0x5587, 0); - table.put(0x2335F, 0); - table.put(0x5584, 0); - table.put(0x92D8, 0); - table.put(0x92D7, 0); - table.put(0x5E74, 
0); - table.put(0x774A, 0); - table.put(0x911B, 0); - table.put(0x81ED, 0); - table.put(0x78FB, 0); - table.put(0x5CC0, 0); - table.put(0x7740, 0); - table.put(0x81E8, 0); - table.put(0x9111, 0); - table.put(0x9D67, 0); - table.put(0x5217, 0); - table.put(0x2A20E, 0); - table.put(0x5B08, 0); - table.put(0x5E69, 0); - table.put(0x8AD6, 0); - table.put(0x267B5, 0); - table.put(0x412F, 0); - table.put(0x53CA, 0); - table.put(0x8AD2, 0); - table.put(0x881F, 0); - table.put(0x53C3, 0); - table.put(0x2940A, 0); - table.put(0x8ACB, 0); - table.put(0x256C5, 0); - table.put(0x4BCE, 0); - table.put(0x81D8, 0); - table.put(0x5207, 0); - table.put(0x267A7, 0); - table.put(0x3AE4, 0); - table.put(0x5203, 0); - table.put(0x5563, 0); - table.put(0x7295, 0); - table.put(0x20525, 0); - table.put(0x6649, 0); - table.put(0x23F8E, 0); - table.put(0x382F, 0); - table.put(0x295B6, 0); - table.put(0x67FA, 0); - table.put(0x8ABF, 0); - table.put(0x2051C, 0); - table.put(0x03CE, 0); - table.put(0x67F3, 0); - table.put(0x9EFE, 0); - table.put(0x03CD, 0); - table.put(0x03CC, 0); - table.put(0x5555, 0); - table.put(0x5553, 0); - table.put(0x771F, 0); - table.put(0x8801, 0); - table.put(0x771E, 0); - table.put(0x446B, 0); - table.put(0x7280, 0); - table.put(0x9EF9, 0); - table.put(0x8005, 0); - table.put(0x70D9, 0); - table.put(0x8363, 0); - table.put(0x8001, 0); - table.put(0x5FF9, 0); - table.put(0x6F23, 0); - table.put(0x6F22, 0); - table.put(0x8C55, 0); - table.put(0x5E3D, 0); - table.put(0x5FF5, 0); - table.put(0x8AAA, 0); - table.put(0x78CC, 0); - table.put(0x78CA, 0); - table.put(0x4D56, 0); - table.put(0x6190, 0); - table.put(0x8AA0, 0); - table.put(0x618E, 0); - table.put(0x03B9, 0); - table.put(0x56F9, 0); - table.put(0x273CA, 0); - table.put(0x70C8, 0); - table.put(0x8353, 0); - table.put(0x8C48, 0); - table.put(0x8352, 0); - table.put(0x03B0, 0); - table.put(0x25044, 0); - table.put(0x7701, 0); - table.put(0x03AF, 0); - table.put(0x03AE, 0); - table.put(0x6F0F, 0); - 
table.put(0x03AD, 0); - table.put(0x03AC, 0); - table.put(0x5E28, 0); - table.put(0x8F9E, 0); - table.put(0x3009, 0); - table.put(0x7262, 0); - table.put(0x3008, 0); - table.put(0x23F5E, 0); - table.put(0x8DF0, 0); - table.put(0x03A9, 0); - table.put(0x42A0, 0); - table.put(0x5099, 0); - table.put(0x262D9, 0); - table.put(0x617A, 0); - table.put(0x6613, 0); - table.put(0x84FC, 0); - table.put(0x8DEF, 0); - table.put(0x5FD7, 0); - table.put(0x26D6B, 0); - table.put(0x27CA8, 0); - table.put(0x9ECE, 0); - table.put(0x22B0C, 0); - table.put(0x3C4E, 0); - table.put(0x84F3, 0); - table.put(0x25C80, 0); - table.put(0x45F9, 0); - table.put(0x84F1, 0); - table.put(0x70AD, 0); - table.put(0x5FCD, 0); - table.put(0x7250, 0); - table.put(0x4EE4, 0); - table.put(0x8336, 0); - table.put(0x84EE, 0); - table.put(0x6168, 0); - table.put(0x9415, 0); - table.put(0x24735, 0); - table.put(0x6160, 0); - table.put(0x9EBB, 0); - table.put(0x507A, 0); - table.put(0x5510, 0); - table.put(0x8688, 0); - table.put(0x8323, 0); - table.put(0x7CE8, 0); - table.put(0x8DCB, 0); - table.put(0x7CE7, 0); - table.put(0x7CE3, 0); - table.put(0x4ECC, 0); - table.put(0x831D, 0); - table.put(0x7235, 0); - table.put(0x45D7, 0); - table.put(0x95B7, 0); - table.put(0x5FAD, 0); - table.put(0x614E, 0); - table.put(0x614C, 0); - table.put(0x7B20, 0); - table.put(0x6942, 0); - table.put(0x4EC0, 0); - table.put(0x585E, 0); - table.put(0x5FA9, 0); - table.put(0x8DBC, 0); - table.put(0x585A, 0); - table.put(0x722B, 0); - table.put(0x6148, 0); - table.put(0x8F62, 0); - table.put(0x23A8D, 0); - table.put(0x7CD6, 0); - table.put(0x9756, 0); - table.put(0x95AD, 0); - table.put(0x62FE, 0); - table.put(0x6144, 0); - table.put(0x62FC, 0); - table.put(0x7CD2, 0); - table.put(0x7228, 0); - table.put(0x73DE, 0); - table.put(0x25AA7, 0); - table.put(0x440B, 0); - table.put(0x8669, 0); - table.put(0x34DF, 0); - table.put(0x649D, 0); - table.put(0x8667, 0); - table.put(0x3C18, 0); - table.put(0x649A, 0); - table.put(0x9748, 0); 
- table.put(0x721B, 0); - table.put(0x98FC, 0); - table.put(0x4EAE, 0); - table.put(0x6D85, 0); - table.put(0x865C, 0); - table.put(0x5840, 0); - table.put(0x20804, 0); - table.put(0x90FD, 0); - table.put(0x7210, 0); - table.put(0x7CBE, 0); - table.put(0x7570, 0); - table.put(0x24263, 0); - table.put(0x0060, 0); - table.put(0x8650, 0); - table.put(0x29B30, 0); - table.put(0x98EF, 0); - table.put(0x90F1, 0); - table.put(0x6D78, 0); - table.put(0x9732, 0); - table.put(0x6D77, 0); - table.put(0x51F5, 0); - table.put(0x6A13, 0); - table.put(0x2A392, 0); - table.put(0x59EC, 0); - table.put(0x5832, 0); - table.put(0x7565, 0); - table.put(0x5831, 0); - table.put(0x5C8D, 0); - table.put(0x8F3B, 0); - table.put(0x647E, 0); - table.put(0x2569A, 0); - table.put(0x62D4, 0); - table.put(0x62D3, 0); - table.put(0x28BFA, 0); - table.put(0x6914, 0); - table.put(0x98E2, 0); - table.put(0x34BB, 0); - table.put(0x8F38, 0); - table.put(0x6D6A, 0); - table.put(0x73B2, 0); - table.put(0x34B9, 0); - table.put(0x62CF, 0); - table.put(0x6477, 0); - table.put(0x226D4, 0); - table.put(0x25F86, 0); - table.put(0x6D69, 0); - table.put(0x9723, 0); - table.put(0x863F, 0); - table.put(0x004B, 0); - table.put(0x7559, 0); - table.put(0x502B, 0); - table.put(0x6A02, 0); - table.put(0x6BBB, 0); - table.put(0x6BBA, 0); - table.put(0x62C9, 0); - table.put(0x8F2A, 0); - table.put(0x7E45, 0); - table.put(0x7E41, 0); - table.put(0x7FFA, 0); - table.put(0x51DE, 0); - table.put(0x59D8, 0); - table.put(0x8996, 0); - table.put(0x51DC, 0); - table.put(0x8F26, 0); - table.put(0x73A5, 0); - table.put(0x6108, 0); - table.put(0x6469, 0); - table.put(0x927C, 0); - table.put(0x9B6F, 0); - table.put(0x537F, 0); - table.put(0x6BAE, 0); - table.put(0x003B, 0); - table.put(0x862D, 0); - table.put(0x537D, 0); - table.put(0x2921A, 0); - table.put(0x898B, 0); - table.put(0x87E1, 0); - table.put(0x5C6E, 0); - table.put(0x90CE, 0); - table.put(0x9276, 0); - table.put(0x7E37, 0); - table.put(0x62B1, 0); - table.put(0x8986, 
0); - table.put(0x5375, 0); - table.put(0x51CC, 0); - table.put(0x5373, 0); - table.put(0x580D, 0); - table.put(0x0390, 0); - table.put(0x753E, 0); - table.put(0x5C65, 0); - table.put(0x5C64, 0); - table.put(0x76F4, 0); - table.put(0x21D0B, 0); - table.put(0x40E3, 0); - table.put(0x753B, 0); - table.put(0x5C62, 0); - table.put(0x5C60, 0); - table.put(0x51C9, 0); - table.put(0x038F, 0); - table.put(0x038E, 0); - table.put(0x6452, 0); - table.put(0x038C, 0); - table.put(0x038A, 0); - table.put(0x6D41, 0); - table.put(0x5AB5, 0); - table.put(0x28D77, 0); - table.put(0x0389, 0); - table.put(0x7891, 0); - table.put(0x0388, 0); - table.put(0x0386, 0); - table.put(0x0385, 0); - table.put(0x265A8, 0); - table.put(0x6D3E, 0); - table.put(0x7099, 0); - table.put(0x5002, 0); - table.put(0x8612, 0); - table.put(0x76E7, 0); - table.put(0x788C, 0); - table.put(0x51B7, 0); - table.put(0x2284A, 0); - table.put(0x51B5, 0); - table.put(0x6D34, 0); - table.put(0x535A, 0); - table.put(0x3F1B, 0); - table.put(0x7524, 0); - table.put(0x76DB, 0); - table.put(0x22844, 0); - table.put(0x8606, 0); - table.put(0x9CFD, 0); - table.put(0x51AC, 0); - table.put(0x8964, 0); - table.put(0x87BA, 0); - table.put(0x5351, 0); - table.put(0x5DFD, 0); - table.put(0x6797, 0); - table.put(0x9E9F, 0); - table.put(0x51A4, 0); - table.put(0x6EDB, 0); - table.put(0x5C3F, 0); - table.put(0x707D, 0); - table.put(0x7E09, 0); - table.put(0x76CA, 0); - table.put(0x5F9A, 0); - table.put(0x7FBD, 0); - table.put(0x5349, 0); - table.put(0x9E97, 0); - table.put(0x65E3, 0); - table.put(0x7E02, 0); - table.put(0x65E2, 0); - table.put(0x6D1E, 0); - table.put(0x7077, 0); - table.put(0x3A6C, 0); - table.put(0x6D1B, 0); - table.put(0x6ED1, 0); - table.put(0x93F9, 0); - table.put(0x7070, 0); - table.put(0x786B, 0); - table.put(0x6785, 0); - table.put(0x6422, 0); - table.put(0x533F, 0); - table.put(0x6D16, 0); - table.put(0x54F6, 0); - table.put(0x6ECB, 0); - table.put(0x7506, 0); - table.put(0x9238, 0); - table.put(0x5DE2, 
0); - table.put(0x5F8B, 0); - table.put(0x9234, 0); - table.put(0x5DE1, 0); - table.put(0x641C, 0); - table.put(0x677B, 0); - table.put(0x6EC7, 0); - table.put(0x243AB, 0); - table.put(0x2346D, 0); - table.put(0x8941, 0); - table.put(0x4CF8, 0); - table.put(0x5C22, 0); - table.put(0x958B, 0); - table.put(0x9E7F, 0); - table.put(0x6EBA, 0); - table.put(0x65C5, 0); - table.put(0x4CED, 0); - table.put(0x4B33, 0); - table.put(0x813E, 0); - table.put(0x232B8, 0); - table.put(0x9B12, 0); - table.put(0x784E, 0); - table.put(0x25626, 0); - table.put(0x285ED, 0); - table.put(0x704A, 0); - table.put(0x5F6B, 0); - table.put(0x2063A, 0); - table.put(0x2597C, 0); - table.put(0x4E86, 0); - table.put(0x675E, 0); - table.put(0x82E6, 0); - table.put(0x5317, 0); - table.put(0x82E5, 0); - table.put(0x4E82, 0); - table.put(0x5674, 0); - table.put(0x5F69, 0); - table.put(0x5C06, 0); - table.put(0x7AEE, 0); - table.put(0x5DBA, 0); - table.put(0x5F62, 0); - table.put(0x6756, 0); - table.put(0x7C92, 0); - table.put(0x6753, 0); - table.put(0x3D96, 0); - table.put(0x393A, 0); - table.put(0x8D77, 0); - table.put(0x4227, 0); - table.put(0x5DB2, 0); - table.put(0x79EB, 0); - table.put(0x4CCE, 0); - table.put(0x674E, 0); - table.put(0x349E, 0); - table.put(0x5668, 0); - table.put(0x5306, 0); - table.put(0x7039, 0); - table.put(0x3A2E, 0); - table.put(0x5305, 0); - table.put(0x54BD, 0); - table.put(0x43D9, 0); - table.put(0x8910, 0); - table.put(0x43D5, 0); - table.put(0x5F53, 0); - table.put(0x285D2, 0); - table.put(0x6B9F, 0); - table.put(0x738B, 0); - table.put(0x26B3C, 0); - table.put(0x7C7B, 0); - table.put(0x26247, 0); - table.put(0x0313, 0); - table.put(0x7387, 0); - table.put(0x5651, 0); - table.put(0x23D1E, 0); - table.put(0x456B, 0); - table.put(0x7ACB, 0); - table.put(0x23ED1, 0); - table.put(0x82BD, 0); - table.put(0x391C, 0); - table.put(0x8103, 0); - table.put(0x24FB8, 0); - table.put(0x701E, 0); - table.put(0x54A2, 0); - table.put(0x701B, 0); - table.put(0x737A, 0); - 
table.put(0x4202, 0); - table.put(0x71D0, 0); - table.put(0x79CA, 0); - table.put(0x4561, 0); - table.put(0x82B3, 0); - table.put(0x0301, 0); - table.put(0x82B1, 0); - table.put(0x0300, 0); - table.put(0x71CE, 0); - table.put(0x7375, 0); - table.put(0x455D, 0); - table.put(0x96F7, 0); - table.put(0x96F6, 0); - table.put(0x8457, 0); - table.put(0x22331, 0); - table.put(0x60E1, 0); - table.put(0x7C60, 0); - table.put(0x6721, 0); - table.put(0x57F4, 0); - table.put(0x26CD5, 0); - table.put(0x24FA1, 0); - table.put(0x6B79, 0); - table.put(0x6B77, 0); - table.put(0x7AB1, 0); - table.put(0x26228, 0); - table.put(0x671B, 0); - table.put(0x4E41, 0); - table.put(0x6B72, 0); - table.put(0x60D8, 0); - table.put(0x9094, 0); - table.put(0x5F22, 0); - table.put(0x6717, 0); - table.put(0x27667, 0); - table.put(0x8449, 0); - table.put(0x96E3, 0); - table.put(0x4E3D, 0); - table.put(0x96E2, 0); - table.put(0x5197, 0); - table.put(0x9E1E, 0); - table.put(0x5195, 0); - table.put(0x5192, 0); - table.put(0x85FA, 0); - table.put(0x79AE, 0); - table.put(0x908F, 0); - table.put(0x4E39, 0); - table.put(0x25133, 0); - table.put(0x4E38, 0); - table.put(0x4E32, 0); - table.put(0x843D, 0); - table.put(0x518D, 0); - table.put(0x60C7, 0); - table.put(0x291DF, 0); - table.put(0x6700, 0); - table.put(0x5180, 0); - table.put(0x24C92, 0); - table.put(0x7C3E, 0); - table.put(0x907C, 0); - table.put(0x4E26, 0); - table.put(0x7DF4, 0); - table.put(0x7F9A, 0); - table.put(0x6B54, 0); - table.put(0x21B18, 0); - table.put(0x57CE, 0); - table.put(0x2003, 0); - table.put(0x2002, 0); - table.put(0x4096, 0); - table.put(0x5973, 0); - table.put(0x8ED4, 0); - table.put(0x2872E, 0); - table.put(0x8D1B, 0); - table.put(0x5F04, 0); - table.put(0x452B, 0); - table.put(0x625D, 0); - table.put(0x7F95, 0); - table.put(0x9072, 0); - table.put(0x20A2C, 0); - table.put(0x96C3, 0); - table.put(0x5177, 0); - table.put(0x8786, 0); - table.put(0x25119, 0); - table.put(0x3D33, 0); - table.put(0x5A66, 0); - table.put(0x2A600, 
0); - table.put(0x5A62, 0); - table.put(0x8ECA, 0); - table.put(0x68A8, 0); - table.put(0x27F2F, 0); - table.put(0x516D, 0); - table.put(0x5606, 0); - table.put(0x5962, 0); - table.put(0x96B8, 0); - table.put(0x985E, 0); - table.put(0x7F85, 0); - table.put(0x5169, 0); - table.put(0x8779, 0); - table.put(0x4E0D, 0); - table.put(0x5168, 0); - table.put(0x5167, 0); - table.put(0x85CD, 0); - table.put(0x5164, 0); - table.put(0x8D08, 0); - table.put(0x6E9C, 0); - table.put(0x20122, 0); - table.put(0x732A, 0); - table.put(0x4FBF, 0); - table.put(0x7F7A, 0); - table.put(0x25BAB, 0); - table.put(0x4FBB, 0); - table.put(0x24814, 0); - table.put(0x876B, 0); - table.put(0x5954, 0); - table.put(0x46C7, 0); - table.put(0x5951, 0); - table.put(0x7F79, 0); - table.put(0x96A3, 0); - table.put(0x7F72, 0); - table.put(0x6CE5, 0); - table.put(0x5154, 0); - table.put(0x3781, 0); - table.put(0x5BFF, 0); - table.put(0x46BE, 0); - table.put(0x6599, 0); - table.put(0x7DC7, 0); - table.put(0x99FE, 0); - table.put(0x6234, 0); - table.put(0x4FAE, 0); - table.put(0x5948, 0); - table.put(0x514D, 0); - table.put(0x5944, 0); - table.put(0x6B21, 0); - table.put(0x2219F, 0); - table.put(0x5BF3, 0); - table.put(0x74CA, 0); - table.put(0x7DBE, 0); - table.put(0x622E, 0); - table.put(0x63E4, 0); - table.put(0x983B, 0); - table.put(0x99F1, 0); - table.put(0x5145, 0); - table.put(0x52FA, 0); - table.put(0x5BEE, 0); - table.put(0x5140, 0); - table.put(0x3EB8, 0); - table.put(0x549E, 0); - table.put(0x52F5, 0); - table.put(0x6CCD, 0); - table.put(0x7669, 0); - table.put(0x6CCC, 0); - table.put(0x5BE7, 0); - table.put(0x26523, 0); - table.put(0x9038, 0); - table.put(0x27966, 0); - table.put(0x621B, 0); - table.put(0x6E6E, 0); - table.put(0x5A27, 0); - table.put(0x3EAC, 0); - table.put(0x88FA, 0); - table.put(0x9829, 0); - table.put(0x6578, 0); - table.put(0x22183, 0); - table.put(0x88F8, 0); - table.put(0x6CBF, 0); - table.put(0x7DA0, 0); - table.put(0x6210, 0); - table.put(0x6B04, 0); - table.put(0x52E4, 
0); - table.put(0x5BD8, 0); - table.put(0x216EA, 0); - table.put(0x4046, 0); - table.put(0x5922, 0); - table.put(0x5D7C, 0); - table.put(0x5A1B, 0); - table.put(0x9023, 0); - table.put(0x63C5, 0); - table.put(0x63C4, 0); - table.put(0x656C, 0); - table.put(0x829D, 0); - table.put(0x52DE, 0); - table.put(0x24C36, 0); - table.put(0x591A, 0); - table.put(0x9818, 0); - table.put(0x8B8A, 0); - table.put(0x7F3E, 0); - table.put(0x26FB1, 0); - table.put(0x91D1, 0); - table.put(0x6200, 0); - table.put(0x4039, 0); - table.put(0x238A7, 0); - table.put(0x8291, 0); - table.put(0x52D2, 0); - table.put(0x88E1, 0); - table.put(0x5D6E, 0); - table.put(0x91CF, 0); - table.put(0x5BC3, 0); - table.put(0x2541D, 0); - table.put(0x49E6, 0); - table.put(0x8B80, 0); - table.put(0x7642, 0); - table.put(0x5D6B, 0); - table.put(0x91CC, 0); - table.put(0x980B, 0); - table.put(0x8728, 0); - table.put(0x99C2, 0); - table.put(0x88DE, 0); - table.put(0x828B, 0); - table.put(0x3B9D, 0); - table.put(0x6556, 0); - table.put(0x52C9, 0); - table.put(0x52C7, 0); - table.put(0x88D7, 0); - table.put(0x2A105, 0); - table.put(0x6FFE, 0); - table.put(0x5906, 0); - table.put(0x719C, 0); - table.put(0x63A9, 0); - table.put(0x654F, 0); - table.put(0x9C57, 0); - table.put(0x5468, 0); - table.put(0x88CF, 0); - table.put(0x63A0, 0); - table.put(0x798F, 0); - table.put(0x798E, 0); - table.put(0x798D, 0); - table.put(0x91B4, 0); - table.put(0x8279, 0); - table.put(0x5D50, 0); - table.put(0x7A81, 0); - table.put(0x870E, 0); - table.put(0x38E3, 0); - table.put(0x4018, 0); - table.put(0x52B3, 0); - table.put(0x6FEB, 0); - table.put(0x88C2, 0); - table.put(0x66F8, 0); - table.put(0x99A7, 0); - table.put(0x66F4, 0); - table.put(0x826F, 0); - table.put(0x6E2F, 0); - table.put(0x9DFA, 0); - table.put(0x22BF1, 0); - table.put(0x9C40, 0); - table.put(0x761F, 0); - table.put(0x797F, 0); - table.put(0x761D, 0); - table.put(0x5EFE, 0); - table.put(0x5D43, 0); - table.put(0x934A, 0); - table.put(0x4AB2, 0); - table.put(0x9F9C, 
0); - table.put(0x8B58, 0); - table.put(0x52A3, 0); - table.put(0x4008, 0); - table.put(0x6094, 0); - table.put(0x7610, 0); - table.put(0x688E, 0); - table.put(0x216A8, 0); - table.put(0x5448, 0); - table.put(0x80B2, 0); - table.put(0x233D5, 0); - table.put(0x6E1A, 0); - table.put(0x5442, 0); - table.put(0x5EEC, 0); - table.put(0x02B9, 0); - table.put(0x6885, 0); - table.put(0x38C7, 0); - table.put(0x9F8E, 0); - table.put(0x80AD, 0); - table.put(0x9F8D, 0); - table.put(0x6881, 0); - table.put(0x4359, 0); - table.put(0x716E, 0); - table.put(0x7965, 0); - table.put(0x23CBC, 0); - table.put(0x5438, 0); - table.put(0x6081, 0); - table.put(0x6FC6, 0); - table.put(0x261DA, 0); - table.put(0x233C3, 0); - table.put(0x795E, 0); - table.put(0x795D, 0); - table.put(0x250F3, 0); - table.put(0x250F2, 0); - table.put(0x8B39, 0); - table.put(0x5ED9, 0); - table.put(0x578B, 0); - table.put(0x55E2, 0); - table.put(0x7956, 0); - table.put(0x7A4F, 0); - table.put(0x9686, 0); - table.put(0x5ED3, 0); - table.put(0x5ED2, 0); - table.put(0x66C6, 0); - table.put(0x7950, 0); - table.put(0x7A4A, 0); - table.put(0x5D19, 0); - table.put(0x83F1, 0); - table.put(0x77A7, 0); - table.put(0x5ECA, 0); - table.put(0x967C, 0); - table.put(0x83EF, 0); - table.put(0x3B49, 0); - table.put(0x7A40, 0); - table.put(0x4F8B, 0); - table.put(0x541D, 0); - table.put(0x7949, 0); - table.put(0x5EC9, 0); - table.put(0x7948, 0); - table.put(0x9678, 0); - table.put(0x4334, 0); - table.put(0x9675, 0); - table.put(0x4F86, 0); - table.put(0x66B4, 0); - table.put(0x83E7, 0); - table.put(0x7149, 0); - table.put(0x4F80, 0); - table.put(0x7145, 0); - table.put(0x8CD3, 0); - table.put(0x793E, 0); - table.put(0x72FC, 0); - table.put(0x793C, 0); - table.put(0x9DB4, 0); - table.put(0x540F, 0); - table.put(0x6852, 0); - table.put(0x83DC, 0); - table.put(0x55C2, 0); - table.put(0x5EB6, 0); - table.put(0x55C0, 0); - table.put(0x5EB3, 0); - table.put(0x9304, 0); - table.put(0x5EB0, 0); - table.put(0x219C8, 0); - table.put(0x5406, 
0); - table.put(0x8CC8, 0); - table.put(0x7498, 0); - table.put(0x208DE, 0); - table.put(0x8CC2, 0); - table.put(0x8CC1, 0); - table.put(0x792A, 0); - table.put(0x83CC, 0); - table.put(0x83CA, 0); - table.put(0x55B3, 0); - table.put(0x5EA6, 0); - table.put(0x8B01, 0); - table.put(0x83C9, 0); - table.put(0x7A1C, 0); - table.put(0x9F43, 0); - table.put(0x7489, 0); - table.put(0x4F60, 0); - table.put(0x27ED3, 0); - table.put(0x55AB, 0); - table.put(0x7485, 0); - table.put(0x26C36, 0); - table.put(0x61F6, 0); - table.put(0x964D, 0); - table.put(0x964B, 0); - table.put(0x61F2, 0); - table.put(0x3B19, 0); - table.put(0x8204, 0); - table.put(0x83BD, 0); - table.put(0x69EA, 0); - table.put(0x2633E, 0); - table.put(0x8564, 0); - table.put(0x9F3B, 0); - table.put(0x8201, 0); - table.put(0x7BC9, 0); - table.put(0x8CAB, 0); - table.put(0x97FF, 0); - table.put(0x7BC6, 0); - table.put(0x24608, 0); - table.put(0x4301, 0); - table.put(0x20B63, 0); - table.put(0x6C88, 0); - table.put(0x7BC0, 0); - table.put(0x6ADB, 0); - table.put(0x7471, 0); - table.put(0x7D63, 0); - table.put(0x3B08, 0); - table.put(0x29145, 0); - table.put(0x83AD, 0); - table.put(0x58F7, 0); - table.put(0x6AD3, 0); - table.put(0x25249, 0); - table.put(0x58F2, 0); - table.put(0x72C0, 0); - table.put(0x61DE, 0); - table.put(0x681F, 0); - table.put(0x7D5B, 0); - table.put(0x6383, 0); - table.put(0x7469, 0); - table.put(0x58EE, 0); - table.put(0x529B, 0); - table.put(0x2A291, 0); - table.put(0x9199, 0); - table.put(0x264DA, 0); - table.put(0x962E, 0); - table.put(0x6817, 0); - table.put(0x601C, 0); - table.put(0x637B, 0); - table.put(0x29496, 0); - table.put(0x97E0, 0); - table.put(0x21DE6, 0); - table.put(0x21DE4, 0); - table.put(0x5B85, 0); - table.put(0x745C, 0); - table.put(0x97DB, 0); - table.put(0x6012, 0); - table.put(0x50E7, 0); - table.put(0x6C67, 0); - table.put(0x9F16, 0); - table.put(0x58DF, 0); - table.put(0x72AF, 0); - table.put(0x5289, 0); - table.put(0x9F0F, 0); - table.put(0x58D8, 0); - 
table.put(0x214E4, 0); - table.put(0x25CD0, 0); - table.put(0x50DA, 0); - table.put(0x3FFC, 0); - table.put(0x6368, 0); - table.put(0x4635, 0); - table.put(0x4995, 0); - table.put(0x2A0CE, 0); - table.put(0x9F05, 0); - table.put(0x5717, 0); - table.put(0x9A6A, 0); - table.put(0x5716, 0); - table.put(0x7447, 0); - table.put(0x86E2, 0); - table.put(0x236A3, 0); - table.put(0x23AFA, 0); - table.put(0x6AA8, 0); - table.put(0x61B2, 0); - table.put(0x5277, 0); - table.put(0x50CF, 0); - table.put(0x36FC, 0); - table.put(0x6C4E, 0); - table.put(0x808B, 0); - table.put(0x5272, 0); - table.put(0x7D2F, 0); - table.put(0x61AF, 0); - table.put(0x916A, 0); - table.put(0x69A3, 0); - table.put(0x6350, 0); - table.put(0x4A76, 0); - table.put(0x61A4, 0); - table.put(0x6DF9, 0); - table.put(0x807E, 0); - table.put(0x7D22, 0); - table.put(0x36EE, 0); - table.put(0x8516, 0); - table.put(0x58B3, 0); - table.put(0x2339C, 0); - table.put(0x75E2, 0); - table.put(0x778B, 0); - table.put(0x4A6E, 0); - table.put(0x8070, 0); - table.put(0x8FB6, 0); - table.put(0x6DEA, 0); - table.put(0x58AC, 0); - table.put(0x23393, 0); - table.put(0x7422, 0); - table.put(0x8FB0, 0); - table.put(0x633D, 0); - table.put(0x806F, 0); - table.put(0x669C, 0); - table.put(0x7D10, 0); - table.put(0x58A8, 0); - table.put(0x8863, 0); - table.put(0x8860, 0); - table.put(0x26F2C, 0); - table.put(0x00C5, 0); - table.put(0x6691, 0); - table.put(0x387C, 0); - table.put(0x8060, 0); - table.put(0x6DDA, 0); - table.put(0x5B3E, 0); - table.put(0x3515, 0); - table.put(0x270D2, 0); - table.put(0x5246, 0); - table.put(0x7D00, 0); - return table; - } - } -} diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleSet.java index 1f7806efa..f733606a2 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SingleSet.java @@ 
-22,6 +22,7 @@ package org.teavm.classlib.java.util.regex; /** * Group node over subexpression w/o alternations. + * * @author Nikolay A. Kuznetsov */ class SingleSet extends JointSet { @@ -34,8 +35,8 @@ class SingleSet extends JointSet { this.groupIndex = fSet.getGroupIndex(); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int start = matchResult.getStart(groupIndex); matchResult.setStart(groupIndex, stringIndex); int shift = kid.matches(stringIndex, testString, matchResult); @@ -46,81 +47,82 @@ class SingleSet extends JointSet { return -1; } - public int find(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int res = kid.find(stringIndex, testString, matchResult); if (res >= 0) matchResult.setStart(groupIndex, res); return res; } - public int findBack(int stringIndex, int lastIndex, - CharSequence testString, MatchResultImpl matchResult) { + @Override + public int findBack(int stringIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) { int res = kid.findBack(stringIndex, lastIndex, testString, matchResult); if (res >= 0) matchResult.setStart(groupIndex, res); return res; } + @Override public boolean first(AbstractSet set) { return kid.first(set); } /** - * This method is used for replacement backreferenced - * sets. + * This method is used for replacement backreferenced sets. */ + @Override public JointSet processBackRefReplacement() { BackReferencedSingleSet set = new BackReferencedSingleSet(this); /* - * We will store a reference to created BackReferencedSingleSet - * in next field. This is needed toprocess replacement - * of sets correctly since sometimes we cannot renew all references to - * detachable set in the current point of traverse. 
See - * QuantifierSet and AbstractSet processSecondPass() methods for - * more details. + * We will store a reference to created BackReferencedSingleSet in next + * field. This is needed toprocess replacement of sets correctly since + * sometimes we cannot renew all references to detachable set in the + * current point of traverse. See QuantifierSet and AbstractSet + * processSecondPass() methods for more details. */ next = set; return set; } /** - * This method is used for traversing nodes after the - * first stage of compilation. + * This method is used for traversing nodes after the first stage of + * compilation. */ + @Override public void processSecondPass() { - this.isSecondPassVisited = true; + this.isSecondPassVisited = true; if (fSet != null && !fSet.isSecondPassVisited) { - /* - * Add here code to do during the pass - */ + /* + * Add here code to do during the pass + */ - /* - * End code to do during the pass - */ - fSet.processSecondPass(); - } + /* + * End code to do during the pass + */ + fSet.processSecondPass(); + } if (kid != null && !kid.isSecondPassVisited) { - /* - * Add here code to do during the pass - */ - JointSet set = kid.processBackRefReplacement(); + /* + * Add here code to do during the pass + */ + JointSet set = kid.processBackRefReplacement(); - if (set != null) { - kid.isSecondPassVisited = true; - kid = (AbstractSet) set; - } + if (set != null) { + kid.isSecondPassVisited = true; + kid = set; + } - /* - * End code to do during the pass - */ + /* + * End code to do during the pass + */ - kid.processSecondPass(); + kid.processSecondPass(); } } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplCharSet.java index a3daddf1c..342316ba6 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplCharSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplCharSet.java @@ 
-94,33 +94,34 @@ class SupplCharSet extends LeafSet { private char low = 0; - //int value of this supplementary codepoint + // int value of this supplementary codepoint private int ch; public SupplCharSet(int ch) { charCount = 2; this.ch = ch; - char [] chUTF16 = Character.toChars(ch); + char[] chUTF16 = Character.toChars(ch); high = chUTF16[0]; /* - * we suppose that SupplCharSet is - * build over supplementary codepoints only + * we suppose that SupplCharSet is build over supplementary codepoints + * only */ low = chUTF16[1]; } + @Override public int accepts(int strIndex, CharSequence testString) { char high = testString.charAt(strIndex++); char low = testString.charAt(strIndex); return ((this.high == high) && (this.low == low)) ? 2 : -1; } - public int find(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) { if (testString instanceof String) { - String testStr = (String) testString; + String testStr = (String)testString; int strLength = matchResult.getRightBound(); while (strIndex < strLength) { @@ -132,9 +133,7 @@ class SupplCharSet extends LeafSet { if (strIndex < strLength) { char ch = testStr.charAt(strIndex); - if ((low == ch) - && (next.matches(strIndex + 1, - testString, matchResult) >= 0)) { + if ((low == ch) && (next.matches(strIndex + 1, testString, matchResult) >= 0)) { return --strIndex; } strIndex++; @@ -146,11 +145,11 @@ class SupplCharSet extends LeafSet { return super.find(strIndex, testString, matchResult); } - public int findBack(int strIndex, int lastIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) { if (testString instanceof String) { - String testStr = (String) testString; + String testStr = (String)testString; while (lastIndex >= strIndex) { lastIndex = testStr.lastIndexOf(low, lastIndex); @@ 
-159,9 +158,7 @@ class SupplCharSet extends LeafSet { return -1; } - if ((high == testStr.charAt(lastIndex)) - && next.matches(lastIndex + 2, - testString, matchResult) >= 0) { + if ((high == testStr.charAt(lastIndex)) && next.matches(lastIndex + 2, testString, matchResult) >= 0) { return lastIndex; } @@ -173,6 +170,7 @@ class SupplCharSet extends LeafSet { return super.findBack(strIndex, lastIndex, testString, matchResult); } + @Override protected String getName() { return "" + high + low; } @@ -181,12 +179,12 @@ class SupplCharSet extends LeafSet { return ch; } + @Override public boolean first(AbstractSet set) { if (set instanceof SupplCharSet) { - return ((SupplCharSet) set).getCodePoint() == ch; + return ((SupplCharSet)set).getCodePoint() == ch; } else if (set instanceof SupplRangeSet) { - return ((SupplRangeSet) set) - .contains(ch); + return ((SupplRangeSet)set).contains(ch); } else if (set instanceof CharSet) { return false; } else if (set instanceof RangeSet) { diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplRangeSet.java index 3a2b73f57..169ac68ed 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplRangeSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/SupplRangeSet.java @@ -83,9 +83,9 @@ package org.teavm.classlib.java.util.regex; /** - * Represents node accepting single character from the given char class. - * This character can be supplementary (2 chars needed to represent) or from - * basic multilingual pane (1 needed char to represent it). + * Represents node accepting single character from the given char class. This + * character can be supplementary (2 chars needed to represent) or from basic + * multilingual pane (1 needed char to represent it). 
*/ class SupplRangeSet extends JointSet { @@ -104,24 +104,22 @@ class SupplRangeSet extends JointSet { this.alt = cc.alt; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int strLength = matchResult.getRightBound(); int offset = -1; if (stringIndex < strLength) { char high = testString.charAt(stringIndex++); - if (contains(high) && - (offset = next.matches(stringIndex, testString, matchResult)) > 0) { + if (contains(high) && (offset = next.matches(stringIndex, testString, matchResult)) > 0) { return offset; } if (stringIndex < strLength) { char low = testString.charAt(stringIndex++); - if (Character.isSurrogatePair(high, low) - && contains(Character.toCodePoint(high, low))) { + if (Character.isSurrogatePair(high, low) && contains(Character.toCodePoint(high, low))) { return next.matches(stringIndex, testString, matchResult); } } @@ -130,6 +128,7 @@ class SupplRangeSet extends JointSet { return -1; } + @Override protected String getName() { return "range:" + (alt ? 
"^ " : " ") + chars.toString(); } @@ -138,19 +137,16 @@ class SupplRangeSet extends JointSet { return chars.contains(ch); } + @Override public boolean first(AbstractSet set) { if (set instanceof SupplCharSet) { - return AbstractCharClass.intersects(chars, ((SupplCharSet) set) - .getCodePoint()); + return AbstractCharClass.intersects(chars, ((SupplCharSet)set).getCodePoint()); } else if (set instanceof CharSet) { - return AbstractCharClass.intersects(chars, ((CharSet) set) - .getChar()); + return AbstractCharClass.intersects(chars, ((CharSet)set).getChar()); } else if (set instanceof SupplRangeSet) { - return AbstractCharClass.intersects(chars, ((SupplRangeSet) set) - .chars); + return AbstractCharClass.intersects(chars, ((SupplRangeSet)set).chars); } else if (set instanceof RangeSet) { - return AbstractCharClass.intersects(chars, ((RangeSet) set) - .getChars()); + return AbstractCharClass.intersects(chars, ((RangeSet)set).getChars()); } return true; @@ -160,14 +156,17 @@ class SupplRangeSet extends JointSet { return chars; } + @Override public AbstractSet getNext() { return next; } + @Override public void setNext(AbstractSet next) { this.next = next; } + @Override public boolean hasConsumed(MatchResultImpl mr) { return true; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TMatcher.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TMatcher.java index f6ba09a95..ed68b6f8f 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TMatcher.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TMatcher.java @@ -19,23 +19,22 @@ package org.teavm.classlib.java.util.regex; import java.util.ArrayList; - /** * Provides a means of matching regular expressions against a given input, * finding occurrences of regular expressions in a given input, or replacing - * parts of a given input. A {@code Matcher} instance has an associated {@link - * TPattern} instance and an input text. 
A typical use case is to - * iteratively find all occurrences of the {@code Pattern}, until the end of - * the input is reached, as the following example illustrates: + * parts of a given input. A {@code Matcher} instance has an associated + * {@link TPattern} instance and an input text. A typical use case is to + * iteratively find all occurrences of the {@code Pattern}, until the end of the + * input is reached, as the following example illustrates: * *

* *

- * Pattern p = Pattern.compile("[A-Za-z]+");
+ * Pattern p = Pattern.compile("[A-Za-z]+");
  *
- * Matcher m = p.matcher("Hello, Android!");
+ * Matcher m = p.matcher("Hello, Android!");
  * while (m.find()) {
- *     System.out.println(m.group()); // prints "Hello" and "Android"
+ *     System.out.println(m.group()); // prints "Hello" and "Android"
  * }
  * 
* @@ -73,7 +72,7 @@ public final class TMatcher implements TMatchResult { private String processedRepl = null; - private ArrayList replacementParts = null; + private ArrayList replacementParts = null; /** * Appends a literal part of the input plus a replacement for the current @@ -103,8 +102,7 @@ public final class TMatcher implements TMatchResult { * Parses replacement string and creates pattern */ private String processReplacement(String replacement) { - if (this.replacement != null - && this.replacement.equals(replacement)) { + if (this.replacement != null && this.replacement.equals(replacement)) { if (replacementParts == null) { return processedRepl; } else { @@ -138,21 +136,20 @@ public final class TMatcher implements TMatchResult { } else { if (repl[index] == '$') { if (replacementParts == null) { - replacementParts = new ArrayList(); + replacementParts = new ArrayList<>(); } try { - final int gr = Integer.parseInt(new String( - repl, ++index, 1)); + final int gr = Integer.parseInt(new String(repl, ++index, 1)); if (replacementPos != res.length()) { - replacementParts.add(res.subSequence( - replacementPos, res.length())); + replacementParts.add(res.subSequence(replacementPos, res.length())); replacementPos = res.length(); } - replacementParts.add(new Object() { //$NON-LOCK-1$ + replacementParts.add(new Object() { private final int grN = gr; + @Override public String toString() { return group(grN); } @@ -164,7 +161,7 @@ public final class TMatcher implements TMatchResult { } catch (IndexOutOfBoundsException iob) { throw iob; } catch (Exception e) { - throw new IllegalArgumentException(""); //$NON-NLS-1$ + throw new IllegalArgumentException(""); } } else { res.append(repl[index]); @@ -175,8 +172,7 @@ public final class TMatcher implements TMatchResult { } if (replacementParts != null && replacementPos != res.length()) { - replacementParts.add(res.subSequence(replacementPos, res - .length())); + replacementParts.add(res.subSequence(replacementPos, 
res.length())); } return res.toString(); } @@ -204,8 +200,8 @@ public final class TMatcher implements TMatchResult { /** * Resets the {@code Matcher}. This results in the region being set to the * whole input. Results of a previous find get lost. The next attempt to - * find an occurrence of the {@link TPattern} in the string will start at the - * beginning of the input. + * find an occurrence of the {@link TPattern} in the string will start at + * the beginning of the input. * * @return the {@code Matcher} itself. */ @@ -231,9 +227,8 @@ public final class TMatcher implements TMatchResult { */ public TMatcher region(int start, int end) { - if (start > end || start < 0 || end < 0 - || start > string.length() || end > string.length()) { - throw new IndexOutOfBoundsException(start + ", " + end); + if (start > end || start < 0 || end < 0 || start > string.length() || end > string.length()) { + throw new IndexOutOfBoundsException(start + ", " + end); } this.leftBound = start; @@ -319,6 +314,7 @@ public final class TMatcher implements TMatchResult { * @throws IllegalStateException * if no successful match has been made. */ + @Override public String group(int group) { return matchResult.group(group); } @@ -330,6 +326,7 @@ public final class TMatcher implements TMatchResult { * @throws IllegalStateException * if no successful match has been made. */ + @Override public String group() { return group(0); } @@ -383,15 +380,13 @@ public final class TMatcher implements TMatchResult { int length = string.length(); if (!hasTransparentBounds()) length = rightBound; - if (matchResult.startIndex >= 0 - && matchResult.mode() == TMatcher.MODE_FIND) { + if (matchResult.startIndex >= 0 && matchResult.mode() == TMatcher.MODE_FIND) { matchResult.startIndex = matchResult.end(); if (matchResult.end() == matchResult.start()) { matchResult.startIndex++; } - return matchResult.startIndex <= length ? find(matchResult.startIndex) - : false; + return matchResult.startIndex <= length ? 
find(matchResult.startIndex) : false; } else { return find(leftBound); } @@ -408,6 +403,7 @@ public final class TMatcher implements TMatchResult { * @throws IllegalStateException * if no successful match has been made. */ + @Override public int start(int group) { return matchResult.start(group); } @@ -423,6 +419,7 @@ public final class TMatcher implements TMatchResult { * @throws IllegalStateException * if no successful match has been made. */ + @Override public int end(int group) { return matchResult.end(group); } @@ -458,16 +455,16 @@ public final class TMatcher implements TMatchResult { for (int i = 0; i < len; i++) { switch (ch = s.charAt(i)) { - case '$': - res.append('\\'); - res.append('$'); - break; - case '\\': - res.append('\\'); - res.append('\\'); - break; - default: - res.append(ch); + case '$': + res.append('\\'); + res.append('$'); + break; + case '\\': + res.append('\\'); + res.append('\\'); + break; + default: + res.append(ch); } } @@ -479,8 +476,7 @@ public final class TMatcher implements TMatchResult { * sequence starting at index specified; Result of the match * will be stored into matchResult instance; */ - private boolean runMatch(AbstractSet set, int index, - MatchResultImpl matchResult) { + private boolean runMatch(AbstractSet set, int index, MatchResultImpl matchResult) { if (set.matches(index, string, matchResult) >= 0) { matchResult.finalizeMatch(); @@ -516,16 +512,18 @@ public final class TMatcher implements TMatchResult { * @throws IllegalStateException * if no successful match has been made. */ + @Override public int start() { return start(0); } /** - * Returns the number of groups in the results, which is always equal to - * the number of groups in the original regular expression. + * Returns the number of groups in the results, which is always equal to the + * number of groups in the original regular expression. * * @return the number of groups. 
*/ + @Override public int groupCount() { return matchResult.groupCount(); } @@ -538,6 +536,7 @@ public final class TMatcher implements TMatchResult { * @throws IllegalStateException * if no successful match has been made. */ + @Override public int end() { return end(0); } @@ -660,15 +659,15 @@ public final class TMatcher implements TMatchResult { * @return the {@code Matcher} itself. */ public TMatcher usePattern(TPattern pattern) { - if (pattern == null) { - throw new IllegalArgumentException(""); - } + if (pattern == null) { + throw new IllegalArgumentException(""); + } int startIndex = matchResult.getPreviousMatchEnd(); int mode = matchResult.mode(); this.pat = pattern; this.start = pattern.start; - matchResult = new MatchResultImpl(this.string, leftBound, rightBound, - pattern.groupCount(), pattern.compCount(), pattern.consCount()); + matchResult = new MatchResultImpl(this.string, leftBound, rightBound, pattern.groupCount(), + pattern.compCount(), pattern.consCount()); matchResult.setStartIndex(startIndex); matchResult.setMode(mode); return this; @@ -680,8 +679,7 @@ public final class TMatcher implements TMatchResult { this.string = cs; this.leftBound = 0; this.rightBound = string.length(); - matchResult = new MatchResultImpl(cs, leftBound, rightBound, pat - .groupCount(), pat.compCount(), pat.consCount()); + matchResult = new MatchResultImpl(cs, leftBound, rightBound, pat.groupCount(), pat.compCount(), pat.consCount()); } @Override @@ -691,8 +689,7 @@ public final class TMatcher implements TMatchResult { lastMatch = Integer.toString(start()); } catch (IllegalStateException e) { } - return getClass().getCanonicalName() + "[pattern=" + pat + " region=" - + matchResult.getLeftBound() + "," - + matchResult.getRightBound() + " lastmatch=" + lastMatch + "]"; + return "Regex[pattern=" + pat + " region=" + matchResult.getLeftBound() + "," + matchResult.getRightBound() + + " lastmatch=" + lastMatch + "]"; } } diff --git 
a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPattern.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPattern.java index 2bb0bf0ae..307c6cf41 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPattern.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPattern.java @@ -21,10 +21,8 @@ package org.teavm.classlib.java.util.regex; import java.io.Serializable; - import java.util.ArrayList; - /** * Represents a pattern used for matching, searching, or replacing strings. * {@code Pattern}s are specified in terms of regular expressions and compiled @@ -33,23 +31,24 @@ import java.util.ArrayList; *

* A typical use case looks like this: *

- *

- * Pattern p = Pattern.compile("Hello, A[a-z]*!");
  *
- * Matcher m = p.matcher("Hello, Android!");
+ * 
+ * Pattern p = Pattern.compile("Hello, A[a-z]*!");
+ *
+ * Matcher m = p.matcher("Hello, Android!");
  * boolean b1 = m.matches(); // true
  *
- * m.setInput("Hello, Robot!");
+ * m.setInput("Hello, Robot!");
  * boolean b2 = m.matches(); // false
  * 
*

* The above code could also be written in a more compact fashion, though this * variant is less efficient, since {@code Pattern} and {@code Matcher} objects - * are created on the fly instead of being reused. - * fashion: + * are created on the fly instead of being reused. fashion: + * *

- *     boolean b1 = Pattern.matches("Hello, A[a-z]*!", "Hello, Android!"); // true
- *     boolean b2 = Pattern.matches("Hello, A[a-z]*!", "Hello, Robot!");   // false
+ * boolean b1 = Pattern.matches("Hello, A[a-z]*!", "Hello, Android!"); // true
+ * boolean b2 = Pattern.matches("Hello, A[a-z]*!", "Hello, Robot!"); // false
  * 
* * @see TMatcher @@ -119,14 +118,8 @@ public final class TPattern implements Serializable { /** * Bit mask that includes all defined match flags */ - static final int flagsBitMask = TPattern.UNIX_LINES | - TPattern.CASE_INSENSITIVE | - TPattern.COMMENTS | - TPattern.MULTILINE | - TPattern.LITERAL | - TPattern.DOTALL | - TPattern.UNICODE_CASE | - TPattern.CANON_EQ; + static final int flagsBitMask = TPattern.UNIX_LINES | TPattern.CASE_INSENSITIVE | TPattern.COMMENTS | + TPattern.MULTILINE | TPattern.LITERAL | TPattern.DOTALL | TPattern.UNICODE_CASE | TPattern.CANON_EQ; /** * Current pattern to be compiled; @@ -143,7 +136,7 @@ public final class TPattern implements Serializable { /* * All backreferences that may be used in pattern. */ - transient private FSet backRefs [] = new FSet [BACK_REF_NUMBER]; + transient private FSet backRefs[] = new FSet[BACK_REF_NUMBER]; /* * Is true if backreferenced sets replacement is needed @@ -174,8 +167,8 @@ public final class TPattern implements Serializable { } /** - * Splits the given input sequence around occurrences of the {@code Pattern}. - * The function first determines all occurrences of the {@code Pattern} + * Splits the given input sequence around occurrences of the {@code Pattern} + * . The function first determines all occurrences of the {@code Pattern} * inside the input sequence. It then builds an array of the * "remaining" strings before, in-between, and after these * occurrences. An additional parameter determines the maximal number of @@ -190,9 +183,9 @@ public final class TPattern implements Serializable { *
    *
  • For n > 0, it is guaranteed that the resulting array * contains at most n entries. - *
  • For n < 0, the length of the resulting array is - * exactly the number of occurrences of the {@code Pattern} +1. - * All entries are included. + *
  • For n < 0, the length of the resulting array is exactly + * the number of occurrences of the {@code Pattern} +1. All + * entries are included. *
  • For n == 0, the length of the resulting array is at most * the number of occurrences of the {@code Pattern} +1. Empty * strings at the end of the array are not included. @@ -201,18 +194,18 @@ public final class TPattern implements Serializable { * @return the resulting array. */ public String[] split(CharSequence inputSeq, int limit) { - ArrayList res = new ArrayList(); + ArrayList res = new ArrayList<>(); TMatcher mat = matcher(inputSeq); int index = 0; int curPos = 0; if (inputSeq.length() == 0) { - return new String [] {""}; //$NON-NLS-1$ + return new String[] { "" }; //$NON-NLS-1$ } else { while (mat.find() && (index + 1 < limit || limit <= 0)) { - res.add(inputSeq.subSequence(curPos, mat.start()).toString()); - curPos = mat.end(); - index++; + res.add(inputSeq.subSequence(curPos, mat.start()).toString()); + curPos = mat.end(); + index++; } res.add(inputSeq.subSequence(curPos, inputSeq.length()).toString()); @@ -223,11 +216,11 @@ public final class TPattern implements Serializable { */ if (limit == 0) { while (--index >= 0 && res.get(index).toString().length() == 0) { - res.remove(index); + res.remove(index); } } } - return (String[]) res.toArray(new String[index >= 0 ? index : 0]); + return res.toArray(new String[index >= 0 ? 
index : 0]); } /** @@ -303,14 +296,12 @@ public final class TPattern implements Serializable { * @see #UNICODE_CASE * @see #UNIX_LINES */ - public static TPattern compile(String pattern, int flags) - throws TPatternSyntaxException { + public static TPattern compile(String pattern, int flags) throws TPatternSyntaxException { - if ((flags != 0) && - ((flags | flagsBitMask) != flagsBitMask)) { + if ((flags != 0) && ((flags | flagsBitMask) != flagsBitMask)) { - throw new IllegalArgumentException(""); - } + throw new IllegalArgumentException(""); + } AbstractSet.counter = 1; @@ -319,16 +310,15 @@ public final class TPattern implements Serializable { /** * - * @param pattern - - * Regular expression to be compiled - * @param flags - - * The bit mask including CASE_INSENSITIVE, MULTILINE, DOTALL, + * @param pattern + * - Regular expression to be compiled + * @param flags + * - The bit mask including CASE_INSENSITIVE, MULTILINE, DOTALL, * UNICODE_CASE, and CANON_EQ * * @return Compiled pattern */ - private TPattern compileImpl(String pattern, int flags) - throws TPatternSyntaxException { + private TPattern compileImpl(String pattern, int flags) throws TPatternSyntaxException { this.lexemes = new Lexer(pattern, flags); this.flags = flags; this.pattern = pattern; @@ -345,13 +335,10 @@ public final class TPattern implements Serializable { * A->(a|)+ */ private AbstractSet processAlternations(AbstractSet last) { - CharClass auxRange = new CharClass(hasFlag(TPattern.CASE_INSENSITIVE), - hasFlag(TPattern.UNICODE_CASE)); - while (!lexemes.isEmpty() - && lexemes.isLetter() - && (lexemes.lookAhead() == 0 - || lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR || lexemes - .lookAhead() == Lexer.CHAR_RIGHT_PARENTHESIS)) { + CharClass auxRange = new CharClass(hasFlag(TPattern.CASE_INSENSITIVE), hasFlag(TPattern.UNICODE_CASE)); + while (!lexemes.isEmpty() && + lexemes.isLetter() && + (lexemes.lookAhead() == 0 || lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR || lexemes.lookAhead() == 
Lexer.CHAR_RIGHT_PARENTHESIS)) { auxRange.add(lexemes.next()); if (lexemes.peek() == Lexer.CHAR_VERTICAL_BAR) lexemes.next(); @@ -365,9 +352,8 @@ public final class TPattern implements Serializable { /** * E->AE; E->S|E; E->S; A->(a|)+ E->S(|S)* */ - private AbstractSet processExpression(int ch, int newFlags, - AbstractSet last) { - ArrayList children = new ArrayList(); + private AbstractSet processExpression(int ch, int newFlags, AbstractSet last) { + ArrayList children = new ArrayList<>(); AbstractSet child; int saveFlags = flags; FSet fSet; @@ -378,51 +364,50 @@ public final class TPattern implements Serializable { } switch (ch) { - case Lexer.CHAR_NONCAP_GROUP: - fSet = new NonCapFSet(++consCount); - break; + case Lexer.CHAR_NONCAP_GROUP: + fSet = new NonCapFSet(++consCount); + break; - case Lexer.CHAR_POS_LOOKAHEAD: - /* falls through */ + case Lexer.CHAR_POS_LOOKAHEAD: + /* falls through */ - case Lexer.CHAR_NEG_LOOKAHEAD: - fSet = new AheadFSet(); - break; + case Lexer.CHAR_NEG_LOOKAHEAD: + fSet = new AheadFSet(); + break; - case Lexer.CHAR_POS_LOOKBEHIND: - /* falls through */ + case Lexer.CHAR_POS_LOOKBEHIND: + /* falls through */ - case Lexer.CHAR_NEG_LOOKBEHIND: - fSet = new BehindFSet(++consCount); - break; + case Lexer.CHAR_NEG_LOOKBEHIND: + fSet = new BehindFSet(++consCount); + break; - case Lexer.CHAR_ATOMIC_GROUP: - fSet = new AtomicFSet(++consCount); - break; + case Lexer.CHAR_ATOMIC_GROUP: + fSet = new AtomicFSet(++consCount); + break; - default: - globalGroupIndex++; - if (last == null) { + default: + globalGroupIndex++; + if (last == null) { - // expr = new StartSet(); - fSet = new FinalSet(); - saveChangedFlags = true; - } else { + // expr = new StartSet(); + fSet = new FinalSet(); + saveChangedFlags = true; + } else { - // expr = new JointSet(globalGroupIndex); - fSet = new FSet(globalGroupIndex); - } - if (globalGroupIndex > -1 && globalGroupIndex < 10) { - backRefs[globalGroupIndex] = fSet; - } - break; + // expr = new 
JointSet(globalGroupIndex); + fSet = new FSet(globalGroupIndex); + } + if (globalGroupIndex > -1 && globalGroupIndex < 10) { + backRefs[globalGroupIndex] = fSet; + } + break; } do { - if (lexemes.isLetter() - && lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR) { + if (lexemes.isLetter() && lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR) { child = processAlternations(fSet); - } else if (lexemes.peek() == Lexer.CHAR_VERTICAL_BAR){ + } else if (lexemes.peek() == Lexer.CHAR_VERTICAL_BAR) { child = new EmptySet(fSet); lexemes.next(); } else { @@ -433,14 +418,13 @@ public final class TPattern implements Serializable { } if (child != null) { - //expr.addChild(child); - children.add(child); + // expr.addChild(child); + children.add(child); } - } while (!(lexemes.isEmpty() - || (lexemes.peek() == Lexer.CHAR_RIGHT_PARENTHESIS))); + } while (!(lexemes.isEmpty() || (lexemes.peek() == Lexer.CHAR_RIGHT_PARENTHESIS))); if (lexemes.back() == Lexer.CHAR_VERTICAL_BAR) { - children.add(new EmptySet(fSet)); + children.add(new EmptySet(fSet)); } if (flags != saveFlags && !saveChangedFlags) { @@ -449,64 +433,62 @@ public final class TPattern implements Serializable { } switch (ch) { - case Lexer.CHAR_NONCAP_GROUP: - return new NonCapJointSet(children, fSet); + case Lexer.CHAR_NONCAP_GROUP: + return new NonCapJointSet(children, fSet); - case Lexer.CHAR_POS_LOOKAHEAD: - return new PositiveLookAhead(children, fSet); + case Lexer.CHAR_POS_LOOKAHEAD: + return new PositiveLookAhead(children, fSet); - case Lexer.CHAR_NEG_LOOKAHEAD: - return new NegativeLookAhead(children, fSet); + case Lexer.CHAR_NEG_LOOKAHEAD: + return new NegativeLookAhead(children, fSet); - case Lexer.CHAR_POS_LOOKBEHIND: - return new PositiveLookBehind(children, fSet); + case Lexer.CHAR_POS_LOOKBEHIND: + return new PositiveLookBehind(children, fSet); - case Lexer.CHAR_NEG_LOOKBEHIND: - return new NegativeLookBehind(children, fSet); + case Lexer.CHAR_NEG_LOOKBEHIND: + return new NegativeLookBehind(children, fSet); - case 
Lexer.CHAR_ATOMIC_GROUP: - return new AtomicJointSet(children, fSet); - - default: - switch (children.size()) { - case 0: - return new EmptySet(fSet); - - case 1: - return new SingleSet((AbstractSet) children.get(0), fSet); + case Lexer.CHAR_ATOMIC_GROUP: + return new AtomicJointSet(children, fSet); default: - return new JointSet(children, fSet); - } + switch (children.size()) { + case 0: + return new EmptySet(fSet); + + case 1: + return new SingleSet(children.get(0), fSet); + + default: + return new JointSet(children, fSet); + } } } - /** * T->a+ */ - private AbstractSet processSequence(AbstractSet last) { + private AbstractSet processSequence() { StringBuffer substring = new StringBuffer(); - while (!lexemes.isEmpty() - && lexemes.isLetter() - && !lexemes.isHighSurrogate() - && !lexemes.isLowSurrogate() - && ((!lexemes.isNextSpecial() && lexemes.lookAhead() == 0) // end + while (!lexemes.isEmpty() && + lexemes.isLetter() && + !lexemes.isHighSurrogate() && + !lexemes.isLowSurrogate() && + ((!lexemes.isNextSpecial() && lexemes.lookAhead() == 0) // end // of // pattern - || (!lexemes.isNextSpecial() && Lexer.isLetter(lexemes - .lookAhead())) - || lexemes.lookAhead() == Lexer.CHAR_RIGHT_PARENTHESIS - || (lexemes.lookAhead() & 0x8000ffff) == Lexer.CHAR_LEFT_PARENTHESIS - || lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR || lexemes - .lookAhead() == Lexer.CHAR_DOLLAR)) { + || + (!lexemes.isNextSpecial() && Lexer.isLetter(lexemes.lookAhead())) || + lexemes.lookAhead() == Lexer.CHAR_RIGHT_PARENTHESIS || + (lexemes.lookAhead() & 0x8000ffff) == Lexer.CHAR_LEFT_PARENTHESIS || + lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR || lexemes.lookAhead() == Lexer.CHAR_DOLLAR)) { int ch = lexemes.next(); if (Character.isSupplementaryCodePoint(ch)) { substring.append(Character.toChars(ch)); } else { - substring.append((char) ch); + substring.append((char)ch); } } if (!hasFlag(TPattern.CASE_INSENSITIVE)) { @@ -521,16 +503,16 @@ public final class TPattern implements Serializable { /** 
* D->a */ - private AbstractSet processDecomposedChar(AbstractSet last) { - int [] codePoints = new int [Lexer.MAX_DECOMPOSITION_LENGTH]; - char [] codePointsHangul; + private AbstractSet processDecomposedChar() { + int[] codePoints = new int[Lexer.MAX_DECOMPOSITION_LENGTH]; + char[] codePointsHangul; int readCodePoints = 0; int curSymb = -1; int curSymbIndex = -1; if (!lexemes.isEmpty() && lexemes.isLetter()) { curSymb = lexemes.next(); - codePoints [readCodePoints] = curSymb; + codePoints[readCodePoints] = curSymb; curSymbIndex = curSymb - Lexer.LBase; } @@ -540,59 +522,55 @@ public final class TPattern implements Serializable { * "3.12 Conjoining Jamo Behavior" */ if ((curSymbIndex >= 0) && (curSymbIndex < Lexer.LCount)) { - codePointsHangul = new char [Lexer - .MAX_HANGUL_DECOMPOSITION_LENGTH]; - codePointsHangul[readCodePoints++] = (char) curSymb; + codePointsHangul = new char[Lexer.MAX_HANGUL_DECOMPOSITION_LENGTH]; + codePointsHangul[readCodePoints++] = (char)curSymb; curSymb = lexemes.peek(); curSymbIndex = curSymb - Lexer.VBase; if ((curSymbIndex >= 0) && (curSymbIndex < Lexer.VCount)) { - codePointsHangul [readCodePoints++] = (char) curSymb; + codePointsHangul[readCodePoints++] = (char)curSymb; lexemes.next(); curSymb = lexemes.peek(); curSymbIndex = curSymb - Lexer.TBase; if ((curSymbIndex >= 0) && (curSymbIndex < Lexer.TCount)) { - codePointsHangul [readCodePoints++] = (char) curSymb; + codePointsHangul[readCodePoints++] = (char)curSymb; lexemes.next(); - //LVT syllable + // LVT syllable return new HangulDecomposedCharSet(codePointsHangul, 3); } else { - //LV syllable + // LV syllable return new HangulDecomposedCharSet(codePointsHangul, 2); } } else { - //L jamo - if (!hasFlag(TPattern.CASE_INSENSITIVE)) { - return new CharSet(codePointsHangul[0]); - } else if (!hasFlag(TPattern.UNICODE_CASE)) { - return new CICharSet(codePointsHangul[0]); - } else { - return new UCICharSet(codePointsHangul[0]); - } + // L jamo + if (!hasFlag(TPattern.CASE_INSENSITIVE)) 
{ + return new CharSet(codePointsHangul[0]); + } else if (!hasFlag(TPattern.UNICODE_CASE)) { + return new CICharSet(codePointsHangul[0]); + } else { + return new UCICharSet(codePointsHangul[0]); + } } - /* - * We process single codepoint or decomposed codepoint. - * We collect decomposed codepoint and obtain - * one DecomposedCharSet. - */ + /* + * We process single codepoint or decomposed codepoint. We collect + * decomposed codepoint and obtain one DecomposedCharSet. + */ } else { readCodePoints++; - while((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH) - && !lexemes.isEmpty() && lexemes.isLetter() - && !Lexer.isDecomposedCharBoundary(lexemes.peek())) { - codePoints [readCodePoints++] = lexemes.next(); + while ((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH) && !lexemes.isEmpty() && lexemes.isLetter() && + !Lexer.isDecomposedCharBoundary(lexemes.peek())) { + codePoints[readCodePoints++] = lexemes.next(); } /* * We have read an ordinary symbol. */ - if (readCodePoints == 1 - && !Lexer.hasSingleCodepointDecomposition(codePoints[0])) { + if (readCodePoints == 1 && !Lexer.hasSingleCodepointDecomposition(codePoints[0])) { return processCharSet(codePoints[0]); } else { if (!hasFlag(TPattern.CASE_INSENSITIVE)) { @@ -611,31 +589,28 @@ public final class TPattern implements Serializable { */ private AbstractSet processSubExpression(AbstractSet last) { AbstractSet cur; - if (lexemes.isLetter() && !lexemes.isNextSpecial() - && Lexer.isLetter(lexemes.lookAhead())) { + if (lexemes.isLetter() && !lexemes.isNextSpecial() && Lexer.isLetter(lexemes.lookAhead())) { if (hasFlag(TPattern.CANON_EQ)) { - cur = processDecomposedChar(last); + cur = processDecomposedChar(); if (!lexemes.isEmpty() - /* && !pattern.isQuantifier() */ - && (lexemes.peek() != Lexer.CHAR_RIGHT_PARENTHESIS - || last instanceof FinalSet) - && lexemes.peek() != Lexer.CHAR_VERTICAL_BAR - && !lexemes.isLetter()) { + /* && !pattern.isQuantifier() */ + && (lexemes.peek() != Lexer.CHAR_RIGHT_PARENTHESIS || last 
instanceof FinalSet) && + lexemes.peek() != Lexer.CHAR_VERTICAL_BAR && !lexemes.isLetter()) { cur = processQuantifier(last, cur); } } else if (lexemes.isHighSurrogate() || lexemes.isLowSurrogate()) { AbstractSet term = processTerminal(last); cur = processQuantifier(last, term); } else { - cur = processSequence(last); + cur = processSequence(); } } else if (lexemes.peek() == Lexer.CHAR_RIGHT_PARENTHESIS) { - if (last instanceof FinalSet) { - throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); - } else { - cur = new EmptySet(last); - } + if (last instanceof FinalSet) { + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + } else { + cur = new EmptySet(last); + } } else { AbstractSet term = processTerminal(last); cur = processQuantifier(last, term); @@ -643,23 +618,23 @@ public final class TPattern implements Serializable { if (!lexemes.isEmpty() // && !pattern.isQuantifier() - && (lexemes.peek() != Lexer.CHAR_RIGHT_PARENTHESIS - || last instanceof FinalSet) - && lexemes.peek() != Lexer.CHAR_VERTICAL_BAR) { + && + (lexemes.peek() != Lexer.CHAR_RIGHT_PARENTHESIS || last instanceof FinalSet) && + lexemes.peek() != Lexer.CHAR_VERTICAL_BAR) { AbstractSet next = processSubExpression(last); if (cur instanceof LeafQuantifierSet // TODO create personal UnifiedQuantifierSet for composite - // quantifiers - // to take into account Quantifier counters - // //// - && !(cur instanceof CompositeQuantifierSet) - && !(cur instanceof GroupQuantifierSet) - && !(cur instanceof AltQuantifierSet) - && !next.first(((LeafQuantifierSet) cur).getInnerSet())) { - cur = new UnifiedQuantifierSet((LeafQuantifierSet) cur); + // quantifiers + // to take into account Quantifier counters + // //// + && + !(cur instanceof CompositeQuantifierSet) && + !(cur instanceof GroupQuantifierSet) && + !(cur instanceof AltQuantifierSet) && !next.first(((LeafQuantifierSet)cur).getInnerSet())) { + cur = new UnifiedQuantifierSet((LeafQuantifierSet)cur); } - if 
(((char) next.getType()) == '+') { - cur.setNext(((LeafQuantifierSet) next).getInnerSet()); + if (((char)next.getType()) == '+') { + cur.setNext(((LeafQuantifierSet)next).getInnerSet()); } else { cur.setNext(next); } @@ -669,8 +644,8 @@ public final class TPattern implements Serializable { return null; } - if (((char) cur.getType()) == '+') { - return ((QuantifierSet) cur).getInnerSet(); + if (((char)cur.getType()) == '+') { + return ((QuantifierSet)cur).getInnerSet(); } else { return cur; } @@ -685,161 +660,146 @@ public final class TPattern implements Serializable { if (term != null && !(term instanceof LeafSet)) { switch (quant) { - case Lexer.QUANT_STAR: - case Lexer.QUANT_PLUS: { - QuantifierSet q; + case Lexer.QUANT_STAR: + case Lexer.QUANT_PLUS: { + QuantifierSet q; - lexemes.next(); - if (term.getType() == AbstractSet.TYPE_DOTSET) { - if (!hasFlag(TPattern.DOTALL)) { - q = new DotQuantifierSet(term, last, quant, - AbstractLineTerminator.getInstance(flags)); + lexemes.next(); + if (term.getType() == AbstractSet.TYPE_DOTSET) { + if (!hasFlag(TPattern.DOTALL)) { + q = new DotQuantifierSet(term, last, quant, AbstractLineTerminator.getInstance(flags)); + } else { + q = new DotAllQuantifierSet(term, last, quant); + } } else { - q = new DotAllQuantifierSet(term, last, quant); + q = new GroupQuantifierSet(term, last, quant); } - } else { - q = new GroupQuantifierSet(term, last, quant); + term.setNext(q); + return q; } - term.setNext(q); - return q; - } - case Lexer.QUANT_STAR_R: - case Lexer.QUANT_PLUS_R: { - lexemes.next(); - GroupQuantifierSet q = new ReluctantGroupQuantifierSet(term, - last, quant); - term.setNext(q); - return q; - } + case Lexer.QUANT_STAR_R: + case Lexer.QUANT_PLUS_R: { + lexemes.next(); + GroupQuantifierSet q = new ReluctantGroupQuantifierSet(term, last, quant); + term.setNext(q); + return q; + } - case Lexer.QUANT_PLUS_P: { - lexemes.next(); - // possessive plus will be handled by unique class - // and should not be postprocessed to point 
previous set - // to the inner one. - // // - return new PosPlusGroupQuantifierSet(term, last, - Lexer.QUANT_STAR_P); - } + case Lexer.QUANT_PLUS_P: { + lexemes.next(); + // possessive plus will be handled by unique class + // and should not be postprocessed to point previous set + // to the inner one. + // // + return new PosPlusGroupQuantifierSet(term, last, Lexer.QUANT_STAR_P); + } - case Lexer.QUANT_STAR_P: { - lexemes.next(); - return new PossessiveGroupQuantifierSet(term, last, quant); - } + case Lexer.QUANT_STAR_P: { + lexemes.next(); + return new PossessiveGroupQuantifierSet(term, last, quant); + } - case Lexer.QUANT_ALT: { - lexemes.next(); - AltGroupQuantifierSet q = new AltGroupQuantifierSet(term, last, - Lexer.QUANT_ALT); - term.setNext(last); - return q; - } + case Lexer.QUANT_ALT: { + lexemes.next(); + AltGroupQuantifierSet q = new AltGroupQuantifierSet(term, last, Lexer.QUANT_ALT); + term.setNext(last); + return q; + } - case Lexer.QUANT_ALT_P: { - lexemes.next(); - return new PosAltGroupQuantifierSet(term, last, Lexer.QUANT_ALT); - } + case Lexer.QUANT_ALT_P: { + lexemes.next(); + return new PosAltGroupQuantifierSet(term, last, Lexer.QUANT_ALT); + } - case Lexer.QUANT_ALT_R: { - lexemes.next(); - RelAltGroupQuantifierSet q = new RelAltGroupQuantifierSet(term, - last, Lexer.QUANT_ALT); - term.setNext(last); - return q; - } + case Lexer.QUANT_ALT_R: { + lexemes.next(); + RelAltGroupQuantifierSet q = new RelAltGroupQuantifierSet(term, last, Lexer.QUANT_ALT); + term.setNext(last); + return q; + } - case Lexer.QUANT_COMP: { - CompositeGroupQuantifierSet q = new CompositeGroupQuantifierSet( - (Quantifier) lexemes.nextSpecial(), term, last, - Lexer.QUANT_ALT, ++compCount); - term.setNext(q); - return q; - } + case Lexer.QUANT_COMP: { + CompositeGroupQuantifierSet q = new CompositeGroupQuantifierSet((Quantifier)lexemes.nextSpecial(), + term, last, Lexer.QUANT_ALT, ++compCount); + term.setNext(q); + return q; + } - case Lexer.QUANT_COMP_P: { - return new 
PosCompositeGroupQuantifierSet((Quantifier) lexemes - .nextSpecial(), term, last, Lexer.QUANT_ALT, - ++compCount); - } + case Lexer.QUANT_COMP_P: { + return new PosCompositeGroupQuantifierSet((Quantifier)lexemes.nextSpecial(), term, last, + Lexer.QUANT_ALT, ++compCount); + } - case Lexer.QUANT_COMP_R: { - RelCompositeGroupQuantifierSet q = new RelCompositeGroupQuantifierSet( - (Quantifier) lexemes.nextSpecial(), term, last, - Lexer.QUANT_ALT, ++compCount); - term.setNext(q); - return q; - } + case Lexer.QUANT_COMP_R: { + RelCompositeGroupQuantifierSet q = new RelCompositeGroupQuantifierSet( + (Quantifier)lexemes.nextSpecial(), term, last, Lexer.QUANT_ALT, ++compCount); + term.setNext(q); + return q; + } - default: - return term; + default: + return term; } } else { LeafSet leaf = null; if (term != null) - leaf = (LeafSet) term; + leaf = (LeafSet)term; switch (quant) { - case Lexer.QUANT_STAR: - case Lexer.QUANT_PLUS: { - lexemes.next(); - LeafQuantifierSet q = new LeafQuantifierSet(leaf, - last, quant); - leaf.setNext(q); - return q; - } + case Lexer.QUANT_STAR: + case Lexer.QUANT_PLUS: { + lexemes.next(); + LeafQuantifierSet q = new LeafQuantifierSet(leaf, last, quant); + leaf.setNext(q); + return q; + } - case Lexer.QUANT_STAR_R: - case Lexer.QUANT_PLUS_R: { - lexemes.next(); - ReluctantQuantifierSet q = new ReluctantQuantifierSet(leaf, - last, quant); - leaf.setNext(q); - return q; - } + case Lexer.QUANT_STAR_R: + case Lexer.QUANT_PLUS_R: { + lexemes.next(); + ReluctantQuantifierSet q = new ReluctantQuantifierSet(leaf, last, quant); + leaf.setNext(q); + return q; + } - case Lexer.QUANT_PLUS_P: - case Lexer.QUANT_STAR_P: { - lexemes.next(); - PossessiveQuantifierSet q = new PossessiveQuantifierSet(leaf, - last, quant); - leaf.setNext(q); - return q; - } + case Lexer.QUANT_PLUS_P: + case Lexer.QUANT_STAR_P: { + lexemes.next(); + PossessiveQuantifierSet q = new PossessiveQuantifierSet(leaf, last, quant); + leaf.setNext(q); + return q; + } - case Lexer.QUANT_ALT: { 
- lexemes.next(); - return new AltQuantifierSet(leaf, last, Lexer.QUANT_ALT); - } + case Lexer.QUANT_ALT: { + lexemes.next(); + return new AltQuantifierSet(leaf, last, Lexer.QUANT_ALT); + } - case Lexer.QUANT_ALT_P: { - lexemes.next(); - return new PossessiveAltQuantifierSet(leaf, last, - Lexer.QUANT_ALT_P); - } + case Lexer.QUANT_ALT_P: { + lexemes.next(); + return new PossessiveAltQuantifierSet(leaf, last, Lexer.QUANT_ALT_P); + } - case Lexer.QUANT_ALT_R: { - lexemes.next(); - return new ReluctantAltQuantifierSet(leaf, last, - Lexer.QUANT_ALT_R); - } + case Lexer.QUANT_ALT_R: { + lexemes.next(); + return new ReluctantAltQuantifierSet(leaf, last, Lexer.QUANT_ALT_R); + } - case Lexer.QUANT_COMP: { - return new CompositeQuantifierSet((Quantifier) lexemes - .nextSpecial(), leaf, last, Lexer.QUANT_COMP); - } + case Lexer.QUANT_COMP: { + return new CompositeQuantifierSet((Quantifier)lexemes.nextSpecial(), leaf, last, Lexer.QUANT_COMP); + } - case Lexer.QUANT_COMP_P: { - return new PossessiveCompositeQuantifierSet( - (Quantifier) lexemes.nextSpecial(), leaf, last, - Lexer.QUANT_COMP_P); - } - case Lexer.QUANT_COMP_R: { - return new ReluctantCompositeQuantifierSet((Quantifier) lexemes - .nextSpecial(), leaf, last, Lexer.QUANT_COMP_R); - } + case Lexer.QUANT_COMP_P: { + return new PossessiveCompositeQuantifierSet((Quantifier)lexemes.nextSpecial(), leaf, last, + Lexer.QUANT_COMP_P); + } + case Lexer.QUANT_COMP_R: { + return new ReluctantCompositeQuantifierSet((Quantifier)lexemes.nextSpecial(), leaf, last, + Lexer.QUANT_COMP_R); + } - default: - return term; + default: + return term; } } } @@ -853,186 +813,179 @@ public final class TPattern implements Serializable { do { ch = lexemes.peek(); if ((ch & 0x8000ffff) == Lexer.CHAR_LEFT_PARENTHESIS) { - int newFlags; - lexemes.next(); - newFlags = (ch & 0x00ff0000) >> 16; - ch = ch & 0xff00ffff; - if (ch == Lexer.CHAR_FLAGS) { - flags = newFlags; - } else { - newFlags = (ch == Lexer.CHAR_NONCAP_GROUP) - ? 
newFlags - : flags; - term = processExpression(ch, newFlags, last); - if (lexemes.peek() != Lexer.CHAR_RIGHT_PARENTHESIS) { - throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); - } - lexemes.next(); - } + int newFlags; + lexemes.next(); + newFlags = (ch & 0x00ff0000) >> 16; + ch = ch & 0xff00ffff; + if (ch == Lexer.CHAR_FLAGS) { + flags = newFlags; + } else { + newFlags = (ch == Lexer.CHAR_NONCAP_GROUP) ? newFlags : flags; + term = processExpression(ch, newFlags, last); + if (lexemes.peek() != Lexer.CHAR_RIGHT_PARENTHESIS) { + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + } + lexemes.next(); + } } else switch (ch) { - case Lexer.CHAR_LEFT_SQUARE_BRACKET: { - lexemes.next(); - boolean negative = false; - if (lexemes.peek() == Lexer.CHAR_CARET) { - negative = true; + case Lexer.CHAR_LEFT_SQUARE_BRACKET: { lexemes.next(); - } - - term = processRange(negative, last); - if (lexemes.peek() != Lexer.CHAR_RIGHT_SQUARE_BRACKET) - throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); - lexemes.setMode(Lexer.MODE_PATTERN); - lexemes.next(); - break; - } - - case Lexer.CHAR_DOT: { - lexemes.next(); - - if (!hasFlag(TPattern.DOTALL)) { - term = new DotSet(AbstractLineTerminator - .getInstance(flags)); - } else { - term = new DotAllSet(); - } - - break; - } - - case Lexer.CHAR_CARET: { - lexemes.next(); - consCount++; - if (!hasFlag(TPattern.MULTILINE)) { - term = new SOLSet(); - } else { - term = new MultiLineSOLSet(AbstractLineTerminator - .getInstance(flags)); - } - - break; - } - - case Lexer.CHAR_DOLLAR: { - lexemes.next(); - consCount++; - if (!hasFlag(TPattern.MULTILINE)) { - if (!hasFlag(TPattern.UNIX_LINES)) { - term = new EOLSet(consCount); - } else { - term = new UEOLSet(consCount); - } - } else { - if (!hasFlag(TPattern.UNIX_LINES)) { - term = new MultiLineEOLSet(consCount); - } else { - term = new UMultiLineEOLSet(consCount); + boolean negative = false; + if (lexemes.peek() == 
Lexer.CHAR_CARET) { + negative = true; + lexemes.next(); } + + term = processRange(negative, last); + if (lexemes.peek() != Lexer.CHAR_RIGHT_SQUARE_BRACKET) + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + lexemes.setMode(Lexer.MODE_PATTERN); + lexemes.next(); + break; } - break; - } + case Lexer.CHAR_DOT: { + lexemes.next(); - case Lexer.CHAR_WORD_BOUND: { - lexemes.next(); - term = new WordBoundary(true); - break; - } + if (!hasFlag(TPattern.DOTALL)) { + term = new DotSet(AbstractLineTerminator.getInstance(flags)); + } else { + term = new DotAllSet(); + } - case Lexer.CHAR_NONWORD_BOUND: { - lexemes.next(); - term = new WordBoundary(false); - break; - } + break; + } - case Lexer.CHAR_END_OF_INPUT: { - lexemes.next(); - term = new EOISet(); - break; - } - - case Lexer.CHAR_END_OF_LINE: { - lexemes.next(); - term = new EOLSet(++consCount); - break; - } - - case Lexer.CHAR_START_OF_INPUT: { - lexemes.next(); - term = new SOLSet(); - break; - } - - case Lexer.CHAR_PREVIOUS_MATCH: { - lexemes.next(); - term = new PreviousMatch(); - break; - } - - case 0x80000000 | '1': - case 0x80000000 | '2': - case 0x80000000 | '3': - case 0x80000000 | '4': - case 0x80000000 | '5': - case 0x80000000 | '6': - case 0x80000000 | '7': - case 0x80000000 | '8': - case 0x80000000 | '9': { - int number = (ch & 0x7FFFFFFF) - '0'; - if (globalGroupIndex >= number) { + case Lexer.CHAR_CARET: { lexemes.next(); consCount++; - if (!hasFlag(TPattern.CASE_INSENSITIVE)) { - term = new BackReferenceSet(number, consCount); - } else if (!hasFlag(TPattern.UNICODE_CASE)) { - term = new CIBackReferenceSet(number, consCount); + if (!hasFlag(TPattern.MULTILINE)) { + term = new SOLSet(); } else { - term = new UCIBackReferenceSet(number, consCount); + term = new MultiLineSOLSet(AbstractLineTerminator.getInstance(flags)); } - (backRefs [number]).isBackReferenced = true; - needsBackRefReplacement = true; - break; - } else { - throw new TPatternSyntaxException("", 
lexemes.toString(), lexemes.getIndex()); - } - } - case 0: { - AbstractCharClass cc = null; - if ((cc = (AbstractCharClass) lexemes.peekSpecial()) != null) { - term = processRangeSet(cc); - } else if (!lexemes.isEmpty()) { - - //ch == 0 - term = new CharSet((char) ch); - } else { - term = new EmptySet(last); break; } - lexemes.next(); - break; - } - default: { - if (ch >= 0 && !lexemes.isSpecial()) { - term = processCharSet(ch); + case Lexer.CHAR_DOLLAR: { lexemes.next(); - } else if (ch == Lexer.CHAR_VERTICAL_BAR) { - term = new EmptySet(last); - } else if (ch == Lexer.CHAR_RIGHT_PARENTHESIS) { - if (last instanceof FinalSet) { - throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + consCount++; + if (!hasFlag(TPattern.MULTILINE)) { + if (!hasFlag(TPattern.UNIX_LINES)) { + term = new EOLSet(consCount); + } else { + term = new UEOLSet(consCount); + } } else { - term = new EmptySet(last); + if (!hasFlag(TPattern.UNIX_LINES)) { + term = new MultiLineEOLSet(consCount); + } else { + term = new UMultiLineEOLSet(consCount); + } + } + + break; + } + + case Lexer.CHAR_WORD_BOUND: { + lexemes.next(); + term = new WordBoundary(true); + break; + } + + case Lexer.CHAR_NONWORD_BOUND: { + lexemes.next(); + term = new WordBoundary(false); + break; + } + + case Lexer.CHAR_END_OF_INPUT: { + lexemes.next(); + term = new EOISet(); + break; + } + + case Lexer.CHAR_END_OF_LINE: { + lexemes.next(); + term = new EOLSet(++consCount); + break; + } + + case Lexer.CHAR_START_OF_INPUT: { + lexemes.next(); + term = new SOLSet(); + break; + } + + case Lexer.CHAR_PREVIOUS_MATCH: { + lexemes.next(); + term = new PreviousMatch(); + break; + } + + case 0x80000000 | '1': + case 0x80000000 | '2': + case 0x80000000 | '3': + case 0x80000000 | '4': + case 0x80000000 | '5': + case 0x80000000 | '6': + case 0x80000000 | '7': + case 0x80000000 | '8': + case 0x80000000 | '9': { + int number = (ch & 0x7FFFFFFF) - '0'; + if (globalGroupIndex >= number) { + lexemes.next(); + 
consCount++; + if (!hasFlag(TPattern.CASE_INSENSITIVE)) { + term = new BackReferenceSet(number, consCount); + } else if (!hasFlag(TPattern.UNICODE_CASE)) { + term = new CIBackReferenceSet(number, consCount); + } else { + term = new UCIBackReferenceSet(number, consCount); + } + (backRefs[number]).isBackReferenced = true; + needsBackRefReplacement = true; + break; + } else { + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + } + } + + case 0: { + AbstractCharClass cc = null; + if ((cc = (AbstractCharClass)lexemes.peekSpecial()) != null) { + term = processRangeSet(cc); + } else if (!lexemes.isEmpty()) { + + // ch == 0 + term = new CharSet((char)ch); + } else { + term = new EmptySet(last); + break; + } + lexemes.next(); + break; + } + + default: { + if (ch >= 0 && !lexemes.isSpecial()) { + term = processCharSet(ch); + lexemes.next(); + } else if (ch == Lexer.CHAR_VERTICAL_BAR) { + term = new EmptySet(last); + } else if (ch == Lexer.CHAR_RIGHT_PARENTHESIS) { + if (last instanceof FinalSet) { + throw new TPatternSyntaxException("", lexemes.toString(), lexemes.getIndex()); + } else { + term = new EmptySet(last); + } + } else { + throw new TPatternSyntaxException((lexemes.isSpecial() ? lexemes.peekSpecial().toString() + : Character.toString((char)ch)), lexemes.toString(), lexemes.getIndex()); } - } else { - throw new TPatternSyntaxException( - (lexemes.isSpecial() ? lexemes.peekSpecial() - .toString() : Character - .toString((char) ch)), lexemes - .toString(), lexemes.getIndex()); } - } } } while (ch == Lexer.CHAR_FLAGS); return term; @@ -1050,154 +1003,146 @@ public final class TPattern implements Serializable { * process [...] 
ranges */ private CharClass processRangeExpression(boolean alt) { - CharClass res = new CharClass(alt, hasFlag(TPattern.CASE_INSENSITIVE), - hasFlag(TPattern.UNICODE_CASE)); + CharClass res = new CharClass(alt, hasFlag(TPattern.CASE_INSENSITIVE), hasFlag(TPattern.UNICODE_CASE)); int buffer = -1; boolean intersection = false; boolean notClosed = false; boolean firstInClass = true; - while (!lexemes.isEmpty() - && (notClosed = (lexemes.peek()) != Lexer.CHAR_RIGHT_SQUARE_BRACKET - || firstInClass)) { + while (!lexemes.isEmpty() && (notClosed = (lexemes.peek()) != Lexer.CHAR_RIGHT_SQUARE_BRACKET || firstInClass)) { switch (lexemes.peek()) { - case Lexer.CHAR_RIGHT_SQUARE_BRACKET: { - if (buffer >= 0) - res.add(buffer); - buffer = ']'; - lexemes.next(); - break; - } - case Lexer.CHAR_LEFT_SQUARE_BRACKET: { - if (buffer >= 0) { - res.add(buffer); - buffer = -1; - } - lexemes.next(); - boolean negative = false; - if (lexemes.peek() == Lexer.CHAR_CARET) { - lexemes.next(); - negative = true; - } - - if (intersection) - res.intersection(processRangeExpression(negative)); - else - res.union(processRangeExpression(negative)); - intersection = false; - lexemes.next(); - break; - } - - case Lexer.CHAR_AMPERSAND: { - if (buffer >= 0) - res.add(buffer); - buffer = lexemes.next(); - - /* - * if there is a start for subrange we will do an intersection - * otherwise treat '&' as a normal character - */ - if (lexemes.peek() == Lexer.CHAR_AMPERSAND) { - if (lexemes.lookAhead() - == Lexer.CHAR_LEFT_SQUARE_BRACKET) { - lexemes.next(); - intersection = true; - buffer = -1; - } else { - lexemes.next(); - if (firstInClass) { - - //skip "&&" at "[&&...]" or "[^&&...]" - res = processRangeExpression(false); - } else { - - //ignore "&&" at "[X&&]" ending where X != empty string - if (!(lexemes.peek() - == Lexer.CHAR_RIGHT_SQUARE_BRACKET)) { - res.intersection(processRangeExpression(false)); - } - } - - } - } else { - - //treat '&' as a normal character - buffer = '&'; - } - - break; - } - - 
case Lexer.CHAR_HYPHEN: { - if (firstInClass - || lexemes.lookAhead() == Lexer.CHAR_RIGHT_SQUARE_BRACKET - || lexemes.lookAhead() == Lexer.CHAR_LEFT_SQUARE_BRACKET - || buffer < 0) { - // treat hypen as normal character + case Lexer.CHAR_RIGHT_SQUARE_BRACKET: { if (buffer >= 0) res.add(buffer); - buffer = '-'; + buffer = ']'; lexemes.next(); - // range - } else { + break; + } + case Lexer.CHAR_LEFT_SQUARE_BRACKET: { + if (buffer >= 0) { + res.add(buffer); + buffer = -1; + } lexemes.next(); - int cur = lexemes.peek(); + boolean negative = false; + if (lexemes.peek() == Lexer.CHAR_CARET) { + lexemes.next(); + negative = true; + } - if (!lexemes.isSpecial() - && (cur >= 0 - || lexemes.lookAhead() == Lexer.CHAR_RIGHT_SQUARE_BRACKET - || lexemes.lookAhead() == Lexer.CHAR_LEFT_SQUARE_BRACKET || buffer < 0)) { + if (intersection) + res.intersection(processRangeExpression(negative)); + else + res.union(processRangeExpression(negative)); + intersection = false; + lexemes.next(); + break; + } - try { - if (!Lexer.isLetter(cur)) { - cur = cur & 0xFFFF; + case Lexer.CHAR_AMPERSAND: { + if (buffer >= 0) + res.add(buffer); + buffer = lexemes.next(); + + /* + * if there is a start for subrange we will do an + * intersection otherwise treat '&' as a normal character + */ + if (lexemes.peek() == Lexer.CHAR_AMPERSAND) { + if (lexemes.lookAhead() == Lexer.CHAR_LEFT_SQUARE_BRACKET) { + lexemes.next(); + intersection = true; + buffer = -1; + } else { + lexemes.next(); + if (firstInClass) { + + // skip "&&" at "[&&...]" or "[^&&...]" + res = processRangeExpression(false); + } else { + + // ignore "&&" at "[X&&]" ending where X != + // empty string + if (!(lexemes.peek() == Lexer.CHAR_RIGHT_SQUARE_BRACKET)) { + res.intersection(processRangeExpression(false)); + } } - res.add(buffer, cur); - } catch (Exception e) { + + } + } else { + + // treat '&' as a normal character + buffer = '&'; + } + + break; + } + + case Lexer.CHAR_HYPHEN: { + if (firstInClass || lexemes.lookAhead() == 
Lexer.CHAR_RIGHT_SQUARE_BRACKET || + lexemes.lookAhead() == Lexer.CHAR_LEFT_SQUARE_BRACKET || buffer < 0) { + // treat hypen as normal character + if (buffer >= 0) + res.add(buffer); + buffer = '-'; + lexemes.next(); + // range + } else { + lexemes.next(); + int cur = lexemes.peek(); + + if (!lexemes.isSpecial() && + (cur >= 0 || lexemes.lookAhead() == Lexer.CHAR_RIGHT_SQUARE_BRACKET || + lexemes.lookAhead() == Lexer.CHAR_LEFT_SQUARE_BRACKET || buffer < 0)) { + + try { + if (!Lexer.isLetter(cur)) { + cur = cur & 0xFFFF; + } + res.add(buffer, cur); + } catch (Exception e) { + throw new TPatternSyntaxException("", pattern(), lexemes.getIndex()); + } + lexemes.next(); + buffer = -1; + } else { throw new TPatternSyntaxException("", pattern(), lexemes.getIndex()); } - lexemes.next(); + } + + break; + } + + case Lexer.CHAR_CARET: { + if (buffer >= 0) + res.add(buffer); + buffer = '^'; + lexemes.next(); + break; + } + + case 0: { + if (buffer >= 0) + res.add(buffer); + AbstractCharClass cs = (AbstractCharClass)lexemes.peekSpecial(); + if (cs != null) { + res.add(cs); buffer = -1; } else { - throw new TPatternSyntaxException("", pattern(), lexemes.getIndex()); + buffer = 0; } + + lexemes.next(); + break; } - break; - } - - case Lexer.CHAR_CARET: { - if (buffer >= 0) - res.add(buffer); - buffer = '^'; - lexemes.next(); - break; - } - - case 0: { - if (buffer >= 0) - res.add(buffer); - AbstractCharClass cs = (AbstractCharClass) lexemes - .peekSpecial(); - if (cs != null) { - res.add(cs); - buffer = -1; - } else { - buffer = 0; + default: { + if (buffer >= 0) + res.add(buffer); + buffer = lexemes.next(); + break; } - - lexemes.next(); - break; - } - - default: { - if (buffer >= 0) - res.add(buffer); - buffer = lexemes.next(); - break; - } } firstInClass = false; @@ -1211,28 +1156,25 @@ public final class TPattern implements Serializable { } private AbstractSet processCharSet(int ch) { - boolean isSupplCodePoint = Character - .isSupplementaryCodePoint(ch); + boolean 
isSupplCodePoint = Character.isSupplementaryCodePoint(ch); if (hasFlag(TPattern.CASE_INSENSITIVE)) { - if ((ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z')) { - return new CICharSet((char) ch); - } else if (hasFlag(TPattern.UNICODE_CASE) - && ch > 128) { + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { + return new CICharSet((char)ch); + } else if (hasFlag(TPattern.UNICODE_CASE) && ch > 128) { if (isSupplCodePoint) { return new UCISupplCharSet(ch); } else if (Lexer.isLowSurrogate(ch)) { - //we need no UCILowSurrogateCharSet - return new LowSurrogateCharSet((char) ch); + // we need no UCILowSurrogateCharSet + return new LowSurrogateCharSet((char)ch); } else if (Lexer.isHighSurrogate(ch)) { - //we need no UCIHighSurrogateCharSet - return new HighSurrogateCharSet((char) ch); + // we need no UCIHighSurrogateCharSet + return new HighSurrogateCharSet((char)ch); } else { - return new UCICharSet((char) ch); + return new UCICharSet((char)ch); } } } @@ -1240,40 +1182,33 @@ public final class TPattern implements Serializable { if (isSupplCodePoint) { return new SupplCharSet(ch); } else if (Lexer.isLowSurrogate(ch)) { - return new LowSurrogateCharSet((char) ch); + return new LowSurrogateCharSet((char)ch); } else if (Lexer.isHighSurrogate(ch)) { - return new HighSurrogateCharSet((char) ch); + return new HighSurrogateCharSet((char)ch); } else { - return new CharSet((char) ch); + return new CharSet((char)ch); } } private AbstractSet processRangeSet(AbstractCharClass charClass) { if (charClass.hasLowHighSurrogates()) { AbstractCharClass surrogates = charClass.getSurrogates(); - LowHighSurrogateRangeSet lowHighSurrRangeSet - = new LowHighSurrogateRangeSet(surrogates); + LowHighSurrogateRangeSet lowHighSurrRangeSet = new LowHighSurrogateRangeSet(surrogates); if (charClass.mayContainSupplCodepoints()) { if (!charClass.hasUCI()) { - return new CompositeRangeSet( - new SupplRangeSet(charClass.getWithoutSurrogates()), + return new CompositeRangeSet(new 
SupplRangeSet(charClass.getWithoutSurrogates()), lowHighSurrRangeSet); } else { - return new CompositeRangeSet( - new UCISupplRangeSet(charClass.getWithoutSurrogates()), + return new CompositeRangeSet(new UCISupplRangeSet(charClass.getWithoutSurrogates()), lowHighSurrRangeSet); } } if (!charClass.hasUCI()) { - return new CompositeRangeSet( - new RangeSet(charClass.getWithoutSurrogates()), - lowHighSurrRangeSet); + return new CompositeRangeSet(new RangeSet(charClass.getWithoutSurrogates()), lowHighSurrRangeSet); } else { - return new CompositeRangeSet( - new UCIRangeSet(charClass.getWithoutSurrogates()), - lowHighSurrRangeSet); + return new CompositeRangeSet(new UCIRangeSet(charClass.getWithoutSurrogates()), lowHighSurrRangeSet); } } @@ -1294,8 +1229,8 @@ public final class TPattern implements Serializable { /** * Compiles a regular expression, creating a new Pattern instance in the - * process. This is actually a convenience method that calls {@link - * #compile(String, int)} with a {@code flags} value of zero. + * process. This is actually a convenience method that calls + * {@link #compile(String, int)} with a {@code flags} value of zero. * * @param pattern * the regular expression. @@ -1310,17 +1245,16 @@ public final class TPattern implements Serializable { } /* - * This method do traverses of - * automata to finish compilation. + * This method do traverses of automata to finish compilation. 
*/ private void finalizeCompile() { - /* - * Processing second pass - */ - if (needsBackRefReplacement) { //|| needsReason1 || needsReason2) { - start.processSecondPass(); - } + /* + * Processing second pass + */ + if (needsBackRefReplacement) { // || needsReason1 || needsReason2) { + start.processSecondPass(); + } } @@ -1414,14 +1348,13 @@ public final class TPattern implements Serializable { /** * Serialization support */ - private void readObject(java.io.ObjectInputStream s) - throws java.io.IOException, ClassNotFoundException { + private void readObject(java.io.ObjectInputStream s) throws java.io.IOException, ClassNotFoundException { s.defaultReadObject(); AbstractSet.counter = 1; globalGroupIndex = -1; compCount = -1; consCount = -1; - backRefs = new FSet [BACK_REF_NUMBER]; + backRefs = new FSet[BACK_REF_NUMBER]; compileImpl(pattern, flags); diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPatternSyntaxException.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPatternSyntaxException.java index 6a79fac7b..6ad5dbd51 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPatternSyntaxException.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/TPatternSyntaxException.java @@ -91,15 +91,15 @@ public class TPatternSyntaxException extends IllegalArgumentException { * * @return the error message. */ + @Override public String getMessage() { - String filler = ""; //$NON-NLS-1$ + String filler = ""; if (index >= 1) { char[] temp = new char[index]; Arrays.fill(temp, ' '); filler = new String(temp); } - return desc - + ((pattern != null && pattern.length() != 0) ? index + ", " + pattern + ", " + filler : ""); + return desc + ((pattern != null && pattern.length() != 0) ? 
index + ", " + pattern + ", " + filler : ""); } /** diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIBackReferenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIBackReferenceSet.java index a79603576..cb71d0173 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIBackReferenceSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIBackReferenceSet.java @@ -33,26 +33,24 @@ class UCIBackReferenceSet extends CIBackReferenceSet { super(groupIndex, consCounter); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { String group = getString(matchResult); - if (group == null - || (stringIndex + group.length()) > matchResult.getRightBound()) + if (group == null || (stringIndex + group.length()) > matchResult.getRightBound()) return -1; for (int i = 0; i < group.length(); i++) { - if (Character.toLowerCase(Character.toUpperCase(group.charAt(i))) != Character - .toLowerCase(Character.toUpperCase(testString - .charAt(stringIndex + i)))) { + if (Character.toLowerCase(Character.toUpperCase(group.charAt(i))) != Character.toLowerCase(Character + .toUpperCase(testString.charAt(stringIndex + i)))) { return -1; } } matchResult.setConsumed(consCounter, group.length()); - return next.matches(stringIndex + group.length(), testString, - matchResult); + return next.matches(stringIndex + group.length(), testString, matchResult); } + @Override public String getName() { return "UCI back reference: " + this.groupIndex; //$NON-NLS-1$ } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCICharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCICharSet.java index 885006dda..cec7ceb9b 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCICharSet.java +++ 
b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCICharSet.java @@ -34,12 +34,14 @@ class UCICharSet extends LeafSet { this.ch = Character.toLowerCase(Character.toUpperCase(ch)); } + @Override public int accepts(int strIndex, CharSequence testString) { return (this.ch == Character.toLowerCase(Character .toUpperCase(testString.charAt(strIndex)))) ? 1 : -1; } + @Override protected String getName() { - return "UCI " + ch; //$NON-NLS-1$ + return "UCI " + ch; } } \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIRangeSet.java index 638addb91..1ef12365f 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIRangeSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCIRangeSet.java @@ -44,12 +44,14 @@ class UCIRangeSet extends LeafSet { this.alt = cc.alt; } + @Override public int accepts(int strIndex, CharSequence testString) { return (chars.contains(Character.toLowerCase(Character .toUpperCase(testString.charAt(strIndex))))) ? 1 : -1; } + @Override protected String getName() { - return "UCI range:" + (alt ? "^ " : " ") + chars.toString(); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + return "UCI range:" + (alt ? 
"^ " : " ") + chars.toString(); } } \ No newline at end of file diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISequenceSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISequenceSet.java index 55e9e8f4e..f05441e01 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISequenceSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISequenceSet.java @@ -32,17 +32,16 @@ class UCISequenceSet extends LeafSet { UCISequenceSet(StringBuffer substring) { StringBuilder res = new StringBuilder(); for (int i = 0; i < substring.length(); i++) { - res.append(Character.toLowerCase(Character.toUpperCase(substring - .charAt(i)))); + res.append(Character.toLowerCase(Character.toUpperCase(substring.charAt(i)))); } this.string = res.toString(); this.charCount = res.length(); } + @Override public int accepts(int strIndex, CharSequence testString) { for (int i = 0; i < string.length(); i++) { - if (string.charAt(i) != Character.toLowerCase(Character - .toUpperCase(testString.charAt(strIndex + i)))) { + if (string.charAt(i) != Character.toLowerCase(Character.toUpperCase(testString.charAt(strIndex + i)))) { return -1; } } @@ -51,6 +50,7 @@ class UCISequenceSet extends LeafSet { } + @Override public String getName() { return "UCI sequence: " + string; //$NON-NLS-1$ } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplCharSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplCharSet.java index f32ff886b..ea64b1661 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplCharSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplCharSet.java @@ -83,12 +83,12 @@ package org.teavm.classlib.java.util.regex; /** - * Represents node accepting single supplementary - * codepoint in Unicode case insensitive manner. 
+ * Represents node accepting single supplementary codepoint in Unicode case + * insensitive manner. */ class UCISupplCharSet extends LeafSet { - //int value of this supplementary codepoint + // int value of this supplementary codepoint private int ch; public UCISupplCharSet(int ch) { @@ -96,14 +96,14 @@ class UCISupplCharSet extends LeafSet { this.ch = Character.toLowerCase(Character.toUpperCase(ch)); } + @Override public int accepts(int strIndex, CharSequence testString) { char high = testString.charAt(strIndex++); char low = testString.charAt(strIndex); - return (this.ch == Character.toLowerCase(Character - .toUpperCase(Character - .toCodePoint(high, low)))) ? 2 : -1; + return (this.ch == Character.toLowerCase(Character.toUpperCase(Character.toCodePoint(high, low)))) ? 2 : -1; } + @Override protected String getName() { return "UCI " + new String(Character.toChars(ch)); } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplRangeSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplRangeSet.java index 159019c31..8aa2f2837 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplRangeSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UCISupplRangeSet.java @@ -98,10 +98,12 @@ class UCISupplRangeSet extends SupplRangeSet{ super(cc); } + @Override public boolean contains(int ch) { return chars.contains(Character.toLowerCase(Character.toUpperCase(ch))); } + @Override protected String getName() { return "UCI range:" + (alt ? 
"^ " : " ") + chars.toString(); } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UEOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UEOLSet.java index aa61db378..f34431102 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UEOLSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UEOLSet.java @@ -33,10 +33,9 @@ final class UEOLSet extends AbstractSet { this.consCounter = counter; } - public int matches(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { - int rightBound = matchResult.hasAnchoringBounds() ? matchResult - .getRightBound() : testString.length(); + @Override + public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) { + int rightBound = matchResult.hasAnchoringBounds() ? matchResult.getRightBound() : testString.length(); if (strIndex >= rightBound) { matchResult.setConsumed(consCounter, 0); @@ -52,6 +51,7 @@ final class UEOLSet extends AbstractSet { return -1; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { int cons; boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); @@ -59,6 +59,7 @@ final class UEOLSet extends AbstractSet { return res; } + @Override protected String getName() { return ""; //$NON-NLS-1$ } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UMultiLineEOLSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UMultiLineEOLSet.java index 6a77173a9..9c278274e 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UMultiLineEOLSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UMultiLineEOLSet.java @@ -33,11 +33,10 @@ class UMultiLineEOLSet extends AbstractSet { this.consCounter = counter; } - public int matches(int strIndex, CharSequence testString, - MatchResultImpl matchResult) { - int strDif = matchResult.hasAnchoringBounds() ? 
matchResult - .getRightBound() - - strIndex : testString.length() - strIndex; + @Override + public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) { + int strDif = matchResult.hasAnchoringBounds() ? matchResult.getRightBound() - strIndex : testString.length() - + strIndex; if (strDif <= 0) { matchResult.setConsumed(consCounter, 0); return next.matches(strIndex, testString, matchResult); @@ -48,6 +47,7 @@ class UMultiLineEOLSet extends AbstractSet { return -1; } + @Override public boolean hasConsumed(MatchResultImpl matchResult) { int cons; boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0); @@ -55,7 +55,8 @@ class UMultiLineEOLSet extends AbstractSet { return res; } + @Override protected String getName() { - return ""; //$NON-NLS-1$ + return ""; } } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategory.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategory.java index 40fce58ed..310352af5 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategory.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategory.java @@ -33,6 +33,7 @@ class UnicodeCategory extends AbstractCharClass { this.category = category; } + @Override public boolean contains(int ch) { return alt ^ (category == Character.getType((char) ch)); } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategoryScope.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategoryScope.java index 514fcfe51..ad6360d37 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategoryScope.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnicodeCategoryScope.java @@ -30,6 +30,7 @@ class UnicodeCategoryScope extends UnicodeCategory { super(category); } + @Override public boolean contains(int ch) { return alt ^ 
((category >> Character.getType((char) ch)) & 1) != 0; } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnifiedQuantifierSet.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnifiedQuantifierSet.java index 2450f50aa..d9bf00315 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnifiedQuantifierSet.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/UnifiedQuantifierSet.java @@ -21,8 +21,8 @@ package org.teavm.classlib.java.util.regex; /** - * Greedy quantifier node for the case where there is no intersection with - * next node and normal quantifiers could be treated as greedy and possessive. + * Greedy quantifier node for the case where there is no intersection with next + * node and normal quantifiers could be treated as greedy and possessive. * * @author Nikolay A. Kuznetsov */ @@ -33,28 +33,27 @@ class UnifiedQuantifierSet extends LeafQuantifierSet { } public UnifiedQuantifierSet(LeafQuantifierSet quant) { - super((LeafSet) quant.getInnerSet(), quant.getNext(), quant.getType()); + super((LeafSet)quant.getInnerSet(), quant.getNext(), quant.getType()); innerSet.setNext(this); } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { - while (stringIndex + leaf.charCount() <= matchResult.getRightBound() - && leaf.accepts(stringIndex, testString) > 0) + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { + while (stringIndex + leaf.charCount() <= matchResult.getRightBound() && + leaf.accepts(stringIndex, testString) > 0) stringIndex += leaf.charCount(); return next.matches(stringIndex, testString, matchResult); } - public int find(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { int startSearch = next.find(stringIndex, testString, matchResult); if 
(startSearch < 0) return -1; int newSearch = startSearch - leaf.charCount(); - while (newSearch >= stringIndex - && leaf.accepts(newSearch, testString) > 0) { + while (newSearch >= stringIndex && leaf.accepts(newSearch, testString) > 0) { startSearch = newSearch; newSearch -= leaf.charCount(); } diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/WordBoundary.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/WordBoundary.java index bb29647f9..68d71e9e7 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/WordBoundary.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/util/regex/WordBoundary.java @@ -34,39 +34,36 @@ class WordBoundary extends AbstractSet { this.positive = positive; } - public int matches(int stringIndex, CharSequence testString, - MatchResultImpl matchResult) { + @Override + public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) { boolean left; boolean right; - char ch1 = stringIndex >= matchResult.getRightBound() ? ' ' : testString - .charAt(stringIndex); + char ch1 = stringIndex >= matchResult.getRightBound() ? ' ' : testString.charAt(stringIndex); char ch2 = stringIndex == 0 ? ' ' : testString.charAt(stringIndex - 1); - int leftBound = matchResult.hasTransparentBounds() ? 0 : matchResult - .getLeftBound(); + int leftBound = matchResult.hasTransparentBounds() ? 0 : matchResult.getLeftBound(); left = (ch1 == ' ') || isSpace(ch1, stringIndex, leftBound, testString); - right = (ch2 == ' ') - || isSpace(ch2, stringIndex - 1, leftBound, testString); - return ((left ^ right) ^ positive) ? -1 : next.matches(stringIndex, - testString, matchResult); + right = (ch2 == ' ') || isSpace(ch2, stringIndex - 1, leftBound, testString); + return ((left ^ right) ^ positive) ? -1 : next.matches(stringIndex, testString, matchResult); } /** * Returns false, because word boundary does not consumes any characters and * do not move string index. 
*/ + @Override public boolean hasConsumed(MatchResultImpl matchResult) { // only checks boundary, do not consumes characters return false; } + @Override protected String getName() { return "WordBoundary"; //$NON-NLS-1$ } - private boolean isSpace(char ch, int index, int leftBound, - CharSequence testString) { + private boolean isSpace(char ch, int index, int leftBound, CharSequence testString) { if (Character.isLetterOrDigit(ch) || ch == '_') return false; if (Character.getType(ch) == Character.NON_SPACING_MARK) { diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternTest.java index 606fc6cde..121520e82 100644 --- a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternTest.java +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/PatternTest.java @@ -17,16 +17,13 @@ package org.teavm.classlib.java.util.regex; -import java.io.Serializable; +import static org.junit.Assert.*; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import org.junit.Test; -import junit.framework.TestCase; - - -@SuppressWarnings("nls") -public class PatternTest extends TestCase { +public class PatternTest { String[] testPatterns = { "(a|b)*abb", "(1*2*3*4*)*567", @@ -40,18 +37,13 @@ public class PatternTest extends TestCase { }; - public PatternTest(String name) { - super(name); - } - + @Test public void testCommentsInPattern() { Pattern p = Pattern.compile("ab# this is a comment\ncd", Pattern.COMMENTS); assertTrue(p.matcher("abcd").matches()); } - /* - * Class under test for String[] split(CharSequence, int) - */ + @Test public void testSplitCharSequenceint() { // splitting CharSequence which ends with pattern // bug6193 @@ -126,9 +118,7 @@ public class PatternTest extends TestCase { assertEquals(s.length, 5); } - /* - * Class under test for String[] split(CharSequence) - */ + @Test 
public void testSplitCharSequence() { String s[]; Pattern pat = Pattern.compile("b"); @@ -151,6 +141,7 @@ public class PatternTest extends TestCase { public void testPattern() { } + @Test public void testFlags() { String baseString; String testString; @@ -274,6 +265,7 @@ public class PatternTest extends TestCase { assertFalse(mat.matches()); } + @Test public void testFlagsMethod() { String baseString; Pattern pat; @@ -313,9 +305,7 @@ public class PatternTest extends TestCase { assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL); } - /* - * Class under test for Pattern compile(String, int) - */ + @Test public void testCompileStringint() { /* * this tests are needed to verify that appropriate exceptions are @@ -368,9 +358,7 @@ public class PatternTest extends TestCase { Pattern.compile(pattern); } - /* - * Class under test for Pattern compile(String) - */ + @Test public void testQuantCompileNeg() { String[] patterns = { "5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh", "{5,3shdfkjh}" }; @@ -387,13 +375,15 @@ public class PatternTest extends TestCase { assertNotNull(Pattern.compile(pattern)); } + @Test public void testQuantCompilePos() { - String[] patterns = {/* "(abc){1,3}", */"abc{2,}", "abc{5}" }; + String[] patterns = { "abc{2,}", "abc{5}" }; for (String element : patterns) { Pattern.compile(element); } } + @Test public void testQuantComposition() { String pattern = "(a{1,3})aab"; java.util.regex.Pattern pat = java.util.regex.Pattern.compile(pattern); @@ -403,6 +393,7 @@ public class PatternTest extends TestCase { mat.group(1); } + @Test public void testMatches() { String[][] posSeq = { { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" }, @@ -431,6 +422,7 @@ public class PatternTest extends TestCase { } } + @Test public void testTimeZoneIssue() { Pattern p = Pattern.compile("GMT(\\+|\\-)(\\d+)(:(\\d+))?"); Matcher m = p.matcher("GMT-9:45"); @@ -441,6 +433,7 @@ public class PatternTest extends TestCase { assertEquals("45", m.group(4)); 
} + @Test public void testCompileRanges() { String[] correctTestPatterns = { "[^]*abb]*", "[^a-d[^m-p]]*abb", "[a-d\\d]*abb", "[abc]*abb", "[a-e&&[de]]*abb", "[^abc]*abb", @@ -472,6 +465,7 @@ public class PatternTest extends TestCase { } } + @Test public void testRangesSpecialCases() { String neg_patterns[] = { "[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]" }; @@ -494,10 +488,12 @@ public class PatternTest extends TestCase { } } + @Test public void testZeroSymbols() { assertTrue(Pattern.matches("[\0]*abb", "\0\0\0\0\0\0abb")); } + @Test public void testEscapes() { Pattern pat = Pattern.compile("\\Q{]()*?"); Matcher mat = pat.matcher("{]()*?"); @@ -505,6 +501,7 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } + @Test public void testRegressions() { // Bug 181 Pattern.compile("[\\t-\\r]"); @@ -519,6 +516,7 @@ public class PatternTest extends TestCase { Pattern.compile("\\u6211", Pattern.LITERAL); } + @Test public void testOrphanQuantifiers() { try { Pattern.compile("+++++"); @@ -527,6 +525,7 @@ public class PatternTest extends TestCase { } } + @Test public void testOrphanQuantifiers2() { try { Pattern.compile("\\d+*"); @@ -535,6 +534,7 @@ public class PatternTest extends TestCase { } } + @Test public void testBug197() { Object[] vals = { ":", new Integer(2), new String[] { "boo", "and:foo" }, ":", new Integer(5), @@ -560,6 +560,7 @@ public class PatternTest extends TestCase { } } + @Test public void testURIPatterns() { String URI_REGEXP_STR = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; String SCHEME_REGEXP_STR = "^[a-zA-Z]{1}[\\w+-.]+$"; @@ -578,6 +579,7 @@ public class PatternTest extends TestCase { Pattern.compile(HOSTNAME_REGEXP_STR); } + @Test public void testFindBoundaryCases1() { Pattern pat = Pattern.compile(".*\n"); Matcher mat = pat.matcher("a\n"); @@ -587,6 +589,7 @@ public class PatternTest extends TestCase { } + @Test public void testFindBoundaryCases2() { Pattern pat = Pattern.compile(".*A"); Matcher mat = 
pat.matcher("aAa"); @@ -596,6 +599,7 @@ public class PatternTest extends TestCase { } + @Test public void testFindBoundaryCases3() { Pattern pat = Pattern.compile(".*A"); Matcher mat = pat.matcher("a\naA\n"); @@ -605,6 +609,7 @@ public class PatternTest extends TestCase { } + @Test public void testFindBoundaryCases4() { Pattern pat = Pattern.compile("A.*"); Matcher mat = pat.matcher("A\n"); @@ -614,6 +619,7 @@ public class PatternTest extends TestCase { } + @Test public void testFindBoundaryCases5() { Pattern pat = Pattern.compile(".*A.*"); Matcher mat = pat.matcher("\nA\naaa\nA\naaAaa\naaaA\n"); @@ -625,6 +631,7 @@ public class PatternTest extends TestCase { } } + @Test public void testFindBoundaryCases6() { String[] res = { "", "a", "", "" }; Pattern pat = Pattern.compile(".*"); @@ -639,14 +646,14 @@ public class PatternTest extends TestCase { public void _testFindBoundaryCases7() { Pattern pat = Pattern.compile(".*"); Matcher mat = pat.matcher("\na\n"); - int k = 0; - for (; mat.find(); k++) { + while (mat.find()) { System.out.println(mat.group()); System.out.flush(); } } + @Test public void testBackReferences() { Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))"); Matcher mat = pat @@ -680,6 +687,7 @@ public class PatternTest extends TestCase { assertEquals(3, k); } + @Test public void testNewLine() { Pattern pat = Pattern.compile("(^$)*\n", Pattern.MULTILINE); Matcher mat = pat.matcher("\r\n\n"); @@ -690,6 +698,7 @@ public class PatternTest extends TestCase { assertEquals(2, counter); } + @Test public void testFindGreedy() { Pattern pat = Pattern.compile(".*aaa", Pattern.DOTALL); Matcher mat = pat.matcher("aaaa\naaa\naaaaaa"); @@ -697,6 +706,7 @@ public class PatternTest extends TestCase { assertEquals(15, mat.end()); } + @Test public void testSOLQuant() { Pattern pat = Pattern.compile("$*", Pattern.MULTILINE); Matcher mat = pat.matcher("\n\n"); @@ -708,6 +718,7 @@ public class PatternTest extends TestCase { assertEquals(3, counter); } + @Test public 
void testIllegalEscape() { try { Pattern.compile("\\y"); @@ -716,10 +727,12 @@ public class PatternTest extends TestCase { } } + @Test public void testEmptyFamily() { Pattern.compile("\\p{Lower}"); } + @Test public void testNonCaptConstr() { // Flags Pattern pat = Pattern.compile("(?i)b*(?-i)a*"); @@ -804,12 +817,14 @@ public class PatternTest extends TestCase { Pattern.compile("ab(a){1,3}?(c)d"); } + @Test public void testCompilePatternWithTerminatorMark() { Pattern pat = Pattern.compile("a\u0000\u0000cd"); Matcher mat = pat.matcher("a\u0000\u0000cd"); assertTrue(mat.matches()); } + @Test public void testAlternations() { String baseString = "|a|bc"; Pattern pat = Pattern.compile(baseString); @@ -878,6 +893,7 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } + @Test public void testMatchWithGroups() { String baseString = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr"; String pattern = ".*(..).*\\1.*"; @@ -889,12 +905,14 @@ public class PatternTest extends TestCase { assertTrue(Pattern.compile(pattern).matcher(baseString).find()); } + @Test public void testSplitEmptyCharSequence() { String s1 = ""; String[] arr = s1.split(":"); assertEquals(arr.length, 1); } + @Test public void testSplitEndsWithPattern() { assertEquals(",,".split(",", 3).length, 3); assertEquals(",,".split(",", 4).length, 3); @@ -903,10 +921,12 @@ public class PatternTest extends TestCase { assertEquals(Pattern.compile("b").split("ab", -1).length, 2); } + @Test public void testCaseInsensitiveFlag() { assertTrue(Pattern.matches("(?i-:AbC)", "ABC")); } + @Test public void testEmptyGroups() { Pattern pat = Pattern.compile("ab(?>)cda"); Matcher mat = pat.matcher("abcda"); @@ -921,6 +941,7 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } + @Test public void testCompileNonCaptGroup() { boolean isCompiled = false; @@ -939,6 +960,7 @@ public class PatternTest extends TestCase { assertTrue(isCompiled); } + @Test public void 
testEmbeddedFlags() { String baseString = "(?i)((?s)a)"; String testString = "A"; @@ -971,10 +993,12 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } + @Test public void testAltWithFlags() { Pattern.compile("|(?i-xi)|()"); } + @Test public void testRestoreFlagsAfterGroup() { String baseString = "abc((?x)d) a"; String testString = "abcd a"; @@ -984,10 +1008,7 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } - /* - * Verify if the Pattern support the following character classes: - * \p{javaLowerCase} \p{javaUpperCase} \p{javaWhitespace} \p{javaMirrored} - */ + @Test public void testCompileCharacterClass() { // Regression for HARMONY-606, 696 Pattern pattern = Pattern.compile("\\p{javaLowerCase}"); @@ -1039,300 +1060,7 @@ public class PatternTest extends TestCase { assertNotNull(pattern); } - public void testCanonEqFlag() { - - /* - * for decompositions see - * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt - * http://www.unicode.org/reports/tr15/#Decomposition - */ - String baseString; - String testString; - Pattern pat; - Matcher mat; - - baseString = "ab(a*)\\1"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - - baseString = "a(abcdf)d"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - - baseString = "aabcdfd"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - - // \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304 - // \u00CC -> \u0049\u0300 - - baseString = "\u01E0\u00CCcdb(ac)"; - testString = "\u0226\u0304\u0049\u0300cdbac"; - // TODO investigate, why this fails and uncomment - /*pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches());*/ - - baseString = "\u01E0cdb(a\u00CCc)"; - testString = "\u0041\u0307\u0304cdba\u0049\u0300c"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches()); - - baseString = "a\u00CC"; - testString = "a\u0049\u0300"; - pat = 
Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches()); - - baseString = "\u0226\u0304cdb(ac\u0049\u0300)"; - testString = "\u01E0cdbac\u00CC"; - // TODO investigate, why this fails and uncomment - /*pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches());*/ - - baseString = "cdb(?:\u0041\u0307\u0304\u00CC)"; - testString = "cdb\u0226\u0304\u0049\u0300"; - // TODO investigate, why this fails and uncomment - //pat = Pattern.compile(baseString, Pattern.CANON_EQ); - //mat = pat.matcher(testString); - // assertTrue(mat.matches()); - - baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)"; - testString = "\u01E0b\u00CCcdbac"; - // TODO investigate, why this fails and uncomment - // pat = Pattern.compile(baseString, Pattern.CANON_EQ); - // mat = pat.matcher(testString); - // assertTrue(mat.matches()); - - baseString = "\u01E0|\u00CCcdb(ac)"; - testString = "\u0041\u0307\u0304"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - // TODO investigate, why this fails and uncomment - //assertTrue(mat.matches()); - - baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]"; - testString = "cdb\u0041\u0307\u0304b"; - // TODO investigate, why this fails and uncomment - /*pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches());*/ - - baseString = "a\u0300"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher("a\u00E0a"); - assertTrue(mat.find()); - - baseString = "\u7B20\uF9F8abc"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher("\uF9F8\uF9F8abc"); - // TODO investigate, why this fails and uncomment - // assertTrue(mat.matches()); - - // \u01F9 -> \u006E\u0300 - // \u00C3 -> \u0041\u0303 - - baseString = "cdb(?:\u00C3\u006E\u0300)"; - testString = "cdb\u0041\u0303\u01F9"; - // TODO investigate, why this fails and uncomment - /* - pat = 
Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches());*/ - - // \u014C -> \u004F\u0304 - // \u0163 -> \u0074\u0327 - - baseString = "cdb(?:\u0163\u004F\u0304)"; - testString = "cdb\u0074\u0327\u014C"; - // TODO investigate, why this fails and uncomment - /* - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches());*/ - - // \u00E1->a\u0301 - // canonical ordering takes place \u0301\u0327 -> \u0327\u0301 - - baseString = "c\u0327\u0301"; - testString = "c\u0301\u0327"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches()); - - /* - * Hangul decompositions - */ - // \uD4DB->\u1111\u1171\u11B6 - // \uD21E->\u1110\u116D\u11B5 - // \uD264->\u1110\u1170 - // not Hangul:\u0453->\u0433\u0301 - baseString = "a\uD4DB\u1111\u1171\u11B6\uD264"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - - baseString = "\u0453c\uD4DB"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - - baseString = "a\u1110\u116D\u11B5b\uD21Ebc"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - - baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)"; - testString = "\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - // TODO investigate, why this fails and uncomment - // assertTrue(mat.matches()); - - baseString = "\uD4DB\uD264cdb(a\uD21Ec)"; - testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches()); - - baseString = "a\uD4DB"; - testString = "a\u1111\u1171\u11B6"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches()); - - baseString = "a\uD21E"; - testString = "a\u1110\u116D\u11B5"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat 
= pat.matcher(testString); - assertTrue(mat.matches()); - - baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)"; - testString = "\uD4DBcdbac\uD21E"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - // TODO investigate, why this fails and uncomment - // assertTrue(mat.matches()); - - baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)"; - testString = "cdb\uD4DB\u1110\u116D\u11B5"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - // TODO investigate, why this fails and uncomment - /* - assertTrue(mat.matches()); - - baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)"; - testString = "\uD4DBb\uD21Ecdbac"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches()); - - baseString = "\uD4DB|\u00CCcdb(ac)"; - testString = "\u1111\u1171\u11B6"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches()); - - baseString = "\uD4DB|\u00CCcdb(ac)"; - testString = "\u1111\u1171"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertFalse(mat.matches()); - - baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]"; - testString = "cdb\u1111\u1171\u11B6b"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - assertTrue(mat.matches()); - - baseString = "\uD4DB"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher("a\u1111\u1171\u11B6a"); - assertTrue(mat.find()); - - baseString = "\u1111"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher("bcda\uD4DBr"); - assertFalse(mat.find());*/ - } - - public void testIndexesCanonicalEq() { - String baseString; - String testString; - Pattern pat; - Matcher mat; - - baseString = "\uD4DB"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher("bcda\u1111\u1171\u11B6awr"); - assertTrue(mat.find()); - 
assertEquals(mat.start(), 4); - assertEquals(mat.end(), 7); - - baseString = "\uD4DB\u1111\u1171\u11B6"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher("bcda\u1111\u1171\u11B6\uD4DBawr"); - // TODO investigate, why this fails and uncomment - /*assertTrue(mat.find()); - assertEquals(mat.start(), 4); - assertEquals(mat.end(), 8);*/ - - baseString = "\uD4DB\uD21E\u1110\u1170"; - testString = "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; - pat = Pattern.compile(baseString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - // TODO investigate, why this fails and uncomment - /*assertTrue(mat.find()); - assertEquals(mat.start(), 6); - assertEquals(mat.end(), 13);*/ - } - - public void testCanonEqFlagWithSupplementaryCharacters() { - - /* - * \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32 - * \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F - * ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16 - */ - String patString = "abc\uD834\uDDBFef"; - String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; - Pattern pat = Pattern.compile(patString, Pattern.CANON_EQ); - Matcher mat = pat.matcher(testString); - assertTrue(mat.matches()); - - testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; - // TODO investigate, why this fails and uncomment - /*mat = pat.matcher(testString); - assertTrue(mat.matches());*/ - - patString = "abc\uD834\uDDBB\uD834\uDD6Fef"; - testString = "abc\uD834\uDDBFef"; - pat = Pattern.compile(patString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - // TODO investigate, why this fails and uncomment - //assertTrue(mat.matches()); - - testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; - mat = pat.matcher(testString); - assertTrue(mat.matches()); - - patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; - testString = "abc\uD834\uDDBFef"; - pat = Pattern.compile(patString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - // TODO investigate, why this fails and uncomment - // assertTrue(mat.matches()); 
- - testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; - mat = pat.matcher(testString); - // TODO investigate, why this fails and uncomment - // assertTrue(mat.matches()); - - /* - * testSupplementary characters with no decomposition - */ - patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef"; - testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef"; - pat = Pattern.compile(patString, Pattern.CANON_EQ); - mat = pat.matcher(testString); - // TODO investigate, why this fails and uncomment - // assertTrue(mat.matches()); - } - + @Test public void testRangesWithSurrogatesSupplementary() { String patString = "[abc\uD8D2]"; String testString = "\uD8D2"; @@ -1399,6 +1127,7 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } + @Test public void testSequencesWithSurrogatesSupplementary() { String patString = "abcd\uD8D3"; String testString = "abcd\uD8D3\uDFFC"; @@ -1433,6 +1162,7 @@ public class PatternTest extends TestCase { assertTrue(mat.find()); } + @Test public void testPredefinedClassesWithSurrogatesSupplementary() { String patString = "[123\\D]"; String testString = "a"; @@ -1496,16 +1226,6 @@ public class PatternTest extends TestCase { pat = Pattern.compile(patString); mat = pat.matcher(testString); - /* - * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we - * have to treat text as code points not code units. 
\\p{Cs} matches any - * surrogate character but here testString is a one code point - * consisting of two code units (two surrogate characters) so we find - * nothing - */ - // TODO investigate, why this fails and uncomment - //assertFalse(mat.find()); - // swap low and high surrogates testString = "\uDE27\uD916"; mat = pat.matcher(testString); @@ -1536,6 +1256,7 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } + @Test public void testDotConstructionWithSurrogatesSupplementary() { String patString = "."; String testString = "\uD9A0\uDE81"; @@ -1572,6 +1293,7 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } + @Test public void testQuantifiersWithSurrogatesSupplementary() { String patString = "\uD9A0\uDE81*abc"; String testString = "\uD9A0\uDE81\uD9A0\uDE81abc"; @@ -1584,6 +1306,7 @@ public class PatternTest extends TestCase { assertTrue(mat.matches()); } + @Test public void testAlternationsWithSurrogatesSupplementary() { String patString = "\uDE81|\uD9A0\uDE81|\uD9A0"; String testString = "\uD9A0"; @@ -1604,6 +1327,7 @@ public class PatternTest extends TestCase { assertFalse(mat.matches()); } + @Test public void testGroupsWithSurrogatesSupplementary() { // this pattern matches nothing @@ -1620,16 +1344,10 @@ public class PatternTest extends TestCase { assertFalse(mat.find()); } - /* - * Regression test for HARMONY-688 - */ + @Test public void testUnicodeCategoryWithSurrogatesSupplementary() { Pattern p = Pattern.compile("\\p{javaLowerCase}"); Matcher matcher = p.matcher("\uD801\uDC28"); assertTrue(matcher.find()); } - - public static void main(String[] args) { - junit.textui.TestRunner.run(PatternTest.class); - } } diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ReplaceTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ReplaceTest.java index 1583ee391..2f2e122e7 100644 --- a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ReplaceTest.java +++ 
b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/ReplaceTest.java @@ -4,9 +4,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,15 +16,15 @@ package org.teavm.classlib.java.util.regex; +import static org.junit.Assert.assertEquals; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import org.junit.Test; -import junit.framework.TestCase; - -@SuppressWarnings("nls") -public class ReplaceTest extends TestCase { +public class ReplaceTest { + @Test public void testSimpleReplace() throws PatternSyntaxException { String target, pattern, repl; @@ -39,6 +39,7 @@ public class ReplaceTest extends TestCase { assertEquals("foobarxxxarfooxxx", m.replaceAll(repl)); } + @Test public void testCaptureReplace() { String target, pattern, repl, s; Pattern p = null; @@ -72,6 +73,7 @@ public class ReplaceTest extends TestCase { s); } + @Test public void testEscapeReplace() { String target, pattern, repl, s; diff --git a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/SplitTest.java b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/SplitTest.java index 050bcb5eb..48c534687 100644 --- a/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/SplitTest.java +++ b/teavm-classlib/src/test/java/org/teavm/classlib/java/util/regex/SplitTest.java @@ -17,17 +17,18 @@ package org.teavm.classlib.java.util.regex; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.util.regex.Pattern; import 
java.util.regex.PatternSyntaxException; - -import junit.framework.TestCase; +import org.junit.Test; /** * TODO Type description */ -@SuppressWarnings("nls") -public class SplitTest extends TestCase { +public class SplitTest { + @Test public void testSimple() { Pattern p = Pattern.compile("/"); String[] results = p.split("have/you/done/it/right"); @@ -38,6 +39,7 @@ public class SplitTest extends TestCase { } } + @Test public void testSplit1() throws PatternSyntaxException { Pattern p = Pattern.compile(" "); @@ -129,6 +131,7 @@ public class SplitTest extends TestCase { assertEquals("dle z", tokens[2]); } + @Test public void testSplit2() { Pattern p = Pattern.compile(""); String s[]; @@ -152,6 +155,7 @@ public class SplitTest extends TestCase { assertEquals("", s[5]); } + @Test public void testSplitSupplementaryWithEmptyString() { /* @@ -161,12 +165,12 @@ public class SplitTest extends TestCase { Pattern p = Pattern.compile(""); String s[]; s = p.split("a\ud869\uded6b", -1); - // TODO investigate, why this fails and uncomment - /*assertEquals(5, s.length); + assertEquals(6, s.length); assertEquals("", s[0]); assertEquals("a", s[1]); - assertEquals("\ud869\uded6", s[2]); - assertEquals("b", s[3]); - assertEquals("", s[4]);*/ + assertEquals("\ud869", s[2]); + assertEquals("\uded6", s[3]); + assertEquals("b", s[4]); + assertEquals("", s[5]); } } From 78f4f7a1fffd72f2cd7dd3ac005f4011b2c1e4a6 Mon Sep 17 00:00:00 2001 From: Alexey Andreev Date: Sun, 12 Oct 2014 13:50:56 +0400 Subject: [PATCH 3/4] Add regex methods into java.lang.String. 
Add group index checking into java.util.regex.Matcher --- .../java/org/teavm/classlib/java/lang/TString.java | 13 +++++++++++++ .../teavm/classlib/java/util/regex/TMatcher.java | 4 ++++ 2 files changed, 17 insertions(+) diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TString.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TString.java index e8dcf653f..b636dac46 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TString.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TString.java @@ -22,6 +22,7 @@ import org.teavm.classlib.java.util.TArrays; import org.teavm.classlib.java.util.TComparator; import org.teavm.classlib.java.util.THashMap; import org.teavm.classlib.java.util.TMap; +import org.teavm.classlib.java.util.regex.TPattern; import org.teavm.dependency.PluggableDependency; import org.teavm.javascript.ni.InjectedBy; import org.teavm.javascript.ni.Rename; @@ -613,4 +614,16 @@ public class TString extends TObject implements TSerializable, TComparable matchResult.groupCount()) { + throw new IndexOutOfBoundsException("Index " + group + " if out of range [0; " + + matchResult.groupCount() + ")"); + } return matchResult.group(group); } From e1d5b45750c2c37c3e03e8ff8b6e5dc1074cbc77 Mon Sep 17 00:00:00 2001 From: konsoletyper Date: Sat, 25 Oct 2014 16:08:41 +0400 Subject: [PATCH 4/4] Add some methods to String --- .../org/teavm/classlib/java/lang/TString.java | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TString.java b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TString.java index b636dac46..a11ed0585 100644 --- a/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TString.java +++ b/teavm-classlib/src/main/java/org/teavm/classlib/java/lang/TString.java @@ -162,6 +162,18 @@ public class TString extends TObject implements TSerializable, TComparable length() || ooffset + len > other.length()) 
{ + return false; + } + for (int i = 0; i < len; ++i) { + char a = charAt(toffset++); + char b = other.charAt(ooffset++); + if (ignoreCase) { + a = TCharacter.toLowerCase(a); + b = TCharacter.toLowerCase(b); + } + if (a != b) { + return false; + } + } + return true; + } + public boolean regionMatches(int toffset, TString other, int ooffset, int len) { if (toffset < 0 || ooffset < 0 || toffset + len > length() || ooffset + len > other.length()) { return false; @@ -626,4 +656,12 @@ public class TString extends TObject implements TSerializable, TComparable