Merge branch 'regex'

This commit is contained in:
konsoletyper 2014-10-25 16:09:35 +04:00
commit b93c71ad41
103 changed files with 17025 additions and 0 deletions

View File

@ -118,6 +118,7 @@
<argument>java.util</argument>
<argument>java.util.logging</argument>
<argument>java.util.concurrent</argument>
<argument>java.util.regex</argument>
<argument>-output</argument>
<argument>${project.build.directory}/jcl-report</argument>
</arguments>

View File

@ -22,6 +22,7 @@ import org.teavm.classlib.java.util.TArrays;
import org.teavm.classlib.java.util.TComparator;
import org.teavm.classlib.java.util.THashMap;
import org.teavm.classlib.java.util.TMap;
import org.teavm.classlib.java.util.regex.TPattern;
import org.teavm.dependency.PluggableDependency;
import org.teavm.javascript.ni.InjectedBy;
import org.teavm.javascript.ni.Rename;
@ -161,6 +162,18 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
}
}
/**
 * Compares this string with the content of the given buffer, char by char.
 *
 * @param buffer the buffer whose content is compared with this string
 * @return {@code true} if both have the same length and the same chars at every index
 */
public boolean contentEquals(TStringBuffer buffer) {
    int count = characters.length;
    if (count != buffer.length()) {
        return false;
    }
    int index = 0;
    while (index < count) {
        if (characters[index] != buffer.charAt(index)) {
            return false;
        }
        ++index;
    }
    return true;
}
public boolean contentEquals(TCharSequence charSeq) {
if (this == charSeq) {
return true;
@ -226,6 +239,24 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
return startsWith(prefix, 0);
}
/**
 * Tests whether a region of this string equals a region of another string,
 * optionally ignoring case.
 *
 * @param ignoreCase if {@code true}, both chars are lower-cased before being compared
 * @param toffset start of the region in this string
 * @param other the string to compare with
 * @param ooffset start of the region in {@code other}
 * @param len number of chars to compare
 * @return {@code false} if an offset is negative or a region runs past the end of its
 *         string, or if any pair of chars differs; {@code true} otherwise
 */
public boolean regionMatches(boolean ignoreCase, int toffset, String other, int ooffset, int len) {
    // The sums are computed as long: with int arithmetic a very large len overflows to a
    // negative value, passes the bounds check and makes charAt throw instead of returning false.
    if (toffset < 0 || ooffset < 0 || (long) toffset + len > length()
            || (long) ooffset + len > other.length()) {
        return false;
    }
    for (int i = 0; i < len; ++i) {
        char a = charAt(toffset + i);
        char b = other.charAt(ooffset + i);
        if (ignoreCase) {
            // NOTE(review): only lower-case forms are compared; the JDK contract also compares
            // upper-case forms — confirm whether that matters for this class library.
            a = TCharacter.toLowerCase(a);
            b = TCharacter.toLowerCase(b);
        }
        if (a != b) {
            return false;
        }
    }
    return true;
}
public boolean regionMatches(int toffset, TString other, int ooffset, int len) {
if (toffset < 0 || ooffset < 0 || toffset + len > length() || ooffset + len > other.length()) {
return false;
@ -613,4 +644,24 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
}
return interned;
}
/**
 * Checks this string against a regular expression by delegating to
 * {@code TPattern.matches}.
 *
 * @param regex the regular expression
 * @return the result reported by {@code TPattern.matches}
 */
public boolean matches(String regex) {
    boolean matched = TPattern.matches(regex, this.toString());
    return matched;
}
/**
 * Splits this string using a pattern compiled from the given regular expression.
 *
 * @param regex the delimiting regular expression
 * @return the pieces produced by the compiled pattern's {@code split}
 */
public String[] split(String regex) {
    TPattern delimiter = TPattern.compile(regex);
    return delimiter.split(this.toString());
}
/**
 * Splits this string using a pattern compiled from the given regular expression,
 * passing {@code limit} through to the pattern's {@code split}.
 *
 * @param regex the delimiting regular expression
 * @param limit the limit forwarded to the compiled pattern
 * @return the pieces produced by the compiled pattern's {@code split}
 */
public String[] split(String regex, int limit) {
    TPattern delimiter = TPattern.compile(regex);
    return delimiter.split(this.toString(), limit);
}
/**
 * Compiles {@code regex}, creates a matcher over this string and returns the
 * result of the matcher's {@code replaceAll}.
 *
 * @param regex the regular expression to search for
 * @param replacement the replacement passed to the matcher
 * @return the string produced by the matcher
 */
public String replaceAll(String regex, String replacement) {
    TPattern pattern = TPattern.compile(regex);
    return pattern.matcher(toString()).replaceAll(replacement);
}
/**
 * Compiles {@code regex}, creates a matcher over this string and returns the
 * result of the matcher's {@code replaceFirst}.
 *
 * @param regex the regular expression to search for
 * @param replacement the replacement passed to the matcher
 * @return the string produced by the matcher
 */
public String replaceFirst(String regex, String replacement) {
    TPattern pattern = TPattern.compile(regex);
    return pattern.matcher(toString()).replaceFirst(replacement);
}
}

View File

@ -0,0 +1,43 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util;
import org.teavm.classlib.java.lang.TRuntimeException;
import org.teavm.classlib.java.lang.TString;
/**
 * Runtime exception that carries, besides its message, the name of a class and
 * a key supplied by the thrower.
 *
 * @author Alexey Andreev
 */
public class TMissingResourceException extends TRuntimeException {
    private static final long serialVersionUID = 6730397307327337970L;
    // Both values are set once in the constructor and never changed afterwards.
    private final String className;
    private final String key;

    /**
     * Creates the exception.
     *
     * @param s the detail message
     * @param className the class name to report from {@link #getClassName()}
     * @param key the key to report from {@link #getKey()}
     */
    public TMissingResourceException(String s, String className, String key) {
        super(TString.wrap(s));
        this.className = className;
        this.key = key;
    }

    /**
     * @return the class name given at construction time
     */
    public String getClassName() {
        return className;
    }

    /**
     * @return the key given at construction time
     */
    public String getKey() {
        return key;
    }
}

View File

@ -0,0 +1,896 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.BitSet;
/**
* This class represents character classes, i.e. sets of characters that are
* either predefined or user defined.
*
* Note that this class represents a token, not a node, so it is constructed
* by the lexer.
*
* @author Nikolay A. Kuznetsov
*/
abstract class AbstractCharClass extends SpecialToken {
// Negation flag: flipped by setNegative(boolean) and reported by isNegative().
protected boolean alt;
// Flipped together with alt in setNegative(boolean); decides how the
// lowHighSurrogates bits are interpreted (set bits vs. clear bits).
protected boolean altSurrogates;
// Character.MAX_SURROGATE - Character.MIN_SURROGATE + 1
static int SURROGATE_CARDINALITY = 2048;
// One bit per surrogate char, indexed by (ch - Character.MIN_SURROGATE).
BitSet lowHighSurrogates = new BitSet(SURROGATE_CARDINALITY);
// Cache for getWithoutSurrogates(); created on first request.
AbstractCharClass charClassWithoutSurrogates = null;
// Cache for getSurrogates(); created on first request.
AbstractCharClass charClassWithSurrogates = null;
// Name -> lazy class registry consulted by getPredefinedClass(String, boolean).
static PredefinedCharacterClasses charClasses = new PredefinedCharacterClasses();
/*
* Indicates if this class may contain supplementary Unicode codepoints. If
* this flag is specified it doesn't mean that this class contains
* supplementary characters but may contain.
*/
protected boolean mayContainSupplCodepoints = false;
/**
* Tells whether this character class contains the given character.
*
* @param ch
* the character (as an int code point) to check
* @return true if the character belongs to this class
*/
abstract public boolean contains(int ch);
/**
* Returns the BitSet representing this character class, or <code>null</code>
* if this character class has no bitset representation. This base
* implementation always returns <code>null</code>.
*
* @return the bitset, or <code>null</code>
*/
protected BitSet getBits() {
return null;
}
/**
* Returns the bitset of surrogate chars kept by this class; bit i stands for
* the char Character.MIN_SURROGATE + i.
*
* @return the live lowHighSurrogates bitset (not a copy)
*/
protected BitSet getLowHighSurrogates() {
return lowHighSurrogates;
}
/**
 * Reports whether the surrogate bitset has a relevant bit below
 * {@code SURROGATE_CARDINALITY}: the first clear bit when the surrogate
 * meaning is inverted, the first set bit otherwise.
 *
 * NOTE(review): {@code BitSet.nextSetBit} returns -1 when no bit is set, which
 * also satisfies the comparison, so an empty non-inverted set yields
 * {@code true} — confirm that this is intended.
 *
 * @return the result of the comparison described above
 */
public boolean hasLowHighSurrogates() {
    int firstRelevantBit;
    if (altSurrogates) {
        firstRelevantBit = lowHighSurrogates.nextClearBit(0);
    } else {
        firstRelevantBit = lowHighSurrogates.nextSetBit(0);
    }
    return firstRelevantBit < SURROGATE_CARDINALITY;
}
/**
* Returns the flag telling that this class may (but need not) contain
* supplementary code points.
*
* @return current value of the mayContainSupplCodepoints field
*/
public boolean mayContainSupplCodepoints() {
return mayContainSupplCodepoints;
}
/**
* Identifies this token as a character class.
*
* @return SpecialToken.TOK_CHARCLASS
*/
@Override
public int getType() {
return SpecialToken.TOK_CHARCLASS;
}
/**
* Returns this object itself.
*
* @return this character class
*/
public AbstractCharClass getInstance() {
return this;
}
/**
 * Returns a character class that accepts only the surrogate chars of this
 * class, as recorded in the surrogate bitset. The view is created on the
 * first call and cached.
 *
 * @return the cached surrogates-only view
 */
public AbstractCharClass getSurrogates() {
    if (charClassWithSurrogates != null) {
        return charClassWithSurrogates;
    }
    final BitSet surrogateBits = getLowHighSurrogates();
    AbstractCharClass surrogatesOnly = new AbstractCharClass() {
        @Override
        public boolean contains(int ch) {
            int index = ch - Character.MIN_SURROGATE;
            if (index < 0 || index >= AbstractCharClass.SURROGATE_CARDINALITY) {
                // not a surrogate char at all
                return false;
            }
            // this.altSurrogates is the flag of the view, set via setNegative below
            return this.altSurrogates ^ surrogateBits.get(index);
        }
    };
    surrogatesOnly.setNegative(this.altSurrogates);
    charClassWithSurrogates = surrogatesOnly;
    return surrogatesOnly;
}
/**
 * Returns a character class that accepts what this class accepts, minus the
 * surrogate chars recorded in the surrogate bitset. The view is created on
 * the first call and cached; it copies this class's negation state and
 * supplementary-code-point flag.
 *
 * @return the cached view without surrogates
 */
public AbstractCharClass getWithoutSurrogates() {
    if (charClassWithoutSurrogates != null) {
        return charClassWithoutSurrogates;
    }
    final BitSet surrogateBits = getLowHighSurrogates();
    final AbstractCharClass source = this;
    AbstractCharClass filtered = new AbstractCharClass() {
        @Override
        public boolean contains(int ch) {
            int index = ch - Character.MIN_SURROGATE;
            boolean inSurrogateRange = index >= 0 && index < AbstractCharClass.SURROGATE_CARDINALITY;
            // this.altSurrogates is the flag of the view, set via setNegative below
            boolean isListedSurrogate = inSurrogateRange && (this.altSurrogates ^ surrogateBits.get(index));
            return source.contains(ch) && !isListedSurrogate;
        }
    };
    filtered.setNegative(isNegative());
    // setNegative raises the flag; restore the value of the original class
    filtered.mayContainSupplCodepoints = mayContainSupplCodepoints;
    charClassWithoutSurrogates = filtered;
    return filtered;
}
/**
* Always returns false in this base class.
* NOTE(review): "UCI" presumably stands for Unicode case-insensitive matching - confirm.
*
* @return false
*/
public boolean hasUCI() {
return false;
}
/**
 * Puts this character class into negative or positive form. A class that had
 * some characters added and is then made negative accepts every character
 * except those.
 *
 * The already stored characters are not touched; only the overall meaning of
 * the class (and of its surrogate bitset) is flipped. As a side effect the
 * class is always marked as possibly containing supplementary code points,
 * whatever {@code value} is.
 *
 * @param value {@code true} for the negative form, {@code false} for the positive one
 * @return this character class
 * @see #contains(int)
 * @see #isNegative()
 */
public AbstractCharClass setNegative(boolean value) {
    boolean polarityChanges = alt != value;
    if (polarityChanges) {
        alt = !alt;
        altSurrogates = !altSurrogates;
    }
    mayContainSupplCodepoints = true;
    return this;
}
/**
* Tells whether this class is currently in negative form.
*
* @return current value of the alt flag
*/
public boolean isNegative() {
return alt;
}
// -----------------------------------------------------------------
// Static methods and predefined classes
// -----------------------------------------------------------------
/**
* Two single characters intersect only when they are equal.
*
* @return true if ch1 and ch2 are the same value
*/
public static boolean intersects(int ch1, int ch2) {
return ch1 == ch2;
}
/**
* A character class intersects a single character when it contains it.
*
* @return the result of cc.contains(ch)
*/
public static boolean intersects(AbstractCharClass cc, int ch) {
return cc.contains(ch);
}
/**
 * Tells whether two character classes may have characters in common. When
 * either class has no bitset representation the answer is conservatively
 * {@code true}; otherwise the two bitsets are tested for intersection.
 *
 * @return {@code true} if the classes intersect or if this cannot be decided
 */
public static boolean intersects(AbstractCharClass cc1, AbstractCharClass cc2) {
    BitSet firstBits = cc1.getBits();
    if (firstBits == null) {
        return true;
    }
    BitSet secondBits = cc2.getBits();
    if (secondBits == null) {
        return true;
    }
    return firstBits.intersects(secondBits);
}
/**
 * Looks up a predefined character class by its name in the registry.
 *
 * @param name the registry key of the class
 * @param negative {@code true} to obtain the negated form
 * @return the (lazily created and cached) character class
 * @throws java.util.MissingResourceException if no class is registered under {@code name}
 */
public static AbstractCharClass getPredefinedClass(String name, boolean negative) {
    LazyCharClass lazyClass = (LazyCharClass) charClasses.getObject(name);
    if (lazyClass == null) {
        // The registry answers null for unknown names; report the offending key
        // instead of failing with a bare NullPointerException.
        throw new java.util.MissingResourceException("Unknown character class: " + name,
                "AbstractCharClass", name);
    }
    return lazyClass.getValue(negative);
}
/**
 * Creates a character class on demand and caches its positive and its
 * negated form separately.
 */
abstract static class LazyCharClass {
    AbstractCharClass posValue = null;
    AbstractCharClass negValue = null;

    /**
     * Returns the cached class of the requested polarity, computing it on first use.
     *
     * @param negative {@code true} for the negated form
     */
    public AbstractCharClass getValue(boolean negative) {
        if (negative) {
            if (negValue == null) {
                negValue = computeValue().setNegative(true);
            }
            return negValue;
        }
        if (posValue == null) {
            posValue = computeValue();
        }
        return posValue;
    }

    /** Builds a fresh instance of the class in its base form. */
    protected abstract AbstractCharClass computeValue();
}
/** Lazy factory for the class built from the range '0'-'9'. */
static class LazyDigit extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass digits = new CharClass();
        return digits.add('0', '9');
    }
}
/** Lazy factory for the negated digit class. */
static class LazyNonDigit extends LazyDigit {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass digits = super.computeValue();
        AbstractCharClass nonDigits = digits.setNegative(true);
        nonDigits.mayContainSupplCodepoints = true;
        return nonDigits;
    }
}
/** Lazy factory for the whitespace class. */
static class LazySpace extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        /* 9-13 are \t \n \x0B \f \r; 32 is ' ' */
        CharClass spaces = new CharClass();
        return spaces.add(9, 13).add(32);
    }
}
/** Lazy factory for the negated whitespace class. */
static class LazyNonSpace extends LazySpace {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass spaces = super.computeValue();
        AbstractCharClass nonSpaces = spaces.setNegative(true);
        nonSpaces.mayContainSupplCodepoints = true;
        return nonSpaces;
    }
}
/** Lazy factory for the word class: ASCII letters, ASCII digits and '_'. */
static class LazyWord extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass wordChars = new CharClass();
        return wordChars.add('a', 'z').add('A', 'Z').add('0', '9').add('_');
    }
}
/** Lazy factory for the negated word class. */
static class LazyNonWord extends LazyWord {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass wordChars = super.computeValue();
        AbstractCharClass nonWordChars = wordChars.setNegative(true);
        nonWordChars.mayContainSupplCodepoints = true;
        return nonWordChars;
    }
}
/** Lazy factory for the class built from the range 'a'-'z'. */
static class LazyLower extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass lowerCase = new CharClass();
        return lowerCase.add('a', 'z');
    }
}
/** Lazy factory for the class built from the range 'A'-'Z'. */
static class LazyUpper extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass upperCase = new CharClass();
        return upperCase.add('A', 'Z');
    }
}
/** Lazy factory for the class built from the range 0x00-0x7F. */
static class LazyASCII extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass ascii = new CharClass();
        return ascii.add(0x00, 0x7F);
    }
}
/** Lazy factory for the class of ASCII letters, 'a'-'z' and 'A'-'Z'. */
static class LazyAlpha extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass letters = new CharClass();
        return letters.add('a', 'z').add('A', 'Z');
    }
}
/** Lazy factory that extends the ASCII letter class with '0'-'9'. */
static class LazyAlnum extends LazyAlpha {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass letters = (CharClass) super.computeValue();
        return letters.add('0', '9');
    }
}
/** Lazy factory for the punctuation class. */
static class LazyPunct extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        /* Punctuation !"#$%&'()*+,-./:;<=>?@ [\]^_` {|}~ */
        CharClass punctuation = new CharClass();
        return punctuation.add(0x21, 0x40).add(0x5B, 0x60).add(0x7B, 0x7E);
    }
}
/** Lazy factory that extends the alphanumeric class with the punctuation ranges. */
static class LazyGraph extends LazyAlnum {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass alnum = (CharClass) super.computeValue();
        /* plus punctuation */
        return alnum.add(0x21, 0x40).add(0x5B, 0x60).add(0x7B, 0x7E);
    }
}
/** Lazy factory that extends the graph class with 0x20 (space). */
static class LazyPrint extends LazyGraph {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass graph = (CharClass) super.computeValue();
        return graph.add(0x20);
    }
}
/** Lazy factory for the class holding space and tab. */
static class LazyBlank extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass blanks = new CharClass();
        return blanks.add(' ').add('\t');
    }
}
/** Lazy factory for the class built from 0x00-0x1F plus 0x7F. */
static class LazyCntrl extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass controls = new CharClass();
        return controls.add(0x00, 0x1F).add(0x7F);
    }
}
/** Lazy factory for the hexadecimal digit class: '0'-'9', 'a'-'f', 'A'-'F'. */
static class LazyXDigit extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        CharClass hexDigits = new CharClass();
        return hexDigits.add('0', '9').add('a', 'f').add('A', 'F');
    }
}
/** Lazy factory for a class built from one range given by its two bounds. */
static class LazyRange extends LazyCharClass {
    int start;
    int end;

    /**
     * @param start first bound passed to {@code CharClass.add}
     * @param end second bound passed to {@code CharClass.add}
     */
    public LazyRange(int start, int end) {
        this.start = start;
        this.end = end;
    }

    @Override
    public AbstractCharClass computeValue() {
        CharClass range = new CharClass();
        return range.add(start, end);
    }
}
/** Lazy factory for the class built from 0xFEFF and the range 0xFFF0-0xFFFD. */
static class LazySpecialsBlock extends LazyCharClass {
    @Override
    public AbstractCharClass computeValue() {
        CharClass specials = new CharClass();
        return specials.add(0xFEFF, 0xFEFF).add(0xFFF0, 0xFFFD);
    }
}
/**
 * Lazy factory for a {@code UnicodeCategoryScope} created from a category
 * value, with configurable supplementary-code-point and surrogate flags.
 */
static class LazyCategoryScope extends LazyCharClass {
    int category;
    boolean mayContainSupplCodepoints;
    boolean containsAllSurrogates;

    /** Same as the three-argument constructor with {@code containsAllSurrogates} off. */
    public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints) {
        this(cat, mayContainSupplCodepoints, false);
    }

    /**
     * @param cat value handed to the {@code UnicodeCategoryScope} constructor
     * @param mayContainSupplCodepoints flag copied onto the created class
     * @param containsAllSurrogates if set, every surrogate bit of the created class is set
     */
    public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints, boolean containsAllSurrogates) {
        this.category = cat;
        this.mayContainSupplCodepoints = mayContainSupplCodepoints;
        this.containsAllSurrogates = containsAllSurrogates;
    }

    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass scope = new UnicodeCategoryScope(category);
        if (containsAllSurrogates) {
            scope.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
        }
        scope.mayContainSupplCodepoints = this.mayContainSupplCodepoints;
        return scope;
    }
}
/**
 * Lazy factory for a {@code UnicodeCategory} created from a category value,
 * with configurable supplementary-code-point and surrogate flags.
 */
static class LazyCategory extends LazyCharClass {
    int category;
    boolean mayContainSupplCodepoints;
    boolean containsAllSurrogates;

    /** Same as the three-argument constructor with {@code containsAllSurrogates} off. */
    public LazyCategory(int cat, boolean mayContainSupplCodepoints) {
        this(cat, mayContainSupplCodepoints, false);
    }

    /**
     * @param cat value handed to the {@code UnicodeCategory} constructor
     * @param mayContainSupplCodepoints flag copied onto the created class
     * @param containsAllSurrogates if set, every surrogate bit of the created class is set
     */
    public LazyCategory(int cat, boolean mayContainSupplCodepoints, boolean containsAllSurrogates) {
        this.category = cat;
        this.mayContainSupplCodepoints = mayContainSupplCodepoints;
        this.containsAllSurrogates = containsAllSurrogates;
    }

    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass categoryClass = new UnicodeCategory(category);
        if (containsAllSurrogates) {
            categoryClass.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
        }
        categoryClass.mayContainSupplCodepoints = this.mayContainSupplCodepoints;
        return categoryClass;
    }
}
/** Lazy factory for a class backed by {@code Character.isLowerCase(int)}. */
static class LazyJavaLowerCase extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass lowerCase = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isLowerCase(codePoint);
            }
        };
        lowerCase.mayContainSupplCodepoints = true;
        return lowerCase;
    }
}
/** Lazy factory for a class backed by {@code Character.isUpperCase(int)}. */
static class LazyJavaUpperCase extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass upperCase = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isUpperCase(codePoint);
            }
        };
        upperCase.mayContainSupplCodepoints = true;
        return upperCase;
    }
}
/** Lazy factory for a class backed by {@code Character.isWhitespace(int)}. */
static class LazyJavaWhitespace extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass whitespace = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isWhitespace(codePoint);
            }
        };
        return whitespace;
    }
}
/**
 * Lazy factory for the "javaMirrored" class. The mirrored check is not
 * implemented yet, so the created class currently contains nothing.
 */
static class LazyJavaMirrored extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass mirrored = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                // TODO implement this method and uncomment
                // return Character.isMirrored(codePoint);
                return false;
            }
        };
        return mirrored;
    }
}
/**
 * Lazy factory for a class backed by {@code Character.isDefined(int)}; every
 * surrogate bit of the created class is set.
 */
static class LazyJavaDefined extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass defined = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isDefined(codePoint);
            }
        };
        defined.mayContainSupplCodepoints = true;
        defined.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
        return defined;
    }
}
/** Lazy factory for a class backed by {@code Character.isDigit(int)}. */
static class LazyJavaDigit extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass digits = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isDigit(codePoint);
            }
        };
        digits.mayContainSupplCodepoints = true;
        return digits;
    }
}
/** Lazy factory for a class backed by {@code Character.isIdentifierIgnorable(int)}. */
static class LazyJavaIdentifierIgnorable extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass ignorable = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isIdentifierIgnorable(codePoint);
            }
        };
        ignorable.mayContainSupplCodepoints = true;
        return ignorable;
    }
}
/** Lazy factory for a class backed by {@code Character.isISOControl(int)}. */
static class LazyJavaISOControl extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass isoControl = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isISOControl(codePoint);
            }
        };
        return isoControl;
    }
}
/** Lazy factory for a class backed by {@code Character.isJavaIdentifierPart(int)}. */
static class LazyJavaJavaIdentifierPart extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass identifierPart = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isJavaIdentifierPart(codePoint);
            }
        };
        identifierPart.mayContainSupplCodepoints = true;
        return identifierPart;
    }
}
/** Lazy factory for a class backed by {@code Character.isJavaIdentifierStart(int)}. */
static class LazyJavaJavaIdentifierStart extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass identifierStart = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isJavaIdentifierStart(codePoint);
            }
        };
        identifierStart.mayContainSupplCodepoints = true;
        return identifierStart;
    }
}
/** Lazy factory for a class backed by {@code Character.isLetter(int)}. */
static class LazyJavaLetter extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass letters = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isLetter(codePoint);
            }
        };
        letters.mayContainSupplCodepoints = true;
        return letters;
    }
}
/** Lazy factory for a class backed by {@code Character.isLetterOrDigit(int)}. */
static class LazyJavaLetterOrDigit extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass lettersOrDigits = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isLetterOrDigit(codePoint);
            }
        };
        lettersOrDigits.mayContainSupplCodepoints = true;
        return lettersOrDigits;
    }
}
/** Lazy factory for a class backed by {@code Character.isSpaceChar(int)}. */
static class LazyJavaSpaceChar extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass spaceChars = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isSpaceChar(codePoint);
            }
        };
        return spaceChars;
    }
}
/** Lazy factory for a class backed by {@code Character.isTitleCase(int)}. */
static class LazyJavaTitleCase extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass titleCase = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isTitleCase(codePoint);
            }
        };
        return titleCase;
    }
}
/** Lazy factory for a class backed by {@code Character.isUnicodeIdentifierPart(int)}. */
static class LazyJavaUnicodeIdentifierPart extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass identifierPart = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isUnicodeIdentifierPart(codePoint);
            }
        };
        identifierPart.mayContainSupplCodepoints = true;
        return identifierPart;
    }
}
/** Lazy factory for a class backed by {@code Character.isUnicodeIdentifierStart(int)}. */
static class LazyJavaUnicodeIdentifierStart extends LazyCharClass {
    @Override
    protected AbstractCharClass computeValue() {
        AbstractCharClass identifierStart = new AbstractCharClass() {
            @Override
            public boolean contains(int codePoint) {
                return Character.isUnicodeIdentifierStart(codePoint);
            }
        };
        identifierStart.mayContainSupplCodepoints = true;
        return identifierStart;
    }
}
/**
* character classes generated from http://www.unicode.org/reports/tr18/
* http://www.unicode.org/Public/4.1.0/ucd/Blocks.txt
*/
static final class PredefinedCharacterClasses {
static LazyCharClass space = new LazySpace();
static LazyCharClass digit = new LazyDigit();
static final Object[][] contents = {
{ "Lower", new LazyLower() }, //$NON-NLS-1$
{ "Upper", new LazyUpper() }, //$NON-NLS-1$
{ "ASCII", new LazyASCII() }, //$NON-NLS-1$
{ "Alpha", new LazyAlpha() }, //$NON-NLS-1$
{ "Digit", digit }, //$NON-NLS-1$
{ "Alnum", new LazyAlnum() }, //$NON-NLS-1$
{ "Punct", new LazyPunct() }, //$NON-NLS-1$
{ "Graph", new LazyGraph() }, //$NON-NLS-1$
{ "Print", new LazyPrint() }, //$NON-NLS-1$
{ "Blank", new LazyBlank() }, //$NON-NLS-1$
{ "Cntrl", new LazyCntrl() }, //$NON-NLS-1$
{ "XDigit", new LazyXDigit() }, //$NON-NLS-1$
{ "javaLowerCase", new LazyJavaLowerCase() }, //$NON-NLS-1$
{ "javaUpperCase", new LazyJavaUpperCase() }, //$NON-NLS-1$
{ "javaWhitespace", new LazyJavaWhitespace() }, //$NON-NLS-1$
{ "javaMirrored", new LazyJavaMirrored() }, //$NON-NLS-1$
{ "javaDefined", new LazyJavaDefined() }, //$NON-NLS-1$
{ "javaDigit", new LazyJavaDigit() }, //$NON-NLS-1$
{ "javaIdentifierIgnorable", new LazyJavaIdentifierIgnorable() }, //$NON-NLS-1$
{ "javaISOControl", new LazyJavaISOControl() }, //$NON-NLS-1$
{ "javaJavaIdentifierPart", new LazyJavaJavaIdentifierPart() }, //$NON-NLS-1$
{ "javaJavaIdentifierStart", new LazyJavaJavaIdentifierStart() }, //$NON-NLS-1$
{ "javaLetter", new LazyJavaLetter() }, //$NON-NLS-1$
{ "javaLetterOrDigit", new LazyJavaLetterOrDigit() }, //$NON-NLS-1$
{ "javaSpaceChar", new LazyJavaSpaceChar() }, //$NON-NLS-1$
{ "javaTitleCase", new LazyJavaTitleCase() }, //$NON-NLS-1$
{ "javaUnicodeIdentifierPart", new LazyJavaUnicodeIdentifierPart() }, //$NON-NLS-1$
{ "javaUnicodeIdentifierStart", new LazyJavaUnicodeIdentifierStart() }, //$NON-NLS-1$
{ "Space", space }, //$NON-NLS-1$
{ "w", new LazyWord() }, //$NON-NLS-1$
{ "W", new LazyNonWord() }, //$NON-NLS-1$
{ "s", space }, //$NON-NLS-1$
{ "S", new LazyNonSpace() }, //$NON-NLS-1$
{ "d", digit }, //$NON-NLS-1$
{ "D", new LazyNonDigit() }, //$NON-NLS-1$
{ "BasicLatin", new LazyRange(0x0000, 0x007F) }, //$NON-NLS-1$
{ "Latin-1Supplement", new LazyRange(0x0080, 0x00FF) }, //$NON-NLS-1$
{ "LatinExtended-A", new LazyRange(0x0100, 0x017F) }, //$NON-NLS-1$
{ "LatinExtended-B", new LazyRange(0x0180, 0x024F) }, //$NON-NLS-1$
{ "IPAExtensions", new LazyRange(0x0250, 0x02AF) }, //$NON-NLS-1$
{ "SpacingModifierLetters", new LazyRange(0x02B0, 0x02FF) }, //$NON-NLS-1$
{ "CombiningDiacriticalMarks", new LazyRange(0x0300, 0x036F) }, //$NON-NLS-1$
{ "Greek", new LazyRange(0x0370, 0x03FF) }, //$NON-NLS-1$
{ "Cyrillic", new LazyRange(0x0400, 0x04FF) }, //$NON-NLS-1$
{ "CyrillicSupplement", new LazyRange(0x0500, 0x052F) }, //$NON-NLS-1$
{ "Armenian", new LazyRange(0x0530, 0x058F) }, //$NON-NLS-1$
{ "Hebrew", new LazyRange(0x0590, 0x05FF) }, //$NON-NLS-1$
{ "Arabic", new LazyRange(0x0600, 0x06FF) }, //$NON-NLS-1$
{ "Syriac", new LazyRange(0x0700, 0x074F) }, //$NON-NLS-1$
{ "ArabicSupplement", new LazyRange(0x0750, 0x077F) }, //$NON-NLS-1$
{ "Thaana", new LazyRange(0x0780, 0x07BF) }, //$NON-NLS-1$
{ "Devanagari", new LazyRange(0x0900, 0x097F) }, //$NON-NLS-1$
{ "Bengali", new LazyRange(0x0980, 0x09FF) }, //$NON-NLS-1$
{ "Gurmukhi", new LazyRange(0x0A00, 0x0A7F) }, //$NON-NLS-1$
{ "Gujarati", new LazyRange(0x0A80, 0x0AFF) }, //$NON-NLS-1$
{ "Oriya", new LazyRange(0x0B00, 0x0B7F) }, //$NON-NLS-1$
{ "Tamil", new LazyRange(0x0B80, 0x0BFF) }, //$NON-NLS-1$
{ "Telugu", new LazyRange(0x0C00, 0x0C7F) }, //$NON-NLS-1$
{ "Kannada", new LazyRange(0x0C80, 0x0CFF) }, //$NON-NLS-1$
{ "Malayalam", new LazyRange(0x0D00, 0x0D7F) }, //$NON-NLS-1$
{ "Sinhala", new LazyRange(0x0D80, 0x0DFF) }, //$NON-NLS-1$
{ "Thai", new LazyRange(0x0E00, 0x0E7F) }, //$NON-NLS-1$
{ "Lao", new LazyRange(0x0E80, 0x0EFF) }, //$NON-NLS-1$
{ "Tibetan", new LazyRange(0x0F00, 0x0FFF) }, //$NON-NLS-1$
{ "Myanmar", new LazyRange(0x1000, 0x109F) }, //$NON-NLS-1$
{ "Georgian", new LazyRange(0x10A0, 0x10FF) }, //$NON-NLS-1$
{ "HangulJamo", new LazyRange(0x1100, 0x11FF) }, //$NON-NLS-1$
{ "Ethiopic", new LazyRange(0x1200, 0x137F) }, //$NON-NLS-1$
{ "EthiopicSupplement", new LazyRange(0x1380, 0x139F) }, //$NON-NLS-1$
{ "Cherokee", new LazyRange(0x13A0, 0x13FF) }, //$NON-NLS-1$
{ "UnifiedCanadianAboriginalSyllabics", //$NON-NLS-1$
new LazyRange(0x1400, 0x167F) },
{ "Ogham", new LazyRange(0x1680, 0x169F) }, //$NON-NLS-1$
{ "Runic", new LazyRange(0x16A0, 0x16FF) }, //$NON-NLS-1$
{ "Tagalog", new LazyRange(0x1700, 0x171F) }, //$NON-NLS-1$
{ "Hanunoo", new LazyRange(0x1720, 0x173F) }, //$NON-NLS-1$
{ "Buhid", new LazyRange(0x1740, 0x175F) }, //$NON-NLS-1$
{ "Tagbanwa", new LazyRange(0x1760, 0x177F) }, //$NON-NLS-1$
{ "Khmer", new LazyRange(0x1780, 0x17FF) }, //$NON-NLS-1$
{ "Mongolian", new LazyRange(0x1800, 0x18AF) }, //$NON-NLS-1$
{ "Limbu", new LazyRange(0x1900, 0x194F) }, //$NON-NLS-1$
{ "TaiLe", new LazyRange(0x1950, 0x197F) }, //$NON-NLS-1$
{ "NewTaiLue", new LazyRange(0x1980, 0x19DF) }, //$NON-NLS-1$
{ "KhmerSymbols", new LazyRange(0x19E0, 0x19FF) }, //$NON-NLS-1$
{ "Buginese", new LazyRange(0x1A00, 0x1A1F) }, //$NON-NLS-1$
{ "PhoneticExtensions", new LazyRange(0x1D00, 0x1D7F) }, //$NON-NLS-1$
{ "PhoneticExtensionsSupplement", new LazyRange(0x1D80, 0x1DBF) }, //$NON-NLS-1$
{ "CombiningDiacriticalMarksSupplement", //$NON-NLS-1$
new LazyRange(0x1DC0, 0x1DFF) },
{ "LatinExtendedAdditional", new LazyRange(0x1E00, 0x1EFF) }, //$NON-NLS-1$
{ "GreekExtended", new LazyRange(0x1F00, 0x1FFF) }, //$NON-NLS-1$
{ "GeneralPunctuation", new LazyRange(0x2000, 0x206F) }, //$NON-NLS-1$
{ "SuperscriptsandSubscripts", new LazyRange(0x2070, 0x209F) }, //$NON-NLS-1$
{ "CurrencySymbols", new LazyRange(0x20A0, 0x20CF) }, //$NON-NLS-1$
{ "CombiningMarksforSymbols", new LazyRange(0x20D0, 0x20FF) }, //$NON-NLS-1$
{ "LetterlikeSymbols", new LazyRange(0x2100, 0x214F) }, //$NON-NLS-1$
{ "NumberForms", new LazyRange(0x2150, 0x218F) }, //$NON-NLS-1$
{ "Arrows", new LazyRange(0x2190, 0x21FF) }, //$NON-NLS-1$
{ "MathematicalOperators", new LazyRange(0x2200, 0x22FF) }, //$NON-NLS-1$
{ "MiscellaneousTechnical", new LazyRange(0x2300, 0x23FF) }, //$NON-NLS-1$
{ "ControlPictures", new LazyRange(0x2400, 0x243F) }, //$NON-NLS-1$
{ "OpticalCharacterRecognition", new LazyRange(0x2440, 0x245F) }, //$NON-NLS-1$
{ "EnclosedAlphanumerics", new LazyRange(0x2460, 0x24FF) }, //$NON-NLS-1$
{ "BoxDrawing", new LazyRange(0x2500, 0x257F) }, //$NON-NLS-1$
{ "BlockElements", new LazyRange(0x2580, 0x259F) }, //$NON-NLS-1$
{ "GeometricShapes", new LazyRange(0x25A0, 0x25FF) }, //$NON-NLS-1$
{ "MiscellaneousSymbols", new LazyRange(0x2600, 0x26FF) }, //$NON-NLS-1$
{ "Dingbats", new LazyRange(0x2700, 0x27BF) }, //$NON-NLS-1$
{ "MiscellaneousMathematicalSymbols-A", //$NON-NLS-1$
new LazyRange(0x27C0, 0x27EF) },
{ "SupplementalArrows-A", new LazyRange(0x27F0, 0x27FF) }, //$NON-NLS-1$
{ "BraillePatterns", new LazyRange(0x2800, 0x28FF) }, //$NON-NLS-1$
{ "SupplementalArrows-B", new LazyRange(0x2900, 0x297F) }, //$NON-NLS-1$
{ "MiscellaneousMathematicalSymbols-B", //$NON-NLS-1$
new LazyRange(0x2980, 0x29FF) },
{ "SupplementalMathematicalOperators", //$NON-NLS-1$
new LazyRange(0x2A00, 0x2AFF) },
{ "MiscellaneousSymbolsandArrows", //$NON-NLS-1$
new LazyRange(0x2B00, 0x2BFF) },
{ "Glagolitic", new LazyRange(0x2C00, 0x2C5F) }, //$NON-NLS-1$
{ "Coptic", new LazyRange(0x2C80, 0x2CFF) }, //$NON-NLS-1$
{ "GeorgianSupplement", new LazyRange(0x2D00, 0x2D2F) }, //$NON-NLS-1$
{ "Tifinagh", new LazyRange(0x2D30, 0x2D7F) }, //$NON-NLS-1$
{ "EthiopicExtended", new LazyRange(0x2D80, 0x2DDF) }, //$NON-NLS-1$
{ "SupplementalPunctuation", new LazyRange(0x2E00, 0x2E7F) }, //$NON-NLS-1$
{ "CJKRadicalsSupplement", new LazyRange(0x2E80, 0x2EFF) }, //$NON-NLS-1$
{ "KangxiRadicals", new LazyRange(0x2F00, 0x2FDF) }, //$NON-NLS-1$
{ "IdeographicDescriptionCharacters", //$NON-NLS-1$
new LazyRange(0x2FF0, 0x2FFF) },
{ "CJKSymbolsandPunctuation", new LazyRange(0x3000, 0x303F) }, //$NON-NLS-1$
{ "Hiragana", new LazyRange(0x3040, 0x309F) }, //$NON-NLS-1$
{ "Katakana", new LazyRange(0x30A0, 0x30FF) }, //$NON-NLS-1$
{ "Bopomofo", new LazyRange(0x3100, 0x312F) }, //$NON-NLS-1$
{ "HangulCompatibilityJamo", new LazyRange(0x3130, 0x318F) }, //$NON-NLS-1$
{ "Kanbun", new LazyRange(0x3190, 0x319F) }, //$NON-NLS-1$
{ "BopomofoExtended", new LazyRange(0x31A0, 0x31BF) }, //$NON-NLS-1$
{ "CJKStrokes", new LazyRange(0x31C0, 0x31EF) }, //$NON-NLS-1$
{ "KatakanaPhoneticExtensions", new LazyRange(0x31F0, 0x31FF) }, //$NON-NLS-1$
{ "EnclosedCJKLettersandMonths", new LazyRange(0x3200, 0x32FF) }, //$NON-NLS-1$
{ "CJKCompatibility", new LazyRange(0x3300, 0x33FF) }, //$NON-NLS-1$
{ "CJKUnifiedIdeographsExtensionA", //$NON-NLS-1$
new LazyRange(0x3400, 0x4DB5) },
{ "YijingHexagramSymbols", new LazyRange(0x4DC0, 0x4DFF) }, //$NON-NLS-1$
{ "CJKUnifiedIdeographs", new LazyRange(0x4E00, 0x9FFF) }, //$NON-NLS-1$
{ "YiSyllables", new LazyRange(0xA000, 0xA48F) }, //$NON-NLS-1$
{ "YiRadicals", new LazyRange(0xA490, 0xA4CF) }, //$NON-NLS-1$
{ "ModifierToneLetters", new LazyRange(0xA700, 0xA71F) }, //$NON-NLS-1$
{ "SylotiNagri", new LazyRange(0xA800, 0xA82F) }, //$NON-NLS-1$
{ "HangulSyllables", new LazyRange(0xAC00, 0xD7A3) }, //$NON-NLS-1$
{ "HighSurrogates", new LazyRange(0xD800, 0xDB7F) }, //$NON-NLS-1$
{ "HighPrivateUseSurrogates", new LazyRange(0xDB80, 0xDBFF) }, //$NON-NLS-1$
{ "LowSurrogates", new LazyRange(0xDC00, 0xDFFF) }, //$NON-NLS-1$
{ "PrivateUseArea", new LazyRange(0xE000, 0xF8FF) }, //$NON-NLS-1$
{ "CJKCompatibilityIdeographs", new LazyRange(0xF900, 0xFAFF) }, //$NON-NLS-1$
{ "AlphabeticPresentationForms", new LazyRange(0xFB00, 0xFB4F) }, //$NON-NLS-1$
{ "ArabicPresentationForms-A", new LazyRange(0xFB50, 0xFDFF) }, //$NON-NLS-1$
{ "VariationSelectors", new LazyRange(0xFE00, 0xFE0F) }, //$NON-NLS-1$
{ "VerticalForms", new LazyRange(0xFE10, 0xFE1F) }, //$NON-NLS-1$
{ "CombiningHalfMarks", new LazyRange(0xFE20, 0xFE2F) }, //$NON-NLS-1$
{ "CJKCompatibilityForms", new LazyRange(0xFE30, 0xFE4F) }, //$NON-NLS-1$
{ "SmallFormVariants", new LazyRange(0xFE50, 0xFE6F) }, //$NON-NLS-1$
{ "ArabicPresentationForms-B", new LazyRange(0xFE70, 0xFEFF) }, //$NON-NLS-1$
{ "HalfwidthandFullwidthForms", new LazyRange(0xFF00, 0xFFEF) }, //$NON-NLS-1$
{ "all", new LazyRange(0x00, 0x10FFFF) }, //$NON-NLS-1$
{ "Specials", new LazySpecialsBlock() }, //$NON-NLS-1$
{ "Cn", new LazyCategory(Character.UNASSIGNED, true) },
{ "IsL", new LazyCategoryScope(0x3E, true) },
{ "Lu", new LazyCategory(Character.UPPERCASE_LETTER, true) },
{ "Ll", new LazyCategory(Character.LOWERCASE_LETTER, true) },
{ "Lt", new LazyCategory(Character.TITLECASE_LETTER, false) },
{ "Lm", new LazyCategory(Character.MODIFIER_LETTER, false) },
{ "Lo", new LazyCategory(Character.OTHER_LETTER, true) },
{ "IsM", new LazyCategoryScope(0x1C0, true) },
{ "Mn", new LazyCategory(Character.NON_SPACING_MARK, true) },
{ "Me", new LazyCategory(Character.ENCLOSING_MARK, false) },
{ "Mc", new LazyCategory(Character.COMBINING_SPACING_MARK, true) },
{ "N", new LazyCategoryScope(0xE00, true) },
{ "Nd", new LazyCategory(Character.DECIMAL_DIGIT_NUMBER, true) },
{ "Nl", new LazyCategory(Character.LETTER_NUMBER, true) },
{ "No", new LazyCategory(Character.OTHER_NUMBER, true) },
{ "IsZ", new LazyCategoryScope(0x7000, false) },
{ "Zs", new LazyCategory(Character.SPACE_SEPARATOR, false) },
{ "Zl", new LazyCategory(Character.LINE_SEPARATOR, false) },
{ "Zp", new LazyCategory(Character.PARAGRAPH_SEPARATOR, false) },
{ "IsC", new LazyCategoryScope(0xF0000, true, true) },
{ "Cc", new LazyCategory(Character.CONTROL, false) },
{ "Cf", new LazyCategory(Character.FORMAT, true) },
{ "Co", new LazyCategory(Character.PRIVATE_USE, true) },
{ "Cs", new LazyCategory(Character.SURROGATE, false, true) },
{
"IsP",
new LazyCategoryScope((1 << Character.DASH_PUNCTUATION) | (1 << Character.START_PUNCTUATION) |
(1 << Character.END_PUNCTUATION) | (1 << Character.CONNECTOR_PUNCTUATION) |
(1 << Character.OTHER_PUNCTUATION) | (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
(1 << Character.FINAL_QUOTE_PUNCTUATION), true) },
{ "Pd", new LazyCategory(Character.DASH_PUNCTUATION, false) },
{ "Ps", new LazyCategory(Character.START_PUNCTUATION, false) },
{ "Pe", new LazyCategory(Character.END_PUNCTUATION, false) },
{ "Pc", new LazyCategory(Character.CONNECTOR_PUNCTUATION, false) },
{ "Po", new LazyCategory(Character.OTHER_PUNCTUATION, true) },
{ "IsS", new LazyCategoryScope(0x7E000000, true) },
{ "Sm", new LazyCategory(Character.MATH_SYMBOL, true) },
{ "Sc", new LazyCategory(Character.CURRENCY_SYMBOL, false) },
{ "Sk", new LazyCategory(Character.MODIFIER_SYMBOL, false) },
{ "So", new LazyCategory(Character.OTHER_SYMBOL, true) },
{ "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION, false) },
{ "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION, false) } };
/**
 * Looks up the value registered under the given name by scanning the
 * rows of {@code contents} in order; each row holds the name at index 0
 * and the associated object at index 1.
 *
 * @param name
 *            name to look up; must not be null
 * @return the object from the first row whose name equals {@code name},
 *         or null if no row matches
 */
public Object getObject(String name) {
    for (Object[] entry : contents) {
        if (name.equals(entry[0])) {
            return entry[1];
        }
    }
    return null;
}
}
}

View File

@ -0,0 +1,85 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Line terminator factory. Hands out one of two lazily created, cached
 * terminator definitions depending on the pattern flags.
 *
 * @author Nikolay A. Kuznetsov
 */
abstract class AbstractLineTerminator {
    /** Cached terminator used when UNIX_LINES is set; created on first request. */
    static AbstractLineTerminator unixLT = null;

    /** Cached terminator used when UNIX_LINES is not set; created on first request. */
    static AbstractLineTerminator unicodeLT = null;

    /**
     * Tells whether the given character is a line terminator.
     *
     * @param ch
     *            character (code point) to test
     * @return true if {@code ch} terminates a line
     */
    public abstract boolean isLineTerminator(int ch);

    /**
     * Tells whether the position between {@code ch1} and {@code ch2} lies
     * right after a line terminator.
     *
     * @param ch1
     *            character preceding the position
     * @param ch2
     *            character following the position
     * @return true if a line terminator ends just before the position
     */
    public abstract boolean isAfterLineTerminator(int ch1, int ch2);

    /**
     * Returns the line terminator definition matching the given flags.
     *
     * @param flag
     *            pattern flags; only the {@code TPattern.UNIX_LINES} bit is
     *            inspected
     * @return the shared instance for the selected mode
     */
    public static AbstractLineTerminator getInstance(int flag) {
        boolean unixLines = (flag & TPattern.UNIX_LINES) != 0;
        if (unixLines) {
            if (unixLT == null) {
                // In this mode only the line feed counts as a terminator.
                unixLT = new AbstractLineTerminator() {
                    @Override
                    public boolean isLineTerminator(int ch) {
                        return ch == '\n';
                    }

                    @Override
                    public boolean isAfterLineTerminator(int ch, int ch2) {
                        return ch == '\n';
                    }
                };
            }
            return unixLT;
        }

        if (unicodeLT == null) {
            unicodeLT = new AbstractLineTerminator() {
                @Override
                public boolean isLineTerminator(int ch) {
                    switch (ch) {
                        case '\n':
                        case '\r':
                        case '\u0085':
                        case '\u2028':
                        case '\u2029':
                            return true;
                        default:
                            return false;
                    }
                }

                @Override
                public boolean isAfterLineTerminator(int ch, int ch2) {
                    if (ch == '\r') {
                        // A carriage return directly followed by a line
                        // feed is not yet the end of the terminator.
                        return ch2 != '\n';
                    }
                    return ch == '\n' || ch == '\u0085' || ch == '\u2028' || ch == '\u2029';
                }
            };
        }
        return unicodeLT;
    }
}

View File

@ -0,0 +1,272 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Base class for the nodes that represent a given regular expression. Note:
 * all the classes representing nodes carry "Set" in their names.
 *
 * @author Nikolay A. Kuznetsov
 */
abstract class AbstractSet {
    public static final int TYPE_LEAF = 1 << 0;

    public static final int TYPE_FSET = 1 << 1;

    public static final int TYPE_QUANT = 1 << 3;

    public static final int TYPE_DOTSET = 0x80000000 | '.';

    /**
     * Next node to visit
     */
    protected AbstractSet next;

    /**
     * Counter for debugging purposes; provides a unique index for each node.
     */
    static int counter = 1;

    /**
     * Set once {@link #processSecondPass()} has visited this node.
     */
    protected boolean isSecondPassVisited = false;

    /**
     * Textual form of this node's unique index, used by
     * {@link #getQualifiedName()}.
     */
    protected String index = Integer.toString(AbstractSet.counter++);

    private int type = 0;

    public AbstractSet() {
    }

    public AbstractSet(AbstractSet n) {
        next = n;
    }

    /**
     * Checks if this node matches in the given position and recursively calls
     * the next node's matches on a positive self match. Returns a non-negative
     * integer if the entire match succeeds, a negative one otherwise.
     *
     * @param stringIndex
     *            - string index to start from;
     * @param testString
     *            - input string
     * @param matchResult
     *            - MatchResult to store the result into
     * @return a negative value if the match fails, a non-negative value
     *         otherwise
     */
    public abstract int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult);

    /**
     * Attempts to apply the pattern starting from this set/stringIndex;
     * returns the index at which a match was found, or a negative value if
     * the search didn't succeed. Additional information can be obtained via
     * matchResult.
     *
     * Note: this is the default implementation of the find method, it's based
     * on matches; subclasses do not have to override find unless a more
     * effective find exists for a particular node type (sequence, i.e.
     * substring, for example). The same applies to the find back method.
     *
     * @param stringIndex
     *            starting index
     * @param testString
     *            string to search in
     * @param matchResult
     *            result of the match
     * @return the index where the match starts, or -1 if nothing matches up
     *         to the right bound of matchResult
     */
    public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int length = matchResult.getRightBound();
        while (stringIndex <= length) {
            if (matches(stringIndex, testString, matchResult) >= 0) {
                return stringIndex;
            } else {
                stringIndex++;
            }
        }
        return -1;
    }

    /**
     * Searches backwards, from startSearch down to stringIndex, for a
     * position where this node matches.
     *
     * @param stringIndex
     *            - an index to finish the backward search at (left limit)
     * @param startSearch
     *            - an index to start the search from (right limit)
     * @param testString
     *            - test string;
     * @param matchResult
     *            match result
     * @return the index where the match starts, or -1 if no position in the
     *         range matches
     */
    public int findBack(int stringIndex, int startSearch, CharSequence testString, MatchResultImpl matchResult) {
        while (startSearch >= stringIndex) {
            if (matches(startSearch, testString, matchResult) >= 0) {
                return startSearch;
            } else {
                startSearch--;
            }
        }
        return -1;
    }

    /**
     * Returns true if this node has consumed any characters during a positive
     * match attempt; for example, a node representing a character always
     * consumes one character if it matches. If a particular node matches the
     * empty string this method will return false.
     *
     * @param matchResult
     *            match result to inspect
     * @return true if characters were consumed
     */
    public abstract boolean hasConsumed(MatchResultImpl matchResult);

    /**
     * Returns name for the particular node type. Used for debugging purposes.
     */
    protected abstract String getName();

    protected void setType(int type) {
        this.type = type;
    }

    public int getType() {
        return this.type;
    }

    /**
     * Returns the node name decorated with its unique index, in the form
     * {@code <index:name>}.
     */
    protected String getQualifiedName() {
        return "<" + index + ":" + getName() + ">";
    }

    @Override
    public String toString() {
        return getQualifiedName();
    }

    /**
     * Returns the next.
     */
    public AbstractSet getNext() {
        return next;
    }

    /**
     * Sets next abstract set
     *
     * @param next
     *            The next to set.
     */
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    /**
     * Returns true if the given node intersects with this one, false
     * otherwise. This method is used for quantifier construction; consider
     * the following regular expression: (a|b)*ccc.
     *
     * (a|b) does not intersect with "ccc" and thus can be quantified greedily
     * (w/o kickbacks), like *+ instead of *.
     *
     * This default implementation always answers true.
     *
     * @param set
     *            - usually previous node
     *
     * @return true if the given node intersects with this one
     */
    public boolean first(AbstractSet set) {
        return true;
    }

    /**
     * This method is used for the replacement of backreferenced sets.
     *
     * @return null if the current node need not be replaced; otherwise the
     *         JointSet which is the replacement of the current node. This
     *         default implementation returns null.
     */
    public JointSet processBackRefReplacement() {
        return null;
    }

    /**
     * This method is used for traversing nodes after the first stage of
     * compilation.
     */
    public void processSecondPass() {
        this.isSecondPassVisited = true;

        if (next != null) {
            if (!next.isSecondPassVisited) {
                /*
                 * Add here code to do during the pass
                 */
                JointSet set = next.processBackRefReplacement();

                if (set != null) {
                    // Mark the replaced node as visited before dropping the
                    // reference to it, then continue with its replacement.
                    next.isSecondPassVisited = true;
                    next = set;
                }

                /*
                 * End code to do during the pass
                 */
                next.processSecondPass();
            } else {
                /*
                 * We reach node through next but it is already traversed. You
                 * can see this situation for AltGroupQuantifierSet.next when we
                 * reach this node through AltGroupQuantifierSet.innerset. ...
                 * .next
                 */

                /*
                 * Add here code to do during the pass
                 */
                if (next instanceof SingleSet && ((FSet)((JointSet)next).fSet).isBackReferenced) {
                    next = next.next;
                }

                /*
                 * End code to do during the pass
                 */
            }
        }
    }
}

View File

@ -0,0 +1,57 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * FSet used for lookahead constructs; its match always succeeds at the
 * index it is asked about.
 *
 * @author Nikolay A. Kuznetsov
 */
class AheadFSet extends FSet {
    /**
     * Creates the set, passing -1 as the group index to {@code FSet}.
     */
    public AheadFSet() {
        super(-1);
    }

    @Override
    protected String getName() {
        return "AheadFSet";
    }

    /**
     * Succeeds unconditionally: the given index is handed back unchanged and
     * no other node is consulted.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        return stringIndex;
    }
}

View File

@ -0,0 +1,67 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents "?" quantifier over composite sets.
 *
 * @author Nikolay A. Kuznetsov
 */
class AltGroupQuantifierSet extends GroupQuantifierSet {
    public AltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Tries the optional inner set first, provided it reports having
     * consumed input; when the inner set is skipped or fails, the match
     * continues directly with the next node.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (innerSet.hasConsumed(matchResult)) {
            int matchedWithInner = innerSet.matches(stringIndex, testString, matchResult);
            if (matchedWithInner >= 0) {
                return matchedWithInner;
            }
        }
        // Zero occurrences of the inner set.
        return next.matches(stringIndex, testString, matchResult);
    }

    /**
     * Links both this node and the inner set to the same successor.
     */
    @Override
    public void setNext(AbstractSet next) {
        super.setNext(next);
        innerSet.setNext(next);
    }
}

View File

@ -0,0 +1,65 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents "?" quantifier over leaf sets.
 *
 * @author Nikolay A. Kuznetsov
 */
class AltQuantifierSet extends LeafQuantifierSet {
    public AltQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Prefers one occurrence of the inner leaf; if that attempt fails the
     * match continues with the next node at the same index (zero
     * occurrences).
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int matchedWithInner = innerSet.matches(stringIndex, testString, matchResult);
        if (matchedWithInner < 0) {
            return next.matches(stringIndex, testString, matchResult);
        }
        return matchedWithInner;
    }

    /**
     * Links both this node and the inner leaf to the same successor.
     */
    @Override
    public void setNext(AbstractSet next) {
        super.setNext(next);
        innerSet.setNext(next);
    }
}

View File

@ -0,0 +1,72 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * FSet that always matches and remembers the string index it was last
 * reached at, so that the index can be queried afterwards via
 * {@link #getIndex()}.
 *
 * @author Nikolay A. Kuznetsov
 */
class AtomicFSet extends FSet {
    /** String index passed to the most recent {@code matches} call. */
    int index;

    public AtomicFSet(int groupIndex) {
        super(groupIndex);
    }

    /**
     * Replaces the value recorded in the group's "consumed" slot with the
     * distance from that value to {@code stringIndex}, stores
     * {@code stringIndex} and reports success without consulting any other
     * node.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int group = getGroupIndex();
        int recorded = matchResult.getConsumed(group);
        matchResult.setConsumed(group, stringIndex - recorded);
        this.index = stringIndex;
        return stringIndex;
    }

    /**
     * Returns the string index saved by the last {@code matches} call.
     */
    public int getIndex() {
        return this.index;
    }

    @Override
    public boolean hasConsumed(MatchResultImpl mr) {
        return false;
    }

    @Override
    protected String getName() {
        return "AtomicFSet";
    }
}

View File

@ -0,0 +1,88 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.ArrayList;
/**
 * This class represents an atomic group (?>X); once X matches, this match
 * becomes unchangeable till the end of the match.
 *
 * @author Nikolay A. Kuznetsov
 */
class AtomicJointSet extends NonCapJointSet {
    public AtomicJointSet(ArrayList<AbstractSet> children, FSet fSet) {
        super(children, fSet);
    }

    /**
     * Tries the children in order; the first child that matches fixes the
     * group, and the result of matching the next node from the index saved
     * by the group's AtomicFSet is returned as is, without trying the
     * remaining children.
     *
     * @return the result of the next node's match if some child matched,
     *         -1 if no child matched
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        // Remember the previous value so it can be restored on failure.
        int start = matchResult.getConsumed(groupIndex);
        matchResult.setConsumed(groupIndex, stringIndex);

        int size = children.size();
        for (int i = 0; i < size; i++) {
            AbstractSet e = children.get(i);
            int shift = e.matches(stringIndex, testString, matchResult);
            if (shift >= 0) {
                // AtomicFset always returns true, but saves the index to run
                // this next.match() from;
                return next.matches(((AtomicFSet)fSet).getIndex(), testString, matchResult);
            }
        }

        matchResult.setConsumed(groupIndex, start);
        return -1;
    }

    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    @Override
    public AbstractSet getNext() {
        return next;
    }

    /**
     * Returns the debug name of this node type.
     */
    @Override
    protected String getName() {
        // Previously reported the parent's name ("NonCapJointSet"), which
        // made atomic groups indistinguishable in debug output.
        return "AtomicJointSet"; //$NON-NLS-1$
    }
}

View File

@ -0,0 +1,120 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Back reference node, i.e. \1-9;
 *
 * @author Nikolay A. Kuznetsov
 */
class BackReferenceSet extends CIBackReferenceSet {
    public BackReferenceSet(int groupIndex, int consCounter) {
        super(groupIndex, consCounter);
    }

    /**
     * Matches the text captured by the referenced group literally at
     * {@code stringIndex} and, on success, continues with the next node.
     *
     * @return the result of the next node's match, or -1 if the group has no
     *         value, does not fit before the right bound, or does not occur
     *         at {@code stringIndex}
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        String group = getString(matchResult);
        if (group == null || (stringIndex + group.length()) > matchResult.getRightBound()) {
            return -1;
        }
        if (!testString.toString().startsWith(group, stringIndex)) {
            return -1;
        }
        int shift = group.length();
        matchResult.setConsumed(consCounter, shift);
        return next.matches(stringIndex + shift, testString, matchResult);
    }

    /**
     * Searches forward from {@code strIndex} for an occurrence of the
     * referenced group's text after which the next node matches.
     *
     * @return the index of the occurrence, or -1 if there is none within the
     *         right bound of {@code matchResult}
     */
    @Override
    public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        String group = getString(matchResult);
        // The search is limited by the right bound, as in matches(); the
        // left bound was used here before, which rejected nearly every
        // search up front.
        int rightBound = matchResult.getRightBound();
        if (group == null || (strIndex + group.length()) > rightBound) {
            return -1;
        }
        String testStr = testString.toString();
        while (strIndex <= rightBound) {
            strIndex = testStr.indexOf(group, strIndex);
            // Occurrences only move to the right, so once one no longer
            // fits before the right bound none of the later ones will.
            if (strIndex < 0 || (strIndex + group.length()) > rightBound) {
                return -1;
            }
            if (next.matches(strIndex + group.length(), testString, matchResult) >= 0) {
                return strIndex;
            }
            strIndex++;
        }
        return -1;
    }

    /**
     * Searches backwards from {@code lastIndex} down to {@code strIndex} for
     * an occurrence of the referenced group's text after which the next node
     * matches.
     *
     * @return the index of the occurrence, or -1 if there is none
     */
    @Override
    public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
        String group = getString(matchResult);
        if (group == null) {
            return -1;
        }
        String testStr = testString.toString();
        while (lastIndex >= strIndex) {
            lastIndex = testStr.lastIndexOf(group, lastIndex);
            if (lastIndex < 0 || lastIndex < strIndex) {
                return -1;
            }
            if (next.matches(lastIndex + group.length(), testString, matchResult) >= 0) {
                return lastIndex;
            }
            lastIndex--;
        }
        return -1;
    }

    @Override
    public boolean first(AbstractSet set) {
        return true;
    }

    /**
     * Returns the debug name of this node, including the index of the group
     * it refers to.
     */
    @Override
    public String getName() {
        // referencedGroup is the field the constructor stores the group
        // index in; groupIndex is not assigned by this class hierarchy's
        // back reference constructors.
        return "back reference: " + this.referencedGroup; //$NON-NLS-1$
    }
}

View File

@ -0,0 +1,121 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Group node over subexpression w/o alternations. This node is used if current
 * group is referenced via backreference.
 */
class BackReferencedSingleSet extends SingleSet {
    /*
     * This class is needed only for overwriting find() and findBack() methods
     * of SingleSet class, which is being back referenced. The following example
     * explains the need for such substitution: Let's consider the pattern
     * ".*(.)\\1". Leading .* works as follows: finds line terminator and runs
     * findBack from that point. findBack method in its turn (in contrast to
     * matches) sets group boundaries on the back trace. Thus at the point we
     * try to match back reference(\\1) groups are not yet set.
     *
     * To fix this problem we replace backreferenced groups with instances of
     * this class, which will use matches instead of find; this will affect
     * performance, but ensure correctness of the match.
     */

    public BackReferencedSingleSet(AbstractSet child, FSet fSet) {
        super(child, fSet);
    }

    public BackReferencedSingleSet(SingleSet node) {
        super(node.kid, ((FSet)node.fSet));
    }

    /**
     * Searches forward from {@code stringIndex} up to the right bound of
     * {@code matchResult}, setting the group start before each attempt so
     * that back references see it.
     *
     * @return the index at which the kid matched; otherwise the negative
     *         result of the last attempt, or -1 if the range was empty
     */
    @Override
    public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        // Starts as "not found": with 0 an empty search range used to be
        // reported as a match at index 0.
        int res = -1;
        int lastIndex = matchResult.getRightBound();
        int startSearch = stringIndex;

        for (; startSearch <= lastIndex; startSearch++) {
            int saveStart = matchResult.getStart(groupIndex);

            matchResult.setStart(groupIndex, startSearch);
            res = kid.matches(startSearch, testString, matchResult);
            if (res >= 0) {
                res = startSearch;
                break;
            } else {
                // Undo the tentative group start before the next attempt.
                matchResult.setStart(groupIndex, saveStart);
            }
        }

        return res;
    }

    /**
     * Searches backwards from {@code lastIndex} down to {@code stringIndex},
     * setting the group start before each attempt so that back references
     * see it.
     *
     * @return the index at which the kid matched; otherwise the negative
     *         result of the last attempt, or -1 if the range was empty
     */
    @Override
    public int findBack(int stringIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
        // Starts as "not found": with 0 an empty search range used to be
        // reported as a match at index 0.
        int res = -1;
        int startSearch = lastIndex;

        for (; startSearch >= stringIndex; startSearch--) {
            int saveStart = matchResult.getStart(groupIndex);

            matchResult.setStart(groupIndex, startSearch);
            res = kid.matches(startSearch, testString, matchResult);
            if (res >= 0) {
                res = startSearch;
                break;
            } else {
                // Undo the tentative group start before the next attempt.
                matchResult.setStart(groupIndex, saveStart);
            }
        }

        return res;
    }

    /**
     * This method is used for replacement of backreferenced sets. A node of
     * this class never asks to be replaced.
     *
     * @return always null
     */
    @Override
    public JointSet processBackRefReplacement() {
        return null;
    }
}

View File

@ -0,0 +1,62 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * FSet for lookbehind constructs. Compares the value stored for its group
 * in the match result's "consumed" slots with the current string index:
 * returns the current index when they are equal and -1 otherwise.
 *
 * @author Nikolay A. Kuznetsov
 */
class BehindFSet extends FSet {
    public BehindFSet(int groupIndex) {
        super(groupIndex);
    }

    @Override
    protected String getName() {
        return "BehindFSet"; //$NON-NLS-1$
    }

    /**
     * Succeeds only when {@code stringIndex} equals the index recorded for
     * this group.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int expectedIndex = matchResult.getConsumed(getGroupIndex());
        if (expectedIndex != stringIndex) {
            return -1;
        }
        return stringIndex;
    }
}

View File

@ -0,0 +1,101 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Case Insensitive back reference node;
 *
 * @author Nikolay A. Kuznetsov
 */
class CIBackReferenceSet extends JointSet {
    /** Index of the group this back reference refers to. */
    protected int referencedGroup;

    /** Index of the "consumed" slot this node records its match length in. */
    protected int consCounter;

    public CIBackReferenceSet(int groupIndex, int consCounter) {
        this.referencedGroup = groupIndex;
        this.consCounter = consCounter;
    }

    /**
     * Not supported for this node; always throws.
     *
     * @throws TPatternSyntaxException
     *             always
     */
    public int accepts(int strIndex, CharSequence testString) {
        throw new TPatternSyntaxException(strIndex + ", " + testString, "", 0);
    }

    /**
     * Compares the text captured by the referenced group with the input at
     * {@code stringIndex}, accepting for each position either the captured
     * character or its {@code TPattern.getSupplement} counterpart, and on
     * success continues with the next node.
     *
     * @return the result of the next node's match, or -1 if the group has no
     *         value, does not fit before the right bound, or differs from
     *         the input
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        String group = getString(matchResult);

        if (group == null || (stringIndex + group.length()) > matchResult.getRightBound()) {
            return -1;
        }

        for (int i = 0; i < group.length(); i++) {
            char expected = group.charAt(i);
            char actual = testString.charAt(stringIndex + i);
            if (expected != actual && TPattern.getSupplement(expected) != actual) {
                return -1;
            }
        }
        matchResult.setConsumed(consCounter, group.length());
        return next.matches(stringIndex + group.length(), testString, matchResult);
    }

    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    /**
     * Returns the text currently captured by the referenced group, as
     * reported by {@code getGroupNoCheck}.
     */
    protected String getString(MatchResultImpl matchResult) {
        return matchResult.getGroupNoCheck(referencedGroup);
    }

    /**
     * Returns the debug name of this node, including the index of the group
     * it refers to.
     */
    @Override
    public String getName() {
        // The constructor stores the referenced group in referencedGroup;
        // groupIndex is never assigned by this class.
        return "CI back reference: " + this.referencedGroup;
    }

    /**
     * Reports whether the recorded consumed value for this node is non-zero
     * and then resets that value to -1.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        int cons = matchResult.getConsumed(consCounter);
        boolean res = cons != 0;
        matchResult.setConsumed(consCounter, -1);
        return res;
    }
}

View File

@ -0,0 +1,67 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Leaf node that accepts one character, either as given or as its
 * supplement obtained from {@code TPattern.getSupplement}.
 *
 * @author Nikolay A. Kuznetsov
 */
class CICharSet extends LeafSet {
    /** The character as written in the pattern. */
    private char ch;

    /** The alternative character accepted in place of {@link #ch}. */
    private char supplement;

    public CICharSet(char ch) {
        this.ch = ch;
        this.supplement = TPattern.getSupplement(ch);
    }

    /**
     * @return {@code 1} if the character at {@code strIndex} equals the
     *         pattern character or its supplement, {@code -1} otherwise
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        final char candidate = testString.charAt(strIndex);
        if (candidate == this.ch || candidate == this.supplement) {
            return 1;
        }
        return -1;
    }

    @Override
    protected String getName() {
        return "CI " + ch;
    }

    /**
     * @return the character as written in the pattern
     */
    protected char getChar() {
        return ch;
    }
}

View File

@ -0,0 +1,50 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Case insensitive canonical decomposition of a Unicode character, used when
 * the CANON_EQ flag of the Pattern class is specified.
 *
 * Currently only a stub: it adds nothing to {@link DecomposedCharSet}.
 */
class CIDecomposedCharSet extends DecomposedCharSet {

    /**
     * Forwards both arguments unchanged to the superclass constructor.
     *
     * @param decomp
     *            decomposition passed on to {@link DecomposedCharSet}
     * @param decomposedCharLength
     *            length value passed on to {@link DecomposedCharSet}
     */
    public CIDecomposedCharSet(int[] decomp, int decomposedCharLength) {
        super(decomp, decomposedCharLength);
    }
}

View File

@ -0,0 +1,68 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Leaf node matching a fixed character sequence, where every position accepts
 * either the pattern character or its supplement from
 * {@code TPattern.getSupplement}.
 *
 * @author Nikolay A. Kuznetsov
 */
class CISequenceSet extends LeafSet {
    /** The sequence to match, as written in the pattern. */
    private String string = null;

    /**
     * @param substring
     *            the sequence to match; its content is copied and its length
     *            becomes this node's character count
     */
    CISequenceSet(StringBuffer substring) {
        this.string = substring.toString();
        this.charCount = substring.length();
    }

    /**
     * @return the length of the sequence if every position matches starting
     *         at {@code strIndex}, {@code -1} at the first mismatch
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        int offset = 0;
        while (offset < string.length()) {
            final char expected = string.charAt(offset);
            final char actual = testString.charAt(strIndex + offset);
            if (expected != actual && TPattern.getSupplement(expected) != actual) {
                return -1;
            }
            offset++;
        }
        return string.length();
    }

    @Override
    public String getName() {
        return "CI sequence: " + string;
    }
}

View File

@ -0,0 +1,604 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.BitSet;
/**
* User defined character classes ([abef]). See AbstractCharClass documentation
* for more details.
*
* Membership is kept in a BitSet (<code>bits</code>) for as long as every
* combined class can expose its own bits; once that is no longer possible the
* class switches to a chain of predicate objects (<code>nonBitSet</code>) and
* hides its bits from callers (<code>hideBits</code>).
*
* @author Nikolay A. Kuznetsov
*/
class CharClass extends AbstractCharClass {
// Flag indicates if we add supplement upper/lower case
boolean ci = false;
// Flag enabling the case folding applied in add(int) to code points above 128
boolean uci = false;
// Flag indicates if there are unicode supplements
boolean hasUCI = false;
// Set once add(AbstractCharClass) has flipped altSurrogates; from then on
// add(int) clears surrogate bits instead of setting them.
boolean invertedSurrogates = false;
// Set once add(AbstractCharClass) has flipped alt; from then on add(int)
// and add(int, int) clear bits instead of setting them.
boolean inverted = false;
// When true, getBits() returns null and nonBitSet is used for membership.
boolean hideBits = false;
BitSet bits = new BitSet();
// Predicate used by contains(int) instead of bits when it is not null.
AbstractCharClass nonBitSet = null;
/**
* Creates a case sensitive class.
*/
public CharClass() {
}
/**
* @param ci
*            whether add(int) also adds the ASCII case supplement
* @param uci
*            whether add(int) case-folds code points above 128 (only
*            consulted when <code>ci</code> is set)
*/
public CharClass(boolean ci, boolean uci) {
this.ci = ci;
this.uci = uci;
}
/**
* Same as {@link #CharClass(boolean, boolean)} and additionally applies
* <code>setNegative(negative)</code>.
*/
public CharClass(boolean negative, boolean ci, boolean uci) {
this(ci, uci);
setNegative(negative);
}
/*
* Adds a single code point to this class and returns this instance.
*
* We can use this method safely even if nonBitSet != null due to specific
* of range constructions in regular expressions.
*/
public CharClass add(int ch) {
if (ci) {
// ASCII letters: also record the supplement returned by TPattern.
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
if (!inverted) {
bits.set(TPattern.getSupplement((char)ch));
} else {
bits.clear(TPattern.getSupplement((char)ch));
}
} else if (uci && ch > 128) {
hasUCI = true;
// Fold the code point; only the folded value is stored below.
ch = Character.toLowerCase(Character.toUpperCase(ch));
// return this;
}
}
// Surrogates are additionally tracked in lowHighSurrogates, indexed by
// their offset from Character.MIN_SURROGATE.
if (Lexer.isHighSurrogate(ch) || Lexer.isLowSurrogate(ch)) {
if (!invertedSurrogates) {
lowHighSurrogates.set(ch - Character.MIN_SURROGATE);
} else {
lowHighSurrogates.clear(ch - Character.MIN_SURROGATE);
}
}
if (!inverted) {
bits.set(ch);
} else
bits.clear(ch);
if (!mayContainSupplCodepoints && Character.isSupplementaryCodePoint(ch)) {
mayContainSupplCodepoints = true;
}
return this;
}
/*
* The difference between add(AbstractCharClass) and
* union(AbstractCharClass) is that add() is used for constructions like
* "[^abc\\d]" (this pattern doesn't match "1") while union is used for
* constructions like "[^abc[\\d]]" (this pattern matches "1").
*
* Returns this instance.
*/
public CharClass add(final AbstractCharClass cc) {
if (!mayContainSupplCodepoints && cc.mayContainSupplCodepoints) {
mayContainSupplCodepoints = true;
}
// Surrogate bookkeeping; each branch is labelled with the identity it
// implements.
if (!invertedSurrogates) {
// A | !B = ! ((A ^ B) & B)
if (cc.altSurrogates) {
lowHighSurrogates.xor(cc.getLowHighSurrogates());
lowHighSurrogates.and(cc.getLowHighSurrogates());
altSurrogates = !altSurrogates;
invertedSurrogates = true;
// A | B
} else {
lowHighSurrogates.or(cc.getLowHighSurrogates());
}
} else {
// !A | !B = !(A & B)
if (cc.altSurrogates) {
lowHighSurrogates.and(cc.getLowHighSurrogates());
// !A | B = !(A & !B)
} else {
lowHighSurrogates.andNot(cc.getLowHighSurrogates());
}
}
// Bit set path: possible while our bits are not hidden and cc exposes
// its own bits.
if (!hideBits && cc.getBits() != null) {
if (!inverted) {
// A | !B = ! ((A ^ B) & B)
if (cc.isNegative()) {
bits.xor(cc.getBits());
bits.and(cc.getBits());
alt = !alt;
inverted = true;
// A | B
} else {
bits.or(cc.getBits());
}
} else {
// !A | !B = !(A & B)
if (cc.isNegative()) {
bits.and(cc.getBits());
// !A | B = !(A & !B)
} else {
bits.andNot(cc.getBits());
}
}
} else {
// Predicate path: wrap the current state and cc in an anonymous
// class. curAlt is a snapshot of alt taken here; bits and inverted
// are not captured as locals, so they are read when contains() is
// called (presumably from this CharClass instance - confirm that
// AbstractCharClass declares no members with those names).
final boolean curAlt = alt;
if (nonBitSet == null) {
if (curAlt && !inverted && bits.isEmpty()) {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return cc.contains(ch);
}
};
// alt = true;
} else {
/*
* We keep the value of alt unchanged for constructions like
* [^[abc]fgb] by using the formula a ^ b == !a ^ !b.
*/
if (curAlt) {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return !((curAlt ^ bits.get(ch)) || ((curAlt ^ inverted) ^ cc.contains(ch)));
}
};
// alt = true
} else {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return (curAlt ^ bits.get(ch)) || ((curAlt ^ inverted) ^ cc.contains(ch));
}
};
// alt = false
}
}
// From now on getBits() returns null.
hideBits = true;
} else {
// A predicate already exists: chain it with cc.
final AbstractCharClass nb = nonBitSet;
if (curAlt) {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return !(curAlt ^ (nb.contains(ch) || cc.contains(ch)));
}
};
// alt = true
} else {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return curAlt ^ (nb.contains(ch) || cc.contains(ch));
}
};
// alt = false
}
}
}
return this;
}
/**
* Adds every code point from <code>st</code> to <code>end</code>, both
* inclusive, and returns this instance.
*
* @throws IllegalArgumentException
*             if <code>st</code> is greater than <code>end</code>
*/
public CharClass add(int st, int end) {
if (st > end)
throw new IllegalArgumentException();
// Bulk update is only used when no per-character work is needed: the
// class is case sensitive and the range does not touch surrogates.
if (!ci
// no intersection with surrogate characters
&&
(end < Character.MIN_SURROGATE || st > Character.MAX_SURROGATE)) {
if (!inverted) {
bits.set(st, end + 1);
} else {
bits.clear(st, end + 1);
}
} else {
for (int i = st; i < end + 1; i++) {
add(i);
}
}
return this;
}
// OR operation
// Combines this class with clazz in place. Uses the bit sets while both
// sides expose them, otherwise builds a predicate in nonBitSet.
public void union(final AbstractCharClass clazz) {
if (!mayContainSupplCodepoints && clazz.mayContainSupplCodepoints) {
mayContainSupplCodepoints = true;
}
if (clazz.hasUCI())
this.hasUCI = true;
if (altSurrogates ^ clazz.altSurrogates) {
// !A | B = !(A & !B)
if (altSurrogates) {
lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
// A | !B = !((A ^ B) & B)
} else {
lowHighSurrogates.xor(clazz.getLowHighSurrogates());
lowHighSurrogates.and(clazz.getLowHighSurrogates());
altSurrogates = true;
}
} else {
// !A | !B = !(A & B)
if (altSurrogates) {
lowHighSurrogates.and(clazz.getLowHighSurrogates());
// A | B
} else {
lowHighSurrogates.or(clazz.getLowHighSurrogates());
}
}
if (!hideBits && clazz.getBits() != null) {
if (alt ^ clazz.isNegative()) {
// !A | B = !(A & !B)
if (alt) {
bits.andNot(clazz.getBits());
// A | !B = !((A ^ B) & B)
} else {
bits.xor(clazz.getBits());
bits.and(clazz.getBits());
alt = true;
}
} else {
// !A | !B = !(A & B)
if (alt) {
bits.and(clazz.getBits());
// A | B
} else {
bits.or(clazz.getBits());
}
}
} else {
// Predicate path. contains(int) of this class applies alt on top of
// nonBitSet, which is why the curAlt branches below negate their
// result.
final boolean curAlt = alt;
if (nonBitSet == null) {
if (!inverted && bits.isEmpty()) {
if (curAlt) {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return !clazz.contains(ch);
}
};
// alt = true
} else {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return clazz.contains(ch);
}
};
// alt = false
}
} else {
if (curAlt) {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return !(clazz.contains(ch) || (curAlt ^ bits.get(ch)));
}
};
// alt = true
} else {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return clazz.contains(ch) || (curAlt ^ bits.get(ch));
}
};
// alt = false
}
}
hideBits = true;
} else {
final AbstractCharClass nb = nonBitSet;
if (curAlt) {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return !((curAlt ^ nb.contains(ch)) || clazz.contains(ch));
}
};
// alt = true
} else {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return (curAlt ^ nb.contains(ch)) || clazz.contains(ch);
}
};
// alt = false
}
}
}
}
// AND operation
// Intersects this class with clazz in place. Mirrors union(): bit sets
// while both sides expose them, otherwise a predicate in nonBitSet.
public void intersection(final AbstractCharClass clazz) {
if (!mayContainSupplCodepoints && clazz.mayContainSupplCodepoints) {
mayContainSupplCodepoints = true;
}
if (clazz.hasUCI())
this.hasUCI = true;
if (altSurrogates ^ clazz.altSurrogates) {
// !A & B = ((A ^ B) & B)
if (altSurrogates) {
lowHighSurrogates.xor(clazz.getLowHighSurrogates());
lowHighSurrogates.and(clazz.getLowHighSurrogates());
altSurrogates = false;
// A & !B
} else {
lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
}
} else {
// !A & !B = !(A | B)
if (altSurrogates) {
lowHighSurrogates.or(clazz.getLowHighSurrogates());
// A & B
} else {
lowHighSurrogates.and(clazz.getLowHighSurrogates());
}
}
if (!hideBits && clazz.getBits() != null) {
if (alt ^ clazz.isNegative()) {
// !A & B = ((A ^ B) & B)
if (alt) {
bits.xor(clazz.getBits());
bits.and(clazz.getBits());
alt = false;
// A & !B
} else {
bits.andNot(clazz.getBits());
}
} else {
// !A & !B = !(A | B)
if (alt) {
bits.or(clazz.getBits());
// A & B
} else {
bits.and(clazz.getBits());
}
}
} else {
// Predicate path, see union() for why the curAlt branches negate.
final boolean curAlt = alt;
if (nonBitSet == null) {
if (!inverted && bits.isEmpty()) {
if (curAlt) {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return !clazz.contains(ch);
}
};
// alt = true
} else {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return clazz.contains(ch);
}
};
// alt = false
}
} else {
if (curAlt) {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return !(clazz.contains(ch) && (curAlt ^ bits.get(ch)));
}
};
// alt = true
} else {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return clazz.contains(ch) && (curAlt ^ bits.get(ch));
}
};
// alt = false
}
}
hideBits = true;
} else {
final AbstractCharClass nb = nonBitSet;
if (curAlt) {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return !((curAlt ^ nb.contains(ch)) && clazz.contains(ch));
}
};
// alt = true
} else {
nonBitSet = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
return (curAlt ^ nb.contains(ch)) && clazz.contains(ch);
}
};
// alt = false
}
}
}
}
/**
* Returns <code>true</code> if character class contains symbol specified,
* <code>false</code> otherwise. Note: #setNegative() method changes the
* meaning of contains method;
*
* The answer is the <code>alt</code> flag XOR-ed with the raw membership,
* taken from <code>nonBitSet</code> when present and from the bit set
* otherwise.
*
* TODO: currently <code>character class</code> implementation based
* on BitSet, but this implementation possibly will be turned to
* combined BitSet(for first 256 symbols) and Black/Red tree for the
* rest of UTF.
*
* @param ch
*            code point to test
* @return <code>true</code> if character class contains symbol specified
*/
@Override
public boolean contains(int ch) {
if (nonBitSet == null) {
return this.alt ^ bits.get(ch);
} else {
return alt ^ nonBitSet.contains(ch);
}
}
/**
* @return the backing bit set, or <code>null</code> once the bits are
*         hidden
*/
@Override
protected BitSet getBits() {
if (hideBits)
return null;
return bits;
}
@Override
protected BitSet getLowHighSurrogates() {
return lowHighSurrogates;
}
/**
* Returns a class to be used for matching: while no predicate has been
* installed, a new AbstractCharClass that reads the bit set returned by
* getBits() (the same BitSet object, not a copy) and carries the same
* negative flag; otherwise this instance itself.
*/
@Override
public AbstractCharClass getInstance() {
if (nonBitSet == null) {
final BitSet bs = getBits();
AbstractCharClass res = new AbstractCharClass() {
@Override
public boolean contains(int ch) {
// this.alt is the flag of the new instance, set through
// setNegative() below.
return this.alt ^ bs.get(ch);
}
@Override
public String toString() {
StringBuilder temp = new StringBuilder();
for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
temp.append(Character.toChars(i));
temp.append('|');
}
if (temp.length() > 0)
temp.deleteCharAt(temp.length() - 1);
return temp.toString();
}
};
return res.setNegative(isNegative());
} else {
return this;
}
}
// for debugging purposes only
// Lists every set bit as a character, separated by '|'.
@Override
public String toString() {
StringBuilder temp = new StringBuilder();
for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
temp.append(Character.toChars(i));
temp.append('|');
}
if (temp.length() > 0)
temp.deleteCharAt(temp.length() - 1);
return temp.toString();
}
/**
* @return whether a case-folded code point above 128 was added, or a class
*         reporting the same was merged in via union or intersection
*/
@Override
public boolean hasUCI() {
return hasUCI;
}
}

View File

@ -0,0 +1,130 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents node accepting single character.
 *
 * @author Nikolay A. Kuznetsov
 */
class CharSet extends LeafSet {
    /** The character this node accepts. */
    private char ch = 0;

    public CharSet(char ch) {
        this.ch = ch;
    }

    /**
     * @return always {@code 1}: this node consumes exactly one char
     */
    @Override
    public int charCount() {
        return 1;
    }

    /**
     * @return {@code 1} if the char at {@code strIndex} is the pattern
     *         character, {@code -1} otherwise
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        return (this.ch == testString.charAt(strIndex)) ? 1 : -1;
    }

    /**
     * Searches forward from {@code strIndex} for a position where this
     * character occurs and the rest of the pattern matches. For
     * {@code String} input the candidates are located with
     * {@code String.indexOf}; other inputs are delegated to the superclass.
     *
     * @return the index of the match start, or {@code -1} if there is none
     *         before the right bound of {@code matchResult}
     */
    @Override
    public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (!(testString instanceof String)) {
            return super.find(strIndex, testString, matchResult);
        }

        String testStr = (String) testString;
        int rightBound = matchResult.getRightBound();

        while (strIndex < rightBound) {
            strIndex = testStr.indexOf(ch, strIndex);
            // indexOf scans the whole string, so an occurrence may lie at or
            // beyond the right bound; such a hit is outside the searched
            // range and must not be offered to the rest of the pattern.
            if (strIndex < 0 || strIndex >= rightBound) {
                return -1;
            }
            if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
                return strIndex;
            }
            strIndex++;
        }
        return -1;
    }

    /**
     * Searches backward from {@code lastIndex} down to {@code strIndex} for a
     * position where this character occurs and the rest of the pattern
     * matches. For {@code String} input the candidates are located with
     * {@code String.lastIndexOf}; other inputs are delegated to the
     * superclass.
     *
     * @return the index of the match start, or {@code -1} if there is none
     */
    @Override
    public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (!(testString instanceof String)) {
            return super.findBack(strIndex, lastIndex, testString, matchResult);
        }

        String testStr = (String) testString;

        while (lastIndex >= strIndex) {
            lastIndex = testStr.lastIndexOf(ch, lastIndex);
            if (lastIndex < 0 || lastIndex < strIndex) {
                return -1;
            }
            if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
                return lastIndex;
            }
            lastIndex--;
        }
        return -1;
    }

    @Override
    protected String getName() {
        return "" + ch; //$NON-NLS-1$
    }

    /**
     * @return the character this node accepts
     */
    protected char getChar() {
        return ch;
    }

    /**
     * Tells whether {@code set} could start with the character of this node.
     * Only the node types listed below are examined; for any other type the
     * answer is {@code true}.
     */
    @Override
    public boolean first(AbstractSet set) {
        if (set instanceof CharSet) {
            return ((CharSet) set).getChar() == ch;
        }
        if (set instanceof RangeSet) {
            return ((RangeSet) set).accepts(0, Character.toString(ch)) > 0;
        }
        if (set instanceof SupplRangeSet) {
            return ((SupplRangeSet) set).contains(ch);
        }
        if (set instanceof SupplCharSet) {
            return false;
        }
        return true;
    }
}

View File

@ -0,0 +1,108 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Composite (i.e. {n,m}) quantifier node for groups ("(X){n,m}")
 *
 * @author Nikolay A. Kuznetsov
 */
class CompositeGroupQuantifierSet extends GroupQuantifierSet {
    /** Supplies the minimum and maximum repetition counts. */
    protected Quantifier quantifier = null;

    /** Index of this node's enter counter inside the match result. */
    int setCounter;

    /**
     * Constructs CompositeGroupQuantifierSet
     *
     * @param quant
     *            - given composite quantifier
     * @param innerSet
     *            - given group
     * @param next
     *            - next set after the quantifier
     * @param type
     *            - passed through to the superclass constructor
     * @param setCounter
     *            - index of the enter counter reserved for this node
     */
    public CompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, AbstractSet next, int type,
            int setCounter) {
        super(innerSet, next, type);
        this.quantifier = quant;
        this.setCounter = setCounter;
    }

    /**
     * Tries one more pass through the group while the maximum has not been
     * reached, and falls through to the next node when the group cannot or
     * need not be entered again.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        final int entered = matchResult.getEnterCounter(setCounter);

        // Leave the loop when the previous pass consumed nothing or the
        // maximum number of passes has been reached.
        if (!innerSet.hasConsumed(matchResult) || entered >= quantifier.max()) {
            return next.matches(stringIndex, testString, matchResult);
        }

        // Enter the group once more.
        matchResult.setEnterCounter(setCounter, entered + 1);
        final int innerResult = innerSet.matches(stringIndex, testString, matchResult);
        if (innerResult >= 0) {
            matchResult.setEnterCounter(setCounter, 0);
            return innerResult;
        }

        // The extra pass failed: undo the count and continue only if the
        // minimum is already satisfied.
        matchResult.setEnterCounter(setCounter, entered);
        if (entered >= quantifier.min()) {
            return next.matches(stringIndex, testString, matchResult);
        }
        matchResult.setEnterCounter(setCounter, 0);
        return -1;
    }

    /**
     * Resets the counter of the underlying quantifier.
     */
    public void reset() {
        quantifier.resetCounter();
    }

    @Override
    protected String getName() {
        return quantifier.toString();
    }

    void setQuantifier(Quantifier quant) {
        this.quantifier = quant;
    }
}

View File

@ -0,0 +1,104 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Composite (i.e. {n,m}) quantifier node over the leaf nodes ("a{n,m}")
 *
 * @author Nikolay A. Kuznetsov
 */
class CompositeQuantifierSet extends LeafQuantifierSet {
    /** Supplies the minimum and maximum repetition counts. */
    protected Quantifier quantifier = null;

    public CompositeQuantifierSet(Quantifier quant, LeafSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
        this.quantifier = quant;
    }

    /**
     * Consumes the mandatory repetitions, then as many optional ones as
     * possible, and finally gives repetitions back one at a time until the
     * rest of the pattern matches.
     *
     * @return the result of the next node, or {@code -1} if no repetition
     *         count between the minimum and the number consumed works
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        final int min = quantifier.min();
        final int max = quantifier.max();
        int count = 0;

        // Mandatory part: every one of the first min repetitions must match.
        while (count < min) {
            if (stringIndex + leaf.charCount() > matchResult.getRightBound()) {
                matchResult.hitEnd = true;
                return -1;
            }
            final int shift = leaf.accepts(stringIndex, testString);
            if (shift < 1) {
                return -1;
            }
            stringIndex += shift;
            count++;
        }

        // Optional part: take repetitions until max, the bound or a mismatch.
        while (count < max) {
            if (stringIndex + leaf.charCount() > matchResult.getRightBound()) {
                break;
            }
            final int shift = leaf.accepts(stringIndex, testString);
            if (shift < 1) {
                break;
            }
            stringIndex += shift;
            count++;
        }

        // Backtrack: step back by the leaf's char count per repetition given up.
        while (count >= min) {
            final int result = next.matches(stringIndex, testString, matchResult);
            if (result >= 0) {
                return result;
            }
            stringIndex -= leaf.charCount();
            count--;
        }
        return -1;
    }

    /**
     * Resets the counter of the underlying quantifier.
     */
    public void reset() {
        quantifier.resetCounter();
    }

    @Override
    protected String getName() {
        return quantifier.toString();
    }

    void setQuantifier(Quantifier quant) {
        this.quantifier = quant;
    }
}

View File

@ -0,0 +1,182 @@
/*
* Copyright 2014 Alexey Andreev.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
*
* COPYRIGHT AND PERMISSION NOTICE
*
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
* hereby granted, free of charge, to any person obtaining a copy of the
* Unicode data files and any associated documentation (the "Data Files")
* or Unicode software and any associated documentation (the "Software")
* to deal in the Data Files or Software without restriction, including without
* limitation the rights to use, copy, modify, merge, publish, distribute,
* and/or sell copies of the Data Files or Software, and to permit persons
* to whom the Data Files or Software are furnished to do so, provided that
* (a) the above copyright notice(s) and this permission notice appear with
* all copies of the Data Files or Software, (b) both the above copyright
* notice(s) and this permission notice appear in associated documentation,
* and (c) there is clear notice in each modified Data File or in the Software
* as well as in the documentation associated with the Data File(s) or Software
* that the data or software has been modified.
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in these Data Files or Software without prior written
* authorization of the copyright holder.
*
* 2. Additional terms from the Database:
*
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
*
* Disclaimer
*
* The Unicode Character Database is provided as is by Unicode, Inc.
* No claims are made as to fitness for any particular purpose. No warranties
* of any kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been purchased
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
* will be exchange of defective media within 90 days of receipt. This disclaimer
* is applicable for all other data files accompanying the Unicode Character Database,
* some of which have been compiled by the Unicode Consortium, and some of which
* have been supplied by other sources.
*
* Limitations on Rights to Redistribute This Data
*
* Recipient is granted the right to make copies in any form for internal
* distribution and to freely use the information supplied in the creation of
* products supporting the UnicodeTM Standard. The files in
* the Unicode Character Database can be redistributed to third parties or other
* organizations (whether for profit or not) as long as this notice and the disclaimer
* notice are retained. Information can be extracted from these files and used
* in documentation or programs, as long as there is an accompanying notice
* indicating the source.
*/
package org.teavm.classlib.java.util.regex;
/**
 * A character range that has been split into two parts: one made up of the
 * surrogate characters of the parent range and one made up of all its other
 * characters. An instance of this class stands for the parent range; it
 * matches when either of the two parts matches.
 */
class CompositeRangeSet extends JointSet {
    /** Part of the parent range that contains no surrogates. */
    AbstractSet withoutSurrogates;

    /** Part of the parent range that contains surrogates only. */
    AbstractSet withSurrogates;

    /**
     * Creates the composite and links both parts (and this node) to
     * {@code next}.
     */
    public CompositeRangeSet(AbstractSet withoutSurrogates, AbstractSet withSurrogates, AbstractSet next) {
        this(withoutSurrogates, withSurrogates);
        setNext(next);
    }

    /**
     * Creates the composite without assigning a successor node.
     */
    public CompositeRangeSet(AbstractSet withoutSurrogates, AbstractSet withSurrogates) {
        this.withoutSurrogates = withoutSurrogates;
        this.withSurrogates = withSurrogates;
    }

    /**
     * Returns the successor node.
     */
    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    /**
     * Tries the surrogate-free part first and falls back to the
     * surrogates-only part when the first attempt fails.
     *
     * @return the non-negative result of whichever part matched, or -1 if
     *         neither did
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int result = withoutSurrogates.matches(stringIndex, testString, matchResult);
        if (result >= 0) {
            return result;
        }
        result = withSurrogates.matches(stringIndex, testString, matchResult);
        return (result >= 0) ? result : -1;
    }

    /**
     * Sets the successor of this node and of both parts.
     *
     * @param next
     *            the node to continue with after this one
     */
    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
        withSurrogates.setNext(next);
        withoutSurrogates.setNext(next);
    }

    /** Returns the surrogates-only part. */
    public AbstractSet getSurrogates() {
        return withSurrogates;
    }

    /** Returns the part that contains no surrogates. */
    public AbstractSet getWithoutSurrogates() {
        return withoutSurrogates;
    }

    @Override
    protected String getName() {
        String prefix = "CompositeRangeSet: " + " <nonsurrogate> ";
        return prefix + withoutSurrogates + " <surrogate> " + withSurrogates;
    }

    /** Always reports that input was consumed. */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return true;
    }

    /** Always returns {@code true}, whatever {@code set} is. */
    @Override
    public boolean first(AbstractSet set) {
        return true;
    }
}

View File

@ -0,0 +1,260 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents the canonical decomposition of a Unicode character. Used when
 * the CANON_EQ flag of the Pattern class is specified.
 */
class DecomposedCharSet extends JointSet {
    /**
     * Number of chars (1 or 2) that the most recent call to
     * {@link #codePointAt(int, CharSequence, int)} read for its codepoint.
     */
    private int readCharsForCodePoint = 1;

    /**
     * Lazily built UTF-16 encoding of decomposedChar.
     */
    private String decomposedCharUTF16 = null;

    /**
     * Decomposition of the Unicode codepoint.
     */
    private int[] decomposedChar;

    /**
     * Length of the useful part of decomposedChar; decomposedCharLength <=
     * decomposedChar.length
     */
    private int decomposedCharLength;

    public DecomposedCharSet(int[] decomposedChar, int decomposedCharLength) {
        this.decomposedChar = decomposedChar;
        this.decomposedCharLength = decomposedCharLength;
    }

    /**
     * Returns the next.
     */
    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    /**
     * Sets next abstract set.
     *
     * @param next
     *            The next to set.
     */
    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    /**
     * Reads codepoints from {@code testString} starting at {@code strIndex},
     * decomposes them, puts the result into canonical order and compares it
     * with this node's decomposition. On equality matching continues with
     * the next node at the index just past the codepoints that were read.
     *
     * @return the result of the next node, or -1 if there is no input left
     *         or the decomposed input differs from this decomposition
     */
    @Override
    public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        /*
         * All decompositions have length that is less or equal
         * Lexer.MAX_DECOMPOSITION_LENGTH
         */
        int[] decCurCodePoint;
        int[] decCodePoint = new int[Lexer.MAX_DECOMPOSITION_LENGTH];
        int readCodePoints = 0;
        int rightBound = matchResult.getRightBound();
        int curChar;
        int i = 0;

        if (strIndex >= rightBound) {
            return -1;
        }

        /*
         * We read testString and decompose it gradually to compare with this
         * decomposedChar at position strIndex
         */
        curChar = codePointAt(strIndex, testString, rightBound);
        strIndex += readCharsForCodePoint;
        decCurCodePoint = Lexer.getDecomposition(curChar);
        if (decCurCodePoint == null) {
            decCodePoint[readCodePoints++] = curChar;
        } else {
            i = decCurCodePoint.length;
            System.arraycopy(decCurCodePoint, 0, decCodePoint, 0, i);
            readCodePoints += i;
        }

        if (strIndex < rightBound) {
            curChar = codePointAt(strIndex, testString, rightBound);

            /*
             * Read testString until we met a decomposed char boundary and
             * decompose obtained portion of testString
             */
            while ((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH) && !Lexer.isDecomposedCharBoundary(curChar)) {
                if (Lexer.hasDecompositionNonNullCanClass(curChar)) {
                    /*
                     * A few codepoints have decompositions and non null
                     * canonical classes, we have to take them into
                     * consideration, but general rule is: if canonical class !=
                     * 0 then no decomposition
                     */
                    decCurCodePoint = Lexer.getDecomposition(curChar);

                    /*
                     * Length of such decomposition is 1 or 2. See UnicodeData
                     * file http://www.unicode.org/Public/4.0-Update
                     * /UnicodeData-4.0.0.txt
                     */
                    if (decCurCodePoint.length == 2) {
                        /*
                         * The loop condition only guarantees one free slot.
                         * If the pair does not fit, the input decomposes to
                         * more codepoints than the buffer holds, so (given
                         * that decompositions are at most
                         * MAX_DECOMPOSITION_LENGTH long, as stated above) it
                         * cannot equal this decomposition. Fail instead of
                         * writing past the end of the buffer.
                         */
                        if (readCodePoints + 2 > decCodePoint.length) {
                            return -1;
                        }
                        decCodePoint[readCodePoints++] = decCurCodePoint[0];
                        decCodePoint[readCodePoints++] = decCurCodePoint[1];
                    } else {
                        decCodePoint[readCodePoints++] = decCurCodePoint[0];
                    }
                } else {
                    decCodePoint[readCodePoints++] = curChar;
                }

                strIndex += readCharsForCodePoint;
                if (strIndex < rightBound) {
                    curChar = codePointAt(strIndex, testString, rightBound);
                } else {
                    break;
                }
            }
        }

        /*
         * Some optimization since length of decomposed char is <= 3 usually
         */
        switch (readCodePoints) {
            case 0:
            case 1:
            case 2:
                break;
            case 3:
                int i1 = Lexer.getCanonicalClass(decCodePoint[1]);
                int i2 = Lexer.getCanonicalClass(decCodePoint[2]);
                if ((i2 != 0) && (i1 > i2)) {
                    // swap the two trailing codepoints into canonical order
                    i1 = decCodePoint[1];
                    decCodePoint[1] = decCodePoint[2];
                    decCodePoint[2] = i1;
                }
                break;
            default:
                decCodePoint = Lexer.getCanonicalOrder(decCodePoint, readCodePoints);
        }

        /*
         * Compare decomposedChar with decomposed char that was just read from
         * testString
         */
        if (readCodePoints != decomposedCharLength) {
            return -1;
        }
        for (i = 0; i < readCodePoints; i++) {
            if (decCodePoint[i] != decomposedChar[i]) {
                return -1;
            }
        }
        return next.matches(strIndex, testString, matchResult);
    }

    /**
     * Returns the UTF-16 encoding of this decomposition, building and caching
     * it on first use.
     *
     * @return UTF-16 encoding
     */
    private String getDecomposedChar() {
        if (decomposedCharUTF16 == null) {
            StringBuilder strBuff = new StringBuilder();
            for (int i = 0; i < decomposedCharLength; i++) {
                strBuff.append(Character.toChars(decomposedChar[i]));
            }
            decomposedCharUTF16 = strBuff.toString();
        }
        return decomposedCharUTF16;
    }

    @Override
    protected String getName() {
        return "decomposed char:" + getDecomposedChar(); //$NON-NLS-1$
    }

    /**
     * Reads a Unicode codepoint from the input and records in
     * readCharsForCodePoint how many chars (1 or 2) it occupied.
     *
     * @param strIndex
     *            - index to read codepoint at
     * @param testString
     *            - input
     * @param rightBound
     *            - index just past the last char that may be read
     * @return codepoint at given strIndex at testString; a surrogate that is
     *         not part of a valid pair is returned as is
     */
    public int codePointAt(int strIndex, CharSequence testString, int rightBound) {
        int curChar;
        readCharsForCodePoint = 1;
        if (strIndex < rightBound - 1) {
            // at least two chars are available, so a surrogate pair is possible
            char high = testString.charAt(strIndex++);
            char low = testString.charAt(strIndex);
            if (Character.isSurrogatePair(high, low)) {
                curChar = Character.toCodePoint(high, low);
                readCharsForCodePoint = 2;
            } else {
                curChar = high;
            }
        } else {
            curChar = testString.charAt(strIndex);
        }
        return curChar;
    }

    /**
     * For another DecomposedCharSet, returns whether both have the same
     * UTF-16 encoded decomposition; for any other kind of node returns
     * {@code true}.
     */
    @Override
    public boolean first(AbstractSet set) {
        return (set instanceof DecomposedCharSet) ? ((DecomposedCharSet)set).getDecomposedChar().equals(
                getDecomposedChar()) : true;
    }

    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return true;
    }
}

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Special node for the ".*" construction where the dot accepts any character,
 * line terminators included.
 *
 * @author Nikolay A. Kuznetsov
 */
class DotAllQuantifierSet extends QuantifierSet {
    public DotAllQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Lets the next node search backwards from the right bound down to
     * {@code stringIndex}; when there is no input left at
     * {@code stringIndex}, the next node is simply matched in place.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();
        return (stringIndex < rightBound)
                ? next.findBack(stringIndex, rightBound, testString, matchResult)
                : next.matches(stringIndex, testString, matchResult);
    }

    /**
     * Returns {@code stringIndex} if the next node can be found anywhere
     * between {@code stringIndex} and the right bound, -1 otherwise.
     */
    @Override
    public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();
        int found = next.findBack(stringIndex, rightBound, testString, matchResult);
        return (found < 0) ? -1 : stringIndex;
    }

    @Override
    protected String getName() {
        return "<DotAllQuant>";
    }
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Node accepting any character including line terminators.
 *
 * @author Nikolay A. Kuznetsov
 */
class DotAllSet extends JointSet {
    /**
     * Consumes one codepoint (two chars for a valid surrogate pair that lies
     * within the right bound, one char otherwise) and continues with the next
     * node. When no input is left, flags {@code hitEnd} and fails.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();
        if (stringIndex + 1 > rightBound) {
            matchResult.hitEnd = true;
            return -1;
        }

        int width = 1;
        char lead = testString.charAt(stringIndex);
        if (Character.isHighSurrogate(lead) && (stringIndex + 2 <= rightBound)
                && Character.isSurrogatePair(lead, testString.charAt(stringIndex + 1))) {
            width = 2;
        }
        return next.matches(stringIndex + width, testString, matchResult);
    }

    @Override
    protected String getName() {
        return "DotAll";
    }

    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    @Override
    public int getType() {
        return AbstractSet.TYPE_DOTSET;
    }

    /** Always reports that input was consumed. */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return true;
    }
}

View File

@ -0,0 +1,129 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Special node for the ".*" construction. The main idea is to locate the line
 * terminator and to look for the rest of the pattern relative to that point.
 *
 * @author Nikolay A. Kuznetsov
 */
class DotQuantifierSet extends QuantifierSet {
    /** Decides which characters count as line terminators. */
    AbstractLineTerminator lt;

    public DotQuantifierSet(AbstractSet innerSet, AbstractSet next, int type, AbstractLineTerminator lt) {
        super(innerSet, next, type);
        this.lt = lt;
    }

    /**
     * Lets the next node search backwards from the first line terminator at
     * or after {@code stringIndex} (or from the right bound if there is
     * none). If that position is not past {@code stringIndex}, the next node
     * is matched in place.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();
        int lineEnd = findLineTerminator(stringIndex, rightBound, testString);
        if (lineEnd < 0) {
            lineEnd = rightBound;
        }
        return (lineEnd > stringIndex)
                ? next.findBack(stringIndex, lineEnd, testString, matchResult)
                : next.matches(stringIndex, testString, matchResult);
    }

    /**
     * Finds where a match of ".*" followed by the next node starts.
     *
     * @return the start index of the match, or -1 if the next node cannot be
     *         found
     */
    @Override
    public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();

        // Line terminators are not skipped up front; the first occurrence of
        // the rest of the pattern is located directly.
        int res = next.find(stringIndex, testString, matchResult);
        if (res < 0) {
            return -1;
        }

        // .* is greedy, so prefer the last occurrence before the end of line.
        int lineEnd = findLineTerminator(res, rightBound, testString);
        if (lineEnd < 0) {
            lineEnd = rightBound;
        }
        int last = next.findBack(res, lineEnd, testString, matchResult);
        if (res < last) {
            res = last;
        }

        // Left boundary: the closest line terminator before res, searched no
        // further back than stringIndex.
        int leftBound;
        if (res > 0) {
            leftBound = findBackLineTerminator(stringIndex, res - 1, testString);
        } else if (res == 0) {
            leftBound = 0;
        } else {
            leftBound = -1;
        }

        if (leftBound < stringIndex) {
            return stringIndex;
        }
        return (leftBound < res) ? leftBound + 1 : leftBound;
    }

    /*
     * All line terminators are from the Basic Multilingual Plane, so the
     * scans below work on single chars.
     */

    /**
     * Returns the index of the first line terminator in [from, to), or -1.
     */
    private int findLineTerminator(int from, int to, CharSequence testString) {
        int index = from;
        while (index < to) {
            if (lt.isLineTerminator(testString.charAt(index))) {
                return index;
            }
            index++;
        }
        return -1;
    }

    /**
     * Returns the index of the last line terminator in [from, to], or -1.
     */
    private int findBackLineTerminator(int from, int to, CharSequence testString) {
        int index = to;
        while (index >= from) {
            if (lt.isLineTerminator(testString.charAt(index))) {
                return index;
            }
            index--;
        }
        return -1;
    }

    @Override
    protected String getName() {
        return "<DotQuant>";
    }
}

View File

@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Node accepting any character except line terminators.
 *
 * @author Nikolay A. Kuznetsov
 */
final class DotSet extends JointSet {
    /** Decides which characters count as line terminators. */
    AbstractLineTerminator lt;

    public DotSet(AbstractLineTerminator lt) {
        super();
        this.lt = lt;
    }

    /**
     * Consumes one codepoint (two chars for a valid surrogate pair that lies
     * within the right bound, one char otherwise) unless it is a line
     * terminator, then continues with the next node. When no input is left,
     * flags {@code hitEnd} and fails.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();
        if (stringIndex + 1 > rightBound) {
            matchResult.hitEnd = true;
            return -1;
        }

        char lead = testString.charAt(stringIndex);
        if (Character.isHighSurrogate(lead) && (stringIndex + 2 <= rightBound)) {
            char trail = testString.charAt(stringIndex + 1);
            if (Character.isSurrogatePair(lead, trail)) {
                if (lt.isLineTerminator(Character.toCodePoint(lead, trail))) {
                    return -1;
                }
                return next.matches(stringIndex + 2, testString, matchResult);
            }
        }

        if (lt.isLineTerminator(lead)) {
            return -1;
        }
        return next.matches(stringIndex + 1, testString, matchResult);
    }

    @Override
    protected String getName() {
        return "."; //$NON-NLS-1$
    }

    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    @Override
    public int getType() {
        return AbstractSet.TYPE_DOTSET;
    }

    /** Always reports that input was consumed. */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return true;
    }
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents end of input '\z', i.e. matches only at the position after the
 * last character.
 *
 * @author Nikolay A. Kuznetsov
 */
class EOISet extends AbstractSet {
    /**
     * Succeeds only when {@code stringIndex} is not before the end of input:
     * the length of {@code testString} with transparent bounds, the right
     * bound otherwise. On success flags {@code hitEnd} and
     * {@code requireEnd} and continues with the next node.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int inputEnd;
        if (matchResult.hasTransparentBounds()) {
            inputEnd = testString.length();
        } else {
            inputEnd = matchResult.getRightBound();
        }

        if (stringIndex < inputEnd) {
            return -1;
        }

        matchResult.hitEnd = true;
        matchResult.requireEnd = true;
        return next.matches(stringIndex, testString, matchResult);
    }

    /**
     * Returns false, enough for quantifiers
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return false;
    }

    @Override
    protected String getName() {
        return "EOI";
    }
}

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Node that matches at the end of the input or immediately before a final
 * line terminator, without consuming any character.
 *
 * @author Nikolay A. Kuznetsov
 */
final class EOLSet extends AbstractSet {
    /** Slot of the consumed-counter that this node uses in the match result. */
    private int consCounter;

    public EOLSet(int counter) {
        this.consCounter = counter;
    }

    /**
     * Succeeds when {@code strIndex} is at or past the end, or when exactly
     * a final "\r\n" or a single final line terminator character remains.
     * The end is the right bound with anchoring bounds and the length of
     * {@code testString} otherwise. On success records 0 in this node's
     * consumed slot and continues with the next node at the same index.
     */
    @Override
    public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        int end = matchResult.hasAnchoringBounds() ? matchResult.getRightBound() : testString.length();
        int remaining = end - strIndex;

        boolean atLineEnd;
        if (strIndex >= end) {
            atLineEnd = true;
        } else if (remaining == 2) {
            // only a final CR LF pair may remain
            atLineEnd = testString.charAt(strIndex) == '\r' && testString.charAt(strIndex + 1) == '\n';
        } else if (remaining == 1) {
            // single final terminator; (c | 1) == U+2029 covers U+2028 and U+2029
            char c = testString.charAt(strIndex);
            atLineEnd = c == '\n' || c == '\r' || c == '\u0085' || (c | 1) == '\u2029';
        } else {
            atLineEnd = false;
        }

        if (!atLineEnd) {
            return -1;
        }
        matchResult.setConsumed(consCounter, 0);
        return next.matches(strIndex, testString, matchResult);
    }

    /**
     * Reports whether this node's consumed slot holds a non-zero value and
     * resets the slot to -1.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        int recorded = matchResult.getConsumed(consCounter);
        boolean consumed = recorded != 0;
        matchResult.setConsumed(consCounter, -1);
        return consumed;
    }

    @Override
    protected String getName() {
        return "<EOL>";
    }
}

View File

@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Valid constant zero character match.
 *
 * @author Nikolay A. Kuznetsov
 */
class EmptySet extends LeafSet {
    public EmptySet(AbstractSet next) {
        super(next);
        charCount = 0;
    }

    /** Accepts at any position with a width of zero. */
    @Override
    public int accepts(int stringIndex, CharSequence testString) {
        return 0;
    }

    /**
     * Scans forward from {@code stringIndex} up to and including the right
     * bound for the first position where the next node matches. Positions
     * between the two halves of a surrogate pair are skipped.
     *
     * @return the matching position, or -1 if there is none
     */
    @Override
    public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();
        int leftBound = matchResult.getLeftBound();
        for (int index = stringIndex; index <= rightBound; index++) {
            if (splitsSurrogatePair(index, leftBound, rightBound, testString)) {
                continue;
            }
            if (next.matches(index, testString, matchResult) >= 0) {
                return index;
            }
        }
        return -1;
    }

    /**
     * Scans backward from {@code startSearch} down to {@code stringIndex}
     * for the first position where the next node matches. Positions between
     * the two halves of a surrogate pair are skipped.
     *
     * @return the matching position, or -1 if there is none
     */
    @Override
    public int findBack(int stringIndex, int startSearch, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();
        int leftBound = matchResult.getLeftBound();
        for (int index = startSearch; index >= stringIndex; index--) {
            if (splitsSurrogatePair(index, leftBound, rightBound, testString)) {
                continue;
            }
            if (next.matches(index, testString, matchResult) >= 0) {
                return index;
            }
        }
        return -1;
    }

    /**
     * Tells whether {@code index} lies inside a supplementary codepoint,
     * i.e. points at a low surrogate that directly follows a high surrogate
     * within the bounds.
     */
    private static boolean splitsSurrogatePair(int index, int leftBound, int rightBound, CharSequence testString) {
        return index < rightBound
                && Character.isLowSurrogate(testString.charAt(index))
                && index > leftBound
                && Character.isHighSurrogate(testString.charAt(index - 1));
    }

    @Override
    protected String getName() {
        return "<Empty set>";
    }

    /** An empty match never consumes input. */
    @Override
    public boolean hasConsumed(MatchResultImpl mr) {
        return false;
    }
}

View File

@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * The node which marks the end of a particular group.
 *
 * @author Nikolay A. Kuznetsov
 */
class FSet extends AbstractSet {
    /** Shared instance of the possessive group-end marker. */
    static PossessiveFSet posFSet = new PossessiveFSet();

    boolean isBackReferenced = false;

    /** Index of the group whose end this node marks. */
    private int groupIndex;

    public FSet(int groupIndex) {
        this.groupIndex = groupIndex;
    }

    /**
     * Records {@code stringIndex} as the end of the group and continues with
     * the next node. If the continuation fails, the previously recorded end
     * is restored.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int previousEnd = matchResult.getEnd(groupIndex);
        matchResult.setEnd(groupIndex, stringIndex);

        int result = next.matches(stringIndex, testString, matchResult);
        if (result < 0) {
            matchResult.setEnd(groupIndex, previousEnd);
        }
        return result;
    }

    public int getGroupIndex() {
        return groupIndex;
    }

    @Override
    protected String getName() {
        return "fSet"; //$NON-NLS-1$
    }

    /** The end marker itself never consumes input. */
    @Override
    public boolean hasConsumed(MatchResultImpl mr) {
        return false;
    }

    /**
     * Marks the end of a particular group without taking possible kickbacks
     * into account (required for atomic groups, for instance).
     */
    static class PossessiveFSet extends AbstractSet {
        /** Succeeds immediately at {@code stringIndex} without consulting any next node. */
        @Override
        public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
            return stringIndex;
        }

        @Override
        protected String getName() {
            return "posFSet"; //$NON-NLS-1$
        }

        @Override
        public boolean hasConsumed(MatchResultImpl mr) {
            return false;
        }
    }
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Special construction which marks the end of the pattern.
 *
 * @author Nikolay A. Kuznetsov
 */
class FinalSet extends FSet {
    public FinalSet() {
        super(0);
    }

    /**
     * Accepts when the matcher is in find mode or when {@code stringIndex}
     * is exactly the right bound. On acceptance marks the result valid and
     * records {@code stringIndex} as the end of group 0.
     *
     * @return {@code stringIndex} on acceptance, -1 otherwise
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        boolean accepted = matchResult.mode() == TMatcher.MODE_FIND
                || stringIndex == matchResult.getRightBound();
        if (!accepted) {
            return -1;
        }
        matchResult.setValid();
        matchResult.setEnd(0, stringIndex);
        return stringIndex;
    }

    @Override
    protected String getName() {
        return "FinalSet"; //$NON-NLS-1$
    }
}

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Default quantifier over groups; in fact this type of quantifier is generally
 * used for constructions for which we can't identify the number of characters
 * they consume.
 *
 * @author Nikolay A. Kuznetsov
 */
class GroupQuantifierSet extends QuantifierSet {
    public GroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Tries the inner set when it reports having consumed input, and returns
     * its result if that succeeds. In every other case matching continues
     * with the next node at {@code stringIndex}.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (innerSet.hasConsumed(matchResult)) {
            int innerResult = innerSet.matches(stringIndex, testString, matchResult);
            if (innerResult >= 0) {
                return innerResult;
            }
        }
        return next.matches(stringIndex, testString, matchResult);
    }

    @Override
    protected String getName() {
        return "<GroupQuant>"; //$NON-NLS-1$
    }
}

View File

@ -0,0 +1,194 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents the canonical decomposition of a Hangul syllable. Used when the
 * CANON_EQ flag of the Pattern class is specified.
 */
class HangulDecomposedCharSet extends JointSet {
    /**
     * Decomposed Hangul syllable. Only the first {@code decomposedCharLength}
     * entries are meaningful.
     */
    private char[] decomposedChar;

    /**
     * Lazily built string form of the useful part of {@code decomposedChar}.
     */
    private String decomposedCharUTF16 = null;

    /**
     * Length of the useful part of decomposedChar;
     * decomposedCharLength <= decomposedChar.length
     */
    private int decomposedCharLength;

    public HangulDecomposedCharSet(char[] decomposedChar, int decomposedCharLength) {
        this.decomposedChar = decomposedChar;
        this.decomposedCharLength = decomposedCharLength;
    }

    /**
     * Returns the next.
     */
    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    /**
     * Sets next abstract set.
     *
     * @param next
     *            The next to set.
     */
    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    /**
     * Gives the string representation of the decomposed syllable.
     *
     * Only the first {@code decomposedCharLength} chars are used, so unused
     * trailing slots of the backing array never leak into the name or into
     * the comparison done by {@link #first(AbstractSet)}.
     *
     * @return string representation of the useful part of the decomposition.
     */
    private String getDecomposedChar() {
        if (decomposedCharUTF16 == null) {
            decomposedCharUTF16 = new String(decomposedChar, 0, decomposedCharLength);
        }
        return decomposedCharUTF16;
    }

    @Override
    protected String getName() {
        return "decomposed Hangul syllable:" + getDecomposedChar(); //$NON-NLS-1$
    }

    /**
     * Tries to match this decomposed syllable at {@code strIndex}. The input
     * may contain either a single char for which
     * {@code Lexer.getHangulDecomposition} yields a decomposition, or an
     * L V [T] jamo sequence; both forms are compared against the stored
     * decomposition.
     *
     * @return the result of {@code next.matches} on success, -1 otherwise.
     */
    @Override
    public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        /*
         * For details about Hangul composition and decomposition see
         * http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf
         * "3.12 Conjoining Jamo Behavior"
         */
        int rightBound = matchResult.getRightBound();
        if (strIndex >= rightBound) {
            return -1;
        }

        char curSymb = testString.charAt(strIndex++);
        int[] decompCurSymb = Lexer.getHangulDecomposition(curSymb);

        if (decompCurSymb != null) {
            /*
             * We deal with a Hangul syllable at strIndex, so compare its
             * decomposition with this one element by element.
             */
            if (decompCurSymb.length != decomposedCharLength) {
                return -1;
            }
            for (int i = 0; i < decomposedCharLength; i++) {
                if (decompCurSymb[i] != decomposedChar[i]) {
                    return -1;
                }
            }
            return next.matches(strIndex, testString, matchResult);
        }

        /*
         * We deal with an ordinary letter or a sequence of jamos. The buffer
         * is sized by Lexer.MAX_HANGUL_DECOMPOSITION_LENGTH.
         */
        int[] decompSyllable = new int[Lexer.MAX_HANGUL_DECOMPOSITION_LENGTH];
        int syllIndex = 0;
        decompSyllable[syllIndex++] = curSymb;

        int lIndex = curSymb - Lexer.LBase;
        if ((lIndex < 0) || (lIndex >= Lexer.LCount)) {
            /*
             * Ordinary letter, that doesn't match this
             */
            return -1;
        }

        int vIndex = -1;
        if (strIndex < rightBound) {
            curSymb = testString.charAt(strIndex);
            vIndex = curSymb - Lexer.VBase;
        }
        if ((vIndex < 0) || (vIndex >= Lexer.VCount)) {
            /*
             * Single L jamo doesn't compose a Hangul syllable, so doesn't
             * match
             */
            return -1;
        }
        strIndex++;
        decompSyllable[syllIndex++] = curSymb;

        int tIndex = -1;
        if (strIndex < rightBound) {
            curSymb = testString.charAt(strIndex);
            tIndex = curSymb - Lexer.TBase;
        }
        if ((tIndex < 0) || (tIndex >= Lexer.TCount)) {
            /*
             * We deal with an LV syllable at testString, so compare it to this
             */
            boolean sameLv = (decomposedCharLength == 2)
                    && (decompSyllable[0] == decomposedChar[0])
                    && (decompSyllable[1] == decomposedChar[1]);
            return sameLv ? next.matches(strIndex, testString, matchResult) : -1;
        }
        strIndex++;
        decompSyllable[syllIndex++] = curSymb;

        /*
         * We deal with an LVT syllable at testString, so compare it to this
         */
        boolean sameLvt = (decomposedCharLength == 3)
                && (decompSyllable[0] == decomposedChar[0])
                && (decompSyllable[1] == decomposedChar[1])
                && (decompSyllable[2] == decomposedChar[2]);
        return sameLvt ? next.matches(strIndex, testString, matchResult) : -1;
    }

    /**
     * Another decomposed Hangul syllable is compared by its decomposition;
     * every other kind of set yields {@code true}.
     */
    @Override
    public boolean first(AbstractSet set) {
        if (set instanceof HangulDecomposedCharSet) {
            return ((HangulDecomposedCharSet) set).getDecomposedChar().equals(getDecomposedChar());
        }
        return true;
    }

    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return true;
    }
}

View File

@ -0,0 +1,256 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
*
* COPYRIGHT AND PERMISSION NOTICE
*
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
* hereby granted, free of charge, to any person obtaining a copy of the
* Unicode data files and any associated documentation (the "Data Files")
* or Unicode software and any associated documentation (the "Software")
* to deal in the Data Files or Software without restriction, including without
* limitation the rights to use, copy, modify, merge, publish, distribute,
* and/or sell copies of the Data Files or Software, and to permit persons
* to whom the Data Files or Software are furnished to do so, provided that
* (a) the above copyright notice(s) and this permission notice appear with
* all copies of the Data Files or Software, (b) both the above copyright
* notice(s) and this permission notice appear in associated documentation,
* and (c) there is clear notice in each modified Data File or in the Software
* as well as in the documentation associated with the Data File(s) or Software
* that the data or software has been modified.
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in these Data Files or Software without prior written
* authorization of the copyright holder.
*
* 2. Additional terms from the Database:
*
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
*
* Disclaimer
*
* The Unicode Character Database is provided as is by Unicode, Inc.
* No claims are made as to fitness for any particular purpose. No warranties
* of any kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been purchased
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
* will be exchange of defective media within 90 days of receipt. This disclaimer
* is applicable for all other data files accompanying the Unicode Character Database,
* some of which have been compiled by the Unicode Consortium, and some of which
* have been supplied by other sources.
*
* Limitations on Rights to Redistribute This Data
*
* Recipient is granted the right to make copies in any form for internal
* distribution and to freely use the information supplied in the creation of
* products supporting the UnicodeTM Standard. The files in
* the Unicode Character Database can be redistributed to third parties or other
* organizations (whether for profit or not) as long as this notice and the disclaimer
* notice are retained. Information can be extracted from these files and used
* in documentation or programs, as long as there is an accompanying notice
* indicating the source.
*/
package org.teavm.classlib.java.util.regex;
/**
 * This class represents a high surrogate character.
 */
class HighSurrogateCharSet extends JointSet {
    /*
     * Note that we can use high and low surrogate characters that don't combine
     * into supplementary code point. See
     * http://www.unicode.org/reports/tr18/#Supplementary_Characters
     */
    private char high;

    public HighSurrogateCharSet(char high) {
        this.high = high;
    }

    /**
     * Returns the next.
     */
    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    /**
     * Sets next abstract set.
     *
     * @param next
     *            The next to set.
     */
    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    /**
     * Matches a single, unpaired occurrence of the high surrogate at
     * {@code stringIndex}. Sets {@code hitEnd} and fails when there is no
     * char left before the right bound.
     *
     * @return the result of {@code next.matches} on success, -1 otherwise.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();
        if (stringIndex + 1 > rightBound) {
            matchResult.hitEnd = true;
            return -1;
        }

        char candidate = testString.charAt(stringIndex);
        if (stringIndex + 1 < rightBound) {
            /*
             * we consider high surrogate followed by low surrogate as a
             * codepoint
             */
            if (Character.isLowSurrogate(testString.charAt(stringIndex + 1))) {
                return -1;
            }
        }

        if (candidate != this.high) {
            return -1;
        }
        return next.matches(stringIndex + 1, testString, matchResult);
    }

    /**
     * Searches forward for an unpaired occurrence of the high surrogate from
     * which the rest of the pattern matches. Uses {@code String.indexOf} when
     * the input is a {@code String}, otherwise delegates to the superclass.
     *
     * @return index of the found occurrence, or -1 if there is none before
     *         the right bound.
     */
    @Override
    public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (!(testString instanceof String)) {
            return super.find(strIndex, testString, matchResult);
        }

        String testStr = (String) testString;
        int rightBound = matchResult.getRightBound();
        while (strIndex < rightBound) {
            strIndex = testStr.indexOf(high, strIndex);
            // indexOf scans to the end of the string, so a hit at or past the
            // right bound lies outside the searched range and must not match.
            if (strIndex < 0 || strIndex >= rightBound) {
                return -1;
            }
            if (strIndex + 1 < rightBound) {
                /*
                 * we consider high surrogate followed by low surrogate as a
                 * codepoint
                 */
                if (Character.isLowSurrogate(testStr.charAt(strIndex + 1))) {
                    strIndex += 2;
                    continue;
                }
            }
            if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
                return strIndex;
            }
            strIndex++;
        }
        return -1;
    }

    /**
     * Searches backward from {@code lastIndex} down to {@code strIndex} for
     * an unpaired occurrence of the high surrogate from which the rest of the
     * pattern matches.
     *
     * @return index of the found occurrence, or -1 if there is none.
     */
    @Override
    public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (!(testString instanceof String)) {
            return super.findBack(strIndex, lastIndex, testString, matchResult);
        }

        String testStr = (String) testString;
        int rightBound = matchResult.getRightBound();
        while (lastIndex >= strIndex) {
            lastIndex = testStr.lastIndexOf(high, lastIndex);
            if (lastIndex < 0 || lastIndex < strIndex) {
                return -1;
            }
            if (lastIndex + 1 < rightBound) {
                /*
                 * we consider high surrogate followed by low surrogate as a
                 * codepoint
                 */
                if (Character.isLowSurrogate(testStr.charAt(lastIndex + 1))) {
                    lastIndex--;
                    continue;
                }
            }
            if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
                return lastIndex;
            }
            lastIndex--;
        }
        return -1;
    }

    @Override
    protected String getName() {
        return "" + high;
    }

    protected int getChar() {
        return high;
    }

    /**
     * Returns {@code false} for char, range, supplementary and low surrogate
     * sets; another high surrogate set is compared by its char; every other
     * kind of set yields {@code true}.
     */
    @Override
    public boolean first(AbstractSet set) {
        if (set instanceof CharSet
                || set instanceof RangeSet
                || set instanceof SupplRangeSet
                || set instanceof SupplCharSet
                || set instanceof LowSurrogateCharSet) {
            return false;
        }
        if (set instanceof HighSurrogateCharSet) {
            return ((HighSurrogateCharSet) set).high == this.high;
        }
        return true;
    }

    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return true;
    }
}

View File

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.text.MessageFormat;
/**
 * Internationalization stub. All the messages in the java.util.regex package
 * go through this class. It should later be replaced with a real
 * internationalization utility.
 *
 * @author Nikolay A. Kuznetsov
 *
 */
class I18n {
    /**
     * Returns the given message unchanged.
     *
     * @param message
     *            the message text.
     * @return the same {@code message}.
     */
    public static String getMessage(String message) {
        return message;
    }

    /**
     * Formats {@code message} as a {@link MessageFormat} pattern with one
     * argument.
     *
     * @param message
     *            the pattern.
     * @param arg1
     *            value substituted for <code>{0}</code>.
     * @return the formatted message.
     */
    public static String getFormattedMessage(String message, Object arg1) {
        return MessageFormat.format(message, arg1);
    }

    /**
     * Formats {@code message} as a {@link MessageFormat} pattern with two
     * arguments.
     *
     * @param message
     *            the pattern.
     * @param arg1
     *            value substituted for <code>{0}</code>.
     * @param arg2
     *            value substituted for <code>{1}</code>.
     * @return the formatted message.
     */
    public static String getFormattedMessage(String message, Object arg1, Object arg2) {
        return MessageFormat.format(message, arg1, arg2);
    }
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Hashtable implementation mapping int keys to int arrays.
 *
 * Open addressing over a power-of-two sized table; the value 0 in
 * {@code table} marks an empty slot, so the key 0 is kept in a dedicated
 * field instead of the table.
 */
class IntArrHash {
    final int[] table;

    final Object[] values;

    final int mask;

    /**
     * Value stored under key 0, which cannot live in {@code table} because 0
     * is the empty-slot marker.
     */
    private int[] zeroValue;

    /**
     * Creates a table whose capacity is a power of two greater than
     * {@code 2 * size}.
     *
     * @param size
     *            expected number of entries.
     */
    public IntArrHash(int size) {
        int tmpMask = 0;
        while (size >= tmpMask) {
            tmpMask = (tmpMask << 1) | 1;
        }
        mask = (tmpMask << 1) | 1;
        table = new int[mask + 1];
        values = new Object[mask + 1];
    }

    /**
     * Associates {@code value} with {@code key}, replacing any previous
     * association.
     *
     * @throws IllegalStateException
     *             if every slot of the table is occupied by other keys.
     */
    public void put(int key, int[] value) {
        if (key == 0) {
            zeroValue = value;
            return;
        }
        int slot = key & mask;
        // Probe offsets grow 1, 2, 3, ...; on a power-of-two table this
        // visits every slot exactly once within mask + 1 probes.
        for (int step = 1; step <= mask + 1; step++) {
            int storedKey = table[slot];
            if (storedKey == 0 // empty
                    || storedKey == key) { // rewrite
                table[slot] = key;
                values[slot] = value;
                return;
            }
            slot = (slot + (step & mask)) & mask;
        }
        throw new IllegalStateException("IntArrHash is full");
    }

    /**
     * Returns the array stored under {@code key}, or {@code null} if there is
     * none.
     */
    public int[] get(int key) {
        if (key == 0) {
            return zeroValue;
        }
        int slot = key & mask;
        for (int step = 1; step <= mask + 1; step++) {
            int storedKey = table[slot];
            if (storedKey == 0) { // empty
                return null;
            }
            if (storedKey == key) {
                return (int[]) values[slot];
            }
            slot = (slot + (step & mask)) & mask;
        }
        // Every slot was inspected and none holds the key.
        return null;
    }
}

View File

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Hashtable implementation mapping int keys to int values.
 *
 * Open addressing over a power-of-two sized table; the value 0 in
 * {@code table} marks an empty slot, so the key 0 is kept in dedicated
 * fields instead of the table.
 */
class IntHash {
    int[] table;

    int[] values;

    int mask;

    /**
     * The size passed to the constructor; also the value {@link #get(int)}
     * returns for a key that is not present.
     */
    int size;

    /** Whether a value has been stored under key 0. */
    private boolean hasZeroKey;

    /** Value stored under key 0; meaningful only when hasZeroKey is set. */
    private int zeroValue;

    /**
     * Creates a table whose capacity is a power of two greater than
     * {@code 2 * size}.
     *
     * @param size
     *            expected number of entries; also returned by
     *            {@link #get(int)} for missing keys.
     */
    public IntHash(int size) {
        while (size >= mask) {
            mask = (mask << 1) | 1;
        }
        mask = (mask << 1) | 1;
        table = new int[mask + 1];
        values = new int[mask + 1];
        this.size = size;
    }

    /**
     * Associates {@code value} with {@code key}, replacing any previous
     * association.
     *
     * @throws IllegalStateException
     *             if every slot of the table is occupied by other keys.
     */
    public void put(int key, int value) {
        if (key == 0) {
            hasZeroKey = true;
            zeroValue = value;
            return;
        }
        int slot = key & mask;
        // Probe offsets grow 1, 2, 3, ...; on a power-of-two table this
        // visits every slot exactly once within mask + 1 probes.
        for (int step = 1; step <= mask + 1; step++) {
            int storedKey = table[slot];
            if (storedKey == 0 // empty
                    || storedKey == key) { // rewrite
                table[slot] = key;
                values[slot] = value;
                return;
            }
            slot = (slot + (step & mask)) & mask;
        }
        throw new IllegalStateException("IntHash is full");
    }

    /**
     * Returns the value stored under {@code key}, or {@code size} if the key
     * is not present.
     */
    public int get(int key) {
        if (key == 0) {
            return hasZeroKey ? zeroValue : size;
        }
        int slot = key & mask;
        for (int step = 1; step <= mask + 1; step++) {
            int storedKey = table[slot];
            if (storedKey == 0) { // empty
                return size;
            }
            if (storedKey == key) {
                return values[slot];
            }
            slot = (slot + (step & mask)) & mask;
        }
        // Every slot was inspected and none holds the key.
        return size;
    }
}

View File

@ -0,0 +1,144 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.ArrayList;
import java.util.Iterator;
/**
 * A group node: an alternation of sub-expressions. In this model a "group" is
 * the JointSet that opens it together with the FSet that closes it.
 */
class JointSet extends AbstractSet {
    protected ArrayList<AbstractSet> children;

    protected AbstractSet fSet;

    protected int groupIndex;

    protected JointSet() {
    }

    public JointSet(ArrayList<AbstractSet> children, FSet fSet) {
        this.children = children;
        this.fSet = fSet;
        this.groupIndex = fSet.getGroupIndex();
    }

    /**
     * Tries each alternative in order, recording {@code stringIndex} as the
     * start of this group. If none matches, the previous group start is
     * restored.
     *
     * @return the result of the first alternative that matches, or -1.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (children == null) {
            return -1;
        }

        int previousStart = matchResult.getStart(groupIndex);
        matchResult.setStart(groupIndex, stringIndex);

        int alternatives = children.size();
        int idx = 0;
        while (idx < alternatives) {
            int result = children.get(idx).matches(stringIndex, testString, matchResult);
            if (result >= 0) {
                return result;
            }
            idx++;
        }

        matchResult.setStart(groupIndex, previousStart);
        return -1;
    }

    /** Sets the node following the closing FSet of this group. */
    @Override
    public void setNext(AbstractSet next) {
        fSet.setNext(next);
    }

    /** Returns the node following the closing FSet of this group. */
    @Override
    public AbstractSet getNext() {
        return fSet.getNext();
    }

    @Override
    protected String getName() {
        return "JointSet"; //$NON-NLS-1$
    }

    public int getGroup() {
        return groupIndex;
    }

    /**
     * Returns {@code true} if {@code first(set)} is true for at least one
     * child.
     */
    @Override
    public boolean first(AbstractSet set) {
        if (children == null) {
            return false;
        }
        for (AbstractSet child : children) {
            if (child.first(set)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns {@code false} only when the group has a recorded end that is
     * equal to its start.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return matchResult.getEnd(groupIndex) < 0
                || matchResult.getStart(groupIndex) != matchResult.getEnd(groupIndex);
    }

    /**
     * This method is used for traversing nodes after the first stage of
     * compilation. Each child for which processBackRefReplacement() returns
     * a replacement is swapped for it in place.
     */
    @Override
    public void processSecondPass() {
        this.isSecondPassVisited = true;

        if (fSet != null && !fSet.isSecondPassVisited) {
            fSet.processSecondPass();
        }

        if (children != null) {
            int count = children.size();
            for (int pos = 0; pos < count; pos++) {
                AbstractSet current = children.get(pos);
                JointSet replacement = current.processBackRefReplacement();

                if (replacement != null) {
                    current.isSecondPassVisited = true;
                    children.set(pos, replacement);
                    current = replacement;
                }
                if (!current.isSecondPassVisited) {
                    current.processSecondPass();
                }
            }
        }

        if (next != null) {
            super.processSecondPass();
        }
    }
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Quantifier node whose inner set is a LeafSet.
 *
 * @author Nikolay A. Kuznetsov
 */
class LeafQuantifierSet extends QuantifierSet {
    protected LeafSet leaf;

    public LeafQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
        this.leaf = innerSet;
    }

    /**
     * Consumes as many repetitions of the leaf as possible, then tries the
     * rest of the pattern, giving back one repetition at a time until it
     * matches or no repetitions are left.
     *
     * @return the result of {@code next.matches} on success, -1 otherwise.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int repetitions = 0;
        while (stringIndex + leaf.charCount() <= matchResult.getRightBound()) {
            int consumed = leaf.accepts(stringIndex, testString);
            if (consumed <= 0) {
                break;
            }
            stringIndex += consumed;
            repetitions++;
        }

        // Backtrack: one attempt per taken repetition, plus one with none.
        for (int remaining = repetitions; remaining >= 0; remaining--) {
            int result = next.matches(stringIndex, testString, matchResult);
            if (result >= 0) {
                return result;
            }
            stringIndex -= leaf.charCount();
        }
        return -1;
    }

    @Override
    protected String getName() {
        return "<Quant>";
    }

    /**
     * Sets an inner set, which must be a LeafSet.
     *
     * @param innerSet
     *            The innerSet to set.
     */
    @Override
    public void setInnerSet(AbstractSet innerSet) {
        if (innerSet instanceof LeafSet) {
            super.setInnerSet(innerSet);
            this.leaf = (LeafSet) innerSet;
            return;
        }
        throw new RuntimeException("");
    }
}

View File

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Base class for nodes representing leaf tokens of the RE, i.e. those that
 * consume a fixed number of characters.
 *
 * @author Nikolay A. Kuznetsov
 */
abstract class LeafSet extends AbstractSet {
    protected int charCount = 1;

    public LeafSet(AbstractSet next) {
        super(next);
        setType(AbstractSet.TYPE_LEAF);
    }

    public LeafSet() {
    }

    /**
     * Returns the "shift", i.e. the number of accepted chars. Mostly an
     * internal function, but also called by quantifiers.
     */
    public abstract int accepts(int stringIndex, CharSequence testString);

    /**
     * Checks whether this node can be entered at {@code stringIndex} and, if
     * so, passes control to the next node. Sets {@code hitEnd} and fails when
     * fewer than {@link #charCount()} chars remain before the right bound.
     *
     * @return the result of {@code next.matches} on success, -1 otherwise.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        boolean fits = stringIndex + charCount() <= matchResult.getRightBound();
        if (!fits) {
            matchResult.hitEnd = true;
            return -1;
        }

        int accepted = accepts(stringIndex, testString);
        return (accepted < 0) ? -1 : next.matches(stringIndex + accepted, testString, matchResult);
    }

    /**
     * Returns number of characters this node consumes.
     *
     * @return number of characters this node consumes.
     */
    public int charCount() {
        return charCount;
    }

    @Override
    public boolean hasConsumed(MatchResultImpl mr) {
        return true;
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,194 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
*
* COPYRIGHT AND PERMISSION NOTICE
*
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
* hereby granted, free of charge, to any person obtaining a copy of the
* Unicode data files and any associated documentation (the "Data Files")
* or Unicode software and any associated documentation (the "Software")
* to deal in the Data Files or Software without restriction, including without
* limitation the rights to use, copy, modify, merge, publish, distribute,
* and/or sell copies of the Data Files or Software, and to permit persons
* to whom the Data Files or Software are furnished to do so, provided that
* (a) the above copyright notice(s) and this permission notice appear with
* all copies of the Data Files or Software, (b) both the above copyright
* notice(s) and this permission notice appear in associated documentation,
* and (c) there is clear notice in each modified Data File or in the Software
* as well as in the documentation associated with the Data File(s) or Software
* that the data or software has been modified.
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in these Data Files or Software without prior written
* authorization of the copyright holder.
*
* 2. Additional terms from the Database:
*
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
*
* Disclaimer
*
* The Unicode Character Database is provided as is by Unicode, Inc.
* No claims are made as to fitness for any particular purpose. No warranties
* of any kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been purchased
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
* will be exchange of defective media within 90 days of receipt. This disclaimer
* is applicable for all other data files accompanying the Unicode Character Database,
* some of which have been compiled by the Unicode Consortium, and some of which
* have been supplied by other sources.
*
* Limitations on Rights to Redistribute This Data
*
* Recipient is granted the right to make copies in any form for internal
* distribution and to freely use the information supplied in the creation of
* products supporting the UnicodeTM Standard. The files in
* the Unicode Character Database can be redistributed to third parties or other
* organizations (whether for profit or not) as long as this notice and the disclaimer
* notice are retained. Information can be extracted from these files and used
* in documentation or programs, as long as there is an accompanying notice
* indicating the source.
*/
package org.teavm.classlib.java.util.regex;
/*
 * This class is a range that contains only surrogate characters.
 */
class LowHighSurrogateRangeSet extends JointSet {
    protected AbstractCharClass surrChars;

    protected boolean alt = false;

    public LowHighSurrogateRangeSet(AbstractCharClass surrChars, AbstractSet next) {
        this(surrChars);
        setNext(next);
    }

    public LowHighSurrogateRangeSet(AbstractCharClass surrChars) {
        this.surrChars = surrChars.getInstance();
        this.alt = surrChars.alt;
    }

    /**
     * Returns the next.
     */
    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    /**
     * Sets next abstract set.
     *
     * @param next
     *            The next to set.
     */
    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    /**
     * Matches one char contained in the surrogate class, provided it is not
     * half of a surrogate pair inside the bounds: a high surrogate followed
     * by a low one, or a low surrogate preceded by a high one, is rejected.
     *
     * @return the result of {@code next.matches} on success, -1 otherwise.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int leftBound = matchResult.getLeftBound();
        int rightBound = matchResult.getRightBound();

        if (stringIndex + 1 > rightBound) {
            matchResult.hitEnd = true;
            return -1;
        }

        char current = testString.charAt(stringIndex);
        if (!surrChars.contains(current)) {
            return -1;
        }

        if (Character.isHighSurrogate(current)) {
            boolean hasFollower = stringIndex + 1 < rightBound;
            if (hasFollower && Character.isLowSurrogate(testString.charAt(stringIndex + 1))) {
                return -1;
            }
        } else if (Character.isLowSurrogate(current)) {
            boolean hasPredecessor = stringIndex > leftBound;
            if (hasPredecessor && Character.isHighSurrogate(testString.charAt(stringIndex - 1))) {
                return -1;
            }
        }

        return next.matches(stringIndex + 1, testString, matchResult);
    }

    @Override
    protected String getName() {
        return "range:" + (alt ? "^ " : " ") + surrChars.toString();
    }

    /**
     * Returns {@code false} for char, range and supplementary sets,
     * {@code true} for every other kind of set.
     */
    @Override
    public boolean first(AbstractSet set) {
        boolean excluded = set instanceof CharSet
                || set instanceof RangeSet
                || set instanceof SupplRangeSet
                || set instanceof SupplCharSet;
        return !excluded;
    }

    protected AbstractCharClass getChars() {
        return surrChars;
    }

    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return true;
    }
}

View File

@ -0,0 +1,255 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
*
* COPYRIGHT AND PERMISSION NOTICE
*
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
* hereby granted, free of charge, to any person obtaining a copy of the
* Unicode data files and any associated documentation (the "Data Files")
* or Unicode software and any associated documentation (the "Software")
* to deal in the Data Files or Software without restriction, including without
* limitation the rights to use, copy, modify, merge, publish, distribute,
* and/or sell copies of the Data Files or Software, and to permit persons
* to whom the Data Files or Software are furnished to do so, provided that
* (a) the above copyright notice(s) and this permission notice appear with
* all copies of the Data Files or Software, (b) both the above copyright
* notice(s) and this permission notice appear in associated documentation,
* and (c) there is clear notice in each modified Data File or in the Software
* as well as in the documentation associated with the Data File(s) or Software
* that the data or software has been modified.
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in these Data Files or Software without prior written
* authorization of the copyright holder.
*
* 2. Additional terms from the Database:
*
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
*
* Disclaimer
*
* The Unicode Character Database is provided as is by Unicode, Inc.
* No claims are made as to fitness for any particular purpose. No warranties
* of any kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been purchased
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
* will be exchange of defective media within 90 days of receipt. This disclaimer
* is applicable for all other data files accompanying the Unicode Character Database,
* some of which have been compiled by the Unicode Consortium, and some of which
* have been supplied by other sources.
*
* Limitations on Rights to Redistribute This Data
*
* Recipient is granted the right to make copies in any form for internal
* distribution and to freely use the information supplied in the creation of
* products supporting the UnicodeTM Standard. The files in
* the Unicode Character Database can be redistributed to third parties or other
* organizations (whether for profit or not) as long as this notice and the disclaimer
* notice are retained. Information can be extracted from these files and used
* in documentation or programs, as long as there is an accompanying notice
* indicating the source.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Node that matches one specific low surrogate character, but only when that
 * character is not the second half of a surrogate pair.
 */
class LowSurrogateCharSet extends JointSet {
    /*
     * Note that we can use high and low surrogate characters that don't combine
     * into supplementary code point. See
     * http://www.unicode.org/reports/tr18/#Supplementary_Characters
     */
    private char low;

    /**
     * @param low
     *            the low surrogate character this node matches.
     */
    public LowSurrogateCharSet(char low) {
        this.low = low;
    }

    /**
     * Returns the next.
     */
    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    /**
     * Sets next abstract set.
     *
     * @param next
     *            The next to set.
     */
    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    /**
     * Matches the character at {@code stringIndex} against this node's low
     * surrogate and, on success, continues with the next node.
     *
     * @return the result of the next node, or -1 when the index is at/after the
     *         right bound (in which case {@code hitEnd} is set), when the
     *         character is preceded by a high surrogate, or when it differs
     *         from the expected one.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (stringIndex + 1 > matchResult.getRightBound()) {
            matchResult.hitEnd = true;
            return -1;
        }
        char current = testString.charAt(stringIndex);
        if (stringIndex > matchResult.getLeftBound()) {
            char previous = testString.charAt(stringIndex - 1);
            /*
             * we consider high surrogate followed by low surrogate as a
             * codepoint
             */
            if (Character.isHighSurrogate(previous)) {
                return -1;
            }
        }
        if (this.low == current) {
            return next.matches(stringIndex + 1, testString, matchResult);
        }
        return -1;
    }

    /**
     * Searches forward from {@code strIndex} for a position where this node
     * and the following nodes match. Uses {@link String#indexOf(int, int)}
     * when the input is a {@link String}; otherwise defers to the superclass.
     *
     * @return the index of the matching low surrogate, or -1 if none exists
     *         before the right bound.
     */
    @Override
    public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (testString instanceof String) {
            String testStr = (String)testString;
            int startStr = matchResult.getLeftBound();
            int strLength = matchResult.getRightBound();
            while (strIndex < strLength) {
                strIndex = testStr.indexOf(low, strIndex);
                // indexOf scans to the end of the string, so a hit located at
                // or after the right bound must be rejected here; matches()
                // refuses such positions as well.
                if (strIndex < 0 || strIndex >= strLength) {
                    return -1;
                }
                /*
                 * we consider high surrogate followed by low surrogate as a
                 * codepoint
                 */
                boolean secondHalfOfPair = strIndex > startStr
                        && Character.isHighSurrogate(testStr.charAt(strIndex - 1));
                if (!secondHalfOfPair && next.matches(strIndex + 1, testString, matchResult) >= 0) {
                    return strIndex;
                }
                strIndex++;
            }
            return -1;
        }
        return super.find(strIndex, testString, matchResult);
    }

    /**
     * Searches backwards from {@code lastIndex} down to {@code strIndex} for a
     * position where this node and the following nodes match. Uses
     * {@link String#lastIndexOf(int, int)} when the input is a {@link String};
     * otherwise defers to the superclass.
     *
     * @return the index of the matching low surrogate, or -1 if none exists.
     */
    @Override
    public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (testString instanceof String) {
            int startStr = matchResult.getLeftBound();
            String testStr = (String)testString;
            while (lastIndex >= strIndex) {
                lastIndex = testStr.lastIndexOf(low, lastIndex);
                if (lastIndex < 0 || lastIndex < strIndex) {
                    return -1;
                }
                if (lastIndex > startStr) {
                    /*
                     * we consider high surrogate followed by low surrogate as a
                     * codepoint
                     */
                    if (Character.isHighSurrogate(testStr.charAt(lastIndex - 1))) {
                        // the preceding char is a high surrogate, so it cannot
                        // be a candidate either: skip both chars
                        lastIndex -= 2;
                        continue;
                    }
                }
                if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
                    return lastIndex;
                }
                lastIndex--;
            }
            return -1;
        }
        return super.findBack(strIndex, lastIndex, testString, matchResult);
    }

    /**
     * Returns the matched character as a one-character string.
     */
    @Override
    protected String getName() {
        return "" + low;
    }

    /**
     * Returns the low surrogate this node matches.
     */
    protected int getChar() {
        return low;
    }

    /**
     * Tells whether {@code set} may start with the same character as this
     * node: never for the other character-matching node types listed below,
     * only on equal characters for another low surrogate node, and always for
     * any other node type.
     */
    @Override
    public boolean first(AbstractSet set) {
        if (set instanceof CharSet
                || set instanceof RangeSet
                || set instanceof SupplRangeSet
                || set instanceof SupplCharSet
                || set instanceof HighSurrogateCharSet) {
            return false;
        }
        if (set instanceof LowSurrogateCharSet) {
            return ((LowSurrogateCharSet)set).low == this.low;
        }
        return true;
    }

    /**
     * This node always consumes a character when it matches.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return true;
    }
}

View File

@ -0,0 +1,267 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.Arrays;
/**
 * Match result implementation Note: probably it might make sense to combine
 * this class with Matcher.
 *
 * <p>Besides the group offsets exposed through {@link TMatchResult}, this
 * object carries the mutable scratch state used while matching: per-node
 * "consumed" slots, counters for composite quantifiers, the region bounds
 * and the {@code hitEnd}/{@code requireEnd} flags.</p>
 *
 * @author Nikolay A. Kuznetsov
 */
class MatchResultImpl implements TMatchResult {
    /** Group offsets: start of group g at index 2*g, end at 2*g+1; -1 when unset. */
    private int[] groupBounds = null;
    /** Scratch slots indexed by a node's counter; -1 when unset. */
    private int[] consumers = null;
    /** Enter counters for composite quantifiers; null when the pattern has none. */
    private int[] compQuantCounters = null;
    private CharSequence string = null;
    /** Number of groups INCLUDING group 0, i.e. the public group count plus one. */
    private int groupCount = 0;
    private boolean valid = false;
    private int leftBound;
    private int rightBound;
    int startIndex;
    private boolean transparentBounds = false;
    private boolean anchoringBounds = false;
    boolean hitEnd = false;
    boolean requireEnd = false;
    int previousMatch = -1;
    private int mode;

    /**
     * @param string
     *            the input sequence.
     * @param leftBound
     *            left region bound.
     * @param rightBound
     *            right region bound.
     * @param groupCount
     *            number of capturing groups, not counting group 0.
     * @param compQuantCount
     *            number of composite quantifier counters to allocate.
     * @param consumersCount
     *            number of "consumed" slots to allocate.
     */
    MatchResultImpl(CharSequence string, int leftBound, int rightBound, int groupCount, int compQuantCount,
            int consumersCount) {
        this.groupCount = ++groupCount;
        this.groupBounds = new int[groupCount * 2];
        this.consumers = new int[consumersCount];
        Arrays.fill(consumers, -1);
        if (compQuantCount > 0) {
            this.compQuantCounters = new int[compQuantCount];
        }
        Arrays.fill(groupBounds, -1);
        reset(string, leftBound, rightBound);
    }

    /**
     * Creates a copy that holds the same input, bounds and validity flag; the
     * group offsets are copied only when this result is valid. The copy is
     * created without consumer slots or quantifier counters.
     */
    TMatchResult cloneImpl() {
        MatchResultImpl res = new MatchResultImpl(this.string, this.leftBound, this.rightBound, this.groupCount - 1, 0,
                0);
        res.valid = valid;
        if (valid) {
            System.arraycopy(groupBounds, 0, res.groupBounds, 0, this.groupBounds.length);
        }
        return res;
    }

    public void setConsumed(int counter, int value) {
        this.consumers[counter] = value;
    }

    public int getConsumed(int counter) {
        return this.consumers[counter];
    }

    @Override
    public int end() {
        return end(0);
    }

    /**
     * @throws IllegalStateException
     *             if this result is not valid.
     * @throws IndexOutOfBoundsException
     *             if there is no group with the given index.
     */
    @Override
    public int end(int group) {
        checkGroup(group);
        return groupBounds[group * 2 + 1];
    }

    /** Stores the start offset of a group without any validity or range check. */
    void setStart(int group, int offset) {
        groupBounds[group * 2] = offset;
    }

    /** Stores the end offset of a group without any validity or range check. */
    void setEnd(int group, int offset) {
        groupBounds[group * 2 + 1] = offset;
    }

    /** Reads the start offset of a group without any validity or range check. */
    int getStart(int group) {
        return groupBounds[group * 2];
    }

    /** Reads the end offset of a group without any validity or range check. */
    int getEnd(int group) {
        return groupBounds[group * 2 + 1];
    }

    @Override
    public String group() {
        return group(0);
    }

    /**
     * Returns the text captured by the group, or {@code null} when the group
     * has no recorded start offset.
     *
     * @throws IllegalStateException
     *             if this result is not valid.
     * @throws IndexOutOfBoundsException
     *             if there is no group with the given index.
     */
    @Override
    public String group(int group) {
        int from = start(group);
        if (from < 0) {
            return null;
        }
        return string.subSequence(from, end(group)).toString();
    }

    /**
     * Same as {@link #group(int)} but without the validity/range check;
     * returns {@code null} when the stored offsets do not describe a proper
     * range inside the input.
     */
    String getGroupNoCheck(int group) {
        int st = getStart(group);
        int end = getEnd(group);
        // any negative operand makes the OR-ed value negative
        if ((end | st | (end - st)) < 0 || end > string.length()) {
            return null;
        }
        return string.subSequence(st, end).toString();
    }

    /** Returns the number of capturing groups, not counting group 0. */
    @Override
    public int groupCount() {
        return groupCount - 1;
    }

    @Override
    public int start() {
        return start(0);
    }

    /**
     * @throws IllegalStateException
     *             if this result is not valid.
     * @throws IndexOutOfBoundsException
     *             if there is no group with the given index.
     */
    @Override
    public int start(int group) {
        checkGroup(group);
        return groupBounds[group * 2];
    }

    /*
     * This method being called after any successful match; For now it's being
     * used to check zero group for empty match;
     */
    public void finalizeMatch() {
        if (this.groupBounds[0] == -1) {
            this.groupBounds[0] = this.startIndex;
            this.groupBounds[1] = this.startIndex;
        }
        previousMatch = end();
    }

    public int getEnterCounter(int setCounter) {
        return compQuantCounters[setCounter];
    }

    public void setEnterCounter(int setCounter, int value) {
        compQuantCounters[setCounter] = value;
    }

    /**
     * Verifies that this result is valid and that {@code group} addresses an
     * existing group.
     */
    private void checkGroup(int group) {
        if (!valid) {
            throw new IllegalStateException();
        }
        // groupCount already includes group 0, so the valid indices are
        // 0 .. groupCount - 1 (groupBounds holds exactly groupCount * 2 slots).
        if (group < 0 || group >= groupCount) {
            throw new IndexOutOfBoundsException(String.valueOf(group));
        }
    }

    /** Sets both offsets of a group after the same checks as {@link #start(int)}. */
    void updateGroup(int index, int srtOffset, int endOffset) {
        checkGroup(index);
        groupBounds[index * 2] = srtOffset;
        groupBounds[index * 2 + 1] = endOffset;
    }

    protected void setValid() {
        this.valid = true;
    }

    protected boolean isValid() {
        return this.valid;
    }

    /**
     * Invalidates this result and clears group offsets and consumer slots.
     * The input is replaced only when {@code newSequence} is not null, and
     * the bounds only when {@code leftBound} is not negative.
     */
    protected void reset(CharSequence newSequence, int leftBound, int rightBound) {
        valid = false;
        mode = TMatcher.MODE_MATCH;
        Arrays.fill(groupBounds, -1);
        Arrays.fill(consumers, -1);
        if (newSequence != null) {
            this.string = newSequence;
        }
        if (leftBound >= 0) {
            this.setBounds(leftBound, rightBound);
        }
        this.startIndex = this.leftBound;
    }

    /** Resets the match state while keeping the current input and bounds. */
    protected void reset() {
        reset(null, -1, -1);
    }

    private void setBounds(int leftBound, int rightBound) {
        this.leftBound = leftBound;
        this.rightBound = rightBound;
    }

    /**
     * Sets the index the current attempt starts from; also seeds
     * {@code previousMatch} with it when no previous match has been recorded.
     */
    protected void setStartIndex(int startIndex) {
        this.startIndex = startIndex;
        previousMatch = previousMatch >= 0 ? previousMatch : startIndex;
    }

    public int getLeftBound() {
        return this.leftBound;
    }

    public int getRightBound() {
        return this.rightBound;
    }

    protected void setMode(int mode) {
        this.mode = mode;
    }

    protected int mode() {
        return mode;
    }

    protected void useAnchoringBounds(boolean value) {
        this.anchoringBounds = value;
    }

    protected boolean hasAnchoringBounds() {
        return this.anchoringBounds;
    }

    protected void useTransparentBounds(boolean value) {
        this.transparentBounds = value;
    }

    protected boolean hasTransparentBounds() {
        return this.transparentBounds;
    }

    int getPreviousMatchEnd() {
        return previousMatch;
    }
}

View File

@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents multiline version of the dollar sign: matches, without consuming
 * anything, at the end of input or right before a line terminator character.
 *
 * @author Nikolay A. Kuznetsov
 */
class MultiLineEOLSet extends AbstractSet {
    /** Index of the "consumed" slot this node uses in the match result. */
    private int consCounter;

    public MultiLineEOLSet(int counter) {
        this.consCounter = counter;
    }

    /**
     * Succeeds (delegating to the next node at the same index) when
     * {@code strIndex} is the end of input or the character at
     * {@code strIndex} is a line terminator; otherwise returns -1.
     *
     * <p>With anchoring bounds the end of input is the right bound of the
     * region, otherwise it is the end of the whole sequence.</p>
     */
    @Override
    public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        // The end anchor belongs to the RIGHT bound of the region.
        int end = matchResult.hasAnchoringBounds() ? matchResult.getRightBound() : testString.length();
        if (strIndex != end) {
            switch (testString.charAt(strIndex)) {
                case '\r':
                case '\n':
                case '\u0085':
                case '\u2028':
                case '\u2029':
                    break;
                default:
                    return -1;
            }
        }
        // zero characters consumed by this node
        matchResult.setConsumed(consCounter, 0);
        return next.matches(strIndex, testString, matchResult);
    }

    /**
     * Reports whether the slot of this node holds a non-zero value, and
     * resets the slot to -1 as a side effect.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        int cons;
        boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0);
        matchResult.setConsumed(consCounter, -1);
        return res;
    }

    @Override
    protected String getName() {
        return "<MultiLine $>"; //$NON-NLS-1$
    }
}

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Multiline version of the ^ sign: a zero-width node that succeeds at the
 * start of input and right after a line terminator, but never at the right
 * bound.
 *
 * @author Nikolay A. Kuznetsov
 */
class MultiLineSOLSet extends AbstractSet {
    /** Decides whether a position follows a line terminator. */
    private AbstractLineTerminator lt;

    public MultiLineSOLSet(AbstractLineTerminator lt) {
        this.lt = lt;
    }

    /**
     * Delegates to the next node at the same index when {@code strIndex} is a
     * line start; returns -1 otherwise.
     */
    @Override
    public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (strIndex == matchResult.getRightBound()) {
            return -1;
        }
        boolean atInputStart = strIndex == 0
                || (matchResult.hasAnchoringBounds() && strIndex == matchResult.getLeftBound());
        if (!atInputStart) {
            char before = testString.charAt(strIndex - 1);
            char current = testString.charAt(strIndex);
            if (!lt.isAfterLineTerminator(before, current)) {
                return -1;
            }
        }
        return next.matches(strIndex, testString, matchResult);
    }

    /**
     * A line-start anchor never consumes input.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return false;
    }

    @Override
    protected String getName() {
        return "^";
    }
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.ArrayList;
/**
 * Negative look ahead node: succeeds only if none of its alternatives
 * matches at the current position.
 *
 * @author Nikolay A. Kuznetsov
 */
class NegativeLookAhead extends AtomicJointSet {
    public NegativeLookAhead(ArrayList<AbstractSet> children, FSet fSet) {
        super(children, fSet);
    }

    /**
     * Tries every alternative at {@code stringIndex}. Returns -1 as soon as
     * one of them matches; otherwise returns the result of the next node,
     * which is tried at the unchanged {@code stringIndex}.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int alternatives = children.size();
        int idx = 0;
        while (idx < alternatives) {
            AbstractSet alternative = children.get(idx);
            int result = alternative.matches(stringIndex, testString, matchResult);
            if (result >= 0) {
                return -1;
            }
            idx++;
        }
        return next.matches(stringIndex, testString, matchResult);
    }

    /**
     * Look-ahead is zero-width, so nothing is ever consumed.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return false;
    }

    @Override
    protected String getName() {
        return "NegLookaheadJointSet";
    }
}

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.ArrayList;
/**
 * Negative look behind node: succeeds only if none of its alternatives can
 * be found ending at the current position.
 *
 * @author Nikolay A. Kuznetsov
 */
class NegativeLookBehind extends AtomicJointSet {
    public NegativeLookBehind(ArrayList<AbstractSet> children, FSet fSet) {
        super(children, fSet);
    }

    /**
     * Searches backwards from {@code stringIndex} for each alternative.
     * Returns -1 as soon as one of them is found; otherwise returns the
     * result of the next node, which is tried at the unchanged
     * {@code stringIndex}.
     *
     * <p>The backward search stops at the left bound of the region unless the
     * bounds are transparent, in which case it may go back to index 0.</p>
     */
    @Override
    public int matches(int stringIndex, CharSequence testString,
            MatchResultImpl matchResult) {
        int size = children.size();
        // With opaque bounds the look-behind must not see text before the
        // region; only transparent bounds allow searching from index 0.
        int leftBound = matchResult.hasTransparentBounds() ? 0 : matchResult.getLeftBound();
        // fSet will take this index to check if we at the right bound
        // and return true if the current index equal to this one
        matchResult.setConsumed(groupIndex, stringIndex);
        for (int i = 0; i < size; i++) {
            AbstractSet e = children.get(i);
            // find limits could be calculated though e.getCharCount()
            // fSet will return true only if string index at fSet equal
            // to stringIndex
            if (e.findBack(leftBound, stringIndex, testString, matchResult) >= 0) {
                return -1;
            }
        }
        return next.matches(stringIndex, testString, matchResult);
    }

    /**
     * Look-behind is zero-width, so nothing is ever consumed.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return false;
    }

    @Override
    protected String getName() {
        return "NegBehindJointSet";
    }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Non-capturing group closing node.
 *
 * @author Nikolay A. Kuznetsov
 */
class NonCapFSet extends FSet {
    public NonCapFSet(int groupIndex) {
        super(groupIndex);
    }

    /**
     * Replaces the value stored in this group's "consumed" slot with the
     * distance between {@code stringIndex} and that stored value, then
     * continues with the next node at the same index.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int slot = getGroupIndex();
        // presumably the slot holds the index at which the group was entered
        int stored = matchResult.getConsumed(slot);
        int distance = stringIndex - stored;
        matchResult.setConsumed(slot, distance);
        return next.matches(stringIndex, testString, matchResult);
    }

    @Override
    protected String getName() {
        return "NonCapFSet";
    }

    /**
     * The closing node itself never consumes input.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl mr) {
        return false;
    }
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.ArrayList;
/**
 * Node representing non-capturing group
 * @author Nikolay A. Kuznetsov
 */
class NonCapJointSet extends JointSet {
    protected NonCapJointSet() {
    }

    public NonCapJointSet(ArrayList<AbstractSet> children, FSet fSet) {
        super(children, fSet);
    }

    /**
     * Records {@code stringIndex} in this group's "consumed" slot and tries
     * the alternatives in order, returning the first non-negative result.
     * When every alternative fails, the previous slot value is restored and
     * -1 is returned.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString,
            MatchResultImpl matchResult) {
        int saved = matchResult.getConsumed(groupIndex);
        matchResult.setConsumed(groupIndex, stringIndex);
        int alternatives = children.size();
        int idx = 0;
        while (idx < alternatives) {
            AbstractSet alternative = children.get(idx);
            int result = alternative.matches(stringIndex, testString, matchResult);
            if (result >= 0) {
                return result;
            }
            idx++;
        }
        // no alternative matched: undo the slot update
        matchResult.setConsumed(groupIndex, saved);
        return -1;
    }

    @Override
    protected String getName() {
        return "NonCapJointSet"; //$NON-NLS-1$
    }

    /**
     * Reports whether this group's "consumed" slot holds a non-zero value.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return matchResult.getConsumed(groupIndex) != 0;
    }
}

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Possessive ? quantifier over a group: the group is tried once, and whatever
 * it matched is kept. See java.util.regex.GroupQuantifierSet for more details.
 *
 * @author Nikolay A. Kuznetsov
 */
class PosAltGroupQuantifierSet extends AltGroupQuantifierSet {
    /**
     * Makes {@code FSet.posFSet} the successor of the inner group so that the
     * inner match is evaluated on its own rather than together with the nodes
     * that follow this quantifier.
     */
    public PosAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
        ((JointSet)innerSet).setNext(FSet.posFSet);
    }

    /**
     * Tries the inner group once; when it returns a positive index the match
     * continues from there, otherwise from the original {@code stringIndex}.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int afterGroup = innerSet.matches(stringIndex, testString, matchResult);
        int resumeAt = afterGroup > 0 ? afterGroup : stringIndex;
        return next.matches(resumeAt, testString, matchResult);
    }

    /**
     * Only this node's successor is changed.
     */
    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }
}

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Possessive composite (i.e. {n,m}) quantifier node over groups.
 *
 * @author Nikolay A. Kuznetsov
 */
class PosCompositeGroupQuantifierSet extends CompositeGroupQuantifierSet {
    /**
     * Makes {@code FSet.posFSet} the successor of the inner group so that
     * each repetition is evaluated on its own rather than together with the
     * nodes that follow this quantifier.
     */
    public PosCompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, AbstractSet next, int type,
            int setCounter) {
        super(quant, innerSet, next, type, setCounter);
        innerSet.setNext(FSet.posFSet);
    }

    /**
     * Repeats the inner group while it makes progress, at most
     * {@code quantifier.max()} times, then continues with the next node from
     * the position reached.
     *
     * @return -1 when the inner group failed before {@code quantifier.min()}
     *         repetitions were reached, otherwise the result of the next node.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int counter = 0;
        int max = quantifier.max();
        int nextIndex = stringIndex;
        // The repetition limit is tested BEFORE running the inner group, so
        // no extra inner match (and no side effect on matchResult) happens
        // once the maximum number of repetitions has been reached.
        while (counter < max) {
            nextIndex = innerSet.matches(stringIndex, testString, matchResult);
            if (nextIndex <= stringIndex) {
                // failure (< 0) or an empty repetition: stop iterating
                break;
            }
            counter++;
            stringIndex = nextIndex;
        }
        if (nextIndex < 0 && counter < quantifier.min()) {
            return -1;
        }
        return next.matches(stringIndex, testString, matchResult);
    }
}

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Possessive + quantifier node over groups.
 *
 * @author Nikolay A. Kuznetsov
 */
class PosPlusGroupQuantifierSet extends GroupQuantifierSet {
    /**
     * Makes {@code FSet.posFSet} the successor of the inner group so that
     * each repetition is evaluated on its own rather than together with the
     * nodes that follow this quantifier.
     */
    public PosPlusGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
        ((JointSet)innerSet).setNext(FSet.posFSet);
    }

    /**
     * Requires one successful match of the inner group, keeps repeating it
     * while every repetition advances the position, then continues with the
     * next node from the position reached.
     *
     * @return -1 when the first attempt of the inner group fails, otherwise
     *         the result of the next node.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int reached = innerSet.matches(stringIndex, testString, matchResult);
        if (reached < 0) {
            return -1;
        }
        // keep going only while the group actually moves forward
        while (reached > stringIndex) {
            stringIndex = reached;
            reached = innerSet.matches(stringIndex, testString, matchResult);
        }
        return next.matches(stringIndex, testString, matchResult);
    }
}

View File

@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.ArrayList;
/**
 * Positive lookahead node: succeeds only if one of its alternatives matches
 * at the current position; the position itself is not advanced.
 *
 * @author Nikolay A. Kuznetsov
 */
class PositiveLookAhead extends AtomicJointSet {
    public PositiveLookAhead(ArrayList<AbstractSet> children, FSet fSet) {
        super(children, fSet);
    }

    /**
     * Tries the alternatives in order. After the first one that matches, the
     * result of the next node (tried at the unchanged {@code stringIndex}) is
     * returned without looking at the remaining alternatives. Returns -1 when
     * no alternative matches.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int alternatives = children.size();
        boolean found = false;
        int idx = 0;
        while (!found && idx < alternatives) {
            AbstractSet alternative = children.get(idx);
            found = alternative.matches(stringIndex, testString, matchResult) >= 0;
            idx++;
        }
        if (!found) {
            return -1;
        }
        // the look-ahead itself is zero-width: continue from where we started
        return next.matches(stringIndex, testString, matchResult);
    }

    /**
     * Look-ahead is zero-width, so nothing is ever consumed.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return false;
    }

    @Override
    protected String getName() {
        return "PosLookaheadJointSet";
    }
}

View File

@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.ArrayList;
/**
 * Positive lookbehind node: succeeds only if one of its alternatives can be
 * found ending at the current position.
 *
 * @author Nikolay A. Kuznetsov
 */
class PositiveLookBehind extends AtomicJointSet {
    public PositiveLookBehind(ArrayList<AbstractSet> children, FSet fSet) {
        super(children, fSet);
    }

    /**
     * Runs the next node first; only when it succeeds are the alternatives
     * searched backwards from {@code stringIndex}. The backward search stops
     * at the left bound of the region unless the bounds are transparent.
     *
     * @return the result of the next node when an alternative is found,
     *         otherwise -1.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int alternatives = children.size();
        int searchLimit;
        if (matchResult.hasTransparentBounds()) {
            searchLimit = 0;
        } else {
            searchLimit = matchResult.getLeftBound();
        }
        int continuation = next.matches(stringIndex, testString, matchResult);
        if (continuation < 0) {
            return -1;
        }
        // fSet will take this index to check if we at the right bound
        // and return true if the current index equal to this one
        matchResult.setConsumed(groupIndex, stringIndex);
        int idx = 0;
        while (idx < alternatives) {
            AbstractSet alternative = children.get(idx);
            // find limits could be calculated though e.getCharCount()
            // fSet will return true only if string index at fSet equal
            // to stringIndex
            int foundAt = alternative.findBack(searchLimit, stringIndex, testString, matchResult);
            if (foundAt >= 0) {
                matchResult.setConsumed(groupIndex, -1);
                return continuation;
            }
            idx++;
        }
        return -1;
    }

    /**
     * Look-behind is zero-width, so nothing is ever consumed.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return false;
    }

    @Override
    protected String getName() {
        return "PosBehindJointSet";
    }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Possessive ? quantifier node over a leaf: the leaf is taken when it fits
 * and accepts the input, and that choice is never revisited.
 *
 * @author Nikolay A. Kuznetsov
 */
class PossessiveAltQuantifierSet extends AltQuantifierSet {
    public PossessiveAltQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Advances past the leaf when it fits before the right bound and accepts
     * at least one character, then continues with the next node.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        boolean fits = stringIndex + leaf.charCount() <= matchResult.getRightBound();
        if (fits) {
            int accepted = leaf.accepts(stringIndex, testString);
            if (accepted >= 1) {
                stringIndex += accepted;
            }
        }
        return next.matches(stringIndex, testString, matchResult);
    }
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Possessive composite (i.e. {n, m}) quantifier node over a leaf.
 *
 * @author Nikolay A. Kuznetsov
 */
class PossessiveCompositeQuantifierSet extends CompositeQuantifierSet {
    public PossessiveCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, AbstractSet next, int type) {
        super(quant, innerSet, next, type);
    }

    /**
     * Consumes the leaf up to {@code quantifier.max()} times. The first
     * {@code quantifier.min()} repetitions are mandatory: if one of them does
     * not fit before the right bound, {@code hitEnd} is set and -1 is
     * returned; if the leaf rejects the input, -1 is returned. The remaining
     * repetitions are optional and simply stop at the first failure. The
     * next node is then tried from the position reached.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int required = quantifier.min();
        int allowed = quantifier.max();
        for (int done = 0; done < required || done < allowed; done++) {
            boolean mandatory = done < required;
            if (stringIndex + leaf.charCount() > matchResult.getRightBound()) {
                if (mandatory) {
                    matchResult.hitEnd = true;
                    return -1;
                }
                break;
            }
            int accepted = leaf.accepts(stringIndex, testString);
            if (accepted < 1) {
                if (mandatory) {
                    return -1;
                }
                break;
            }
            stringIndex += accepted;
        }
        return next.matches(stringIndex, testString, matchResult);
    }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Possessive quantifier set over groups.
*
* @author Nikolay A. Kuznetsov
*/
class PossessiveGroupQuantifierSet extends GroupQuantifierSet {

    /**
     * Creates the node and makes {@code FSet.posFSet} the continuation of the
     * inner group, so the group is matched on its own rather than together
     * with the rest of the pattern.
     */
    public PossessiveGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
        innerSet.setNext(FSet.posFSet);
    }

    /**
     * Repeats the inner group for as long as it matches and moves forward, then
     * matches the rest of the pattern once from the position reached. Input
     * consumed by the group is never given back.
     *
     * @return the result of the next node from the final position
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int position = stringIndex;
        for (;;) {
            int nextIndex = innerSet.matches(position, testString, matchResult);
            if (nextIndex <= 0) {
                break;
            }
            if (nextIndex == position) {
                // The group matched the empty string. Repeating it would give
                // the same result forever, so stop instead of spinning.
                break;
            }
            position = nextIndex;
        }
        return next.matches(position, testString, matchResult);
    }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Possessive quantifier set over LeafSet's
*
* @author Nikolay A. Kuznetsov
*/
class PossessiveQuantifierSet extends LeafQuantifierSet {

    /** Forwards all arguments to the superclass constructor. */
    public PossessiveQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Consumes the leaf as many times as it fits before the right bound and
     * accepts, then matches the rest of the pattern once from the position
     * reached.
     *
     * @return the result of the next node from the final position
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int position = stringIndex;
        for (;;) {
            if (position + leaf.charCount() > matchResult.getRightBound()) {
                break;
            }
            int consumed = leaf.accepts(position, testString);
            if (consumed < 1) {
                break;
            }
            position += consumed;
        }
        return next.matches(position, testString, matchResult);
    }
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Node representing previous match (\G).
*
* @author Nikolay A. Kuznetsov
*/
class PreviousMatch extends AbstractSet {

    /**
     * Succeeds only when the current index equals the end of the previous
     * match as reported by the match result; consumes no input itself.
     *
     * @return the result of the next node, or -1 when the position differs
     */
    @Override
    public int matches(int stringIndex, CharSequence testString,
            MatchResultImpl matchResult) {
        return stringIndex == matchResult.getPreviousMatchEnd()
                ? next.matches(stringIndex, testString, matchResult)
                : -1;
    }

    /** This node never consumes input. */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return false;
    }

    @Override
    protected String getName() {
        return "PreviousMatch";
    }
}

View File

@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Represents RE quantifier; contains two fields responsible for min and max
* number of repetitions. Negative value for maximum number of repetition
* represents infinity(i.e. +,*)
*
* @author Nikolay A. Kuznetsov
*/
class Quantifier extends SpecialToken implements Cloneable {
    private int min;
    private int max;
    private int counter = 0;

    /**
     * Creates an exact quantifier: both bounds are set to the given count.
     *
     * @param min the number of repetitions used for both min and max
     */
    public Quantifier(int min) {
        this.min = this.max = min;
    }

    /**
     * Creates a quantifier with separate lower and upper bounds.
     *
     * @param min minimum number of repetitions
     * @param max maximum number of repetitions
     */
    public Quantifier(int min, int max) {
        this.min = min;
        this.max = max;
    }

    /** Sets the counter back to 0. */
    public void resetCounter() {
        counter = 0;
    }

    /** Returns the current counter value. */
    public int getCounter() {
        return counter;
    }

    /** Replaces the current counter value. */
    public void setCounter(int counter) {
        this.counter = counter;
    }

    /** Returns the minimum number of repetitions. */
    public int min() {
        return min;
    }

    /** Returns the maximum number of repetitions. */
    public int max() {
        return max;
    }

    /**
     * Renders the quantifier as <code>{min,max}</code>; the upper bound is
     * left empty when it equals {@code Integer.MAX_VALUE}.
     */
    @Override
    public String toString() {
        // Integer.toString avoids allocating a boxed Integer just to format it.
        return "{" + min + "," + ((max == Integer.MAX_VALUE) ? "" : Integer.toString(max)) + "}";
    }

    @Override
    public int getType() {
        return SpecialToken.TOK_QUANTIFIER;
    }

    /**
     * Returns a new quantifier with the same bounds. The counter is not
     * copied; the new instance starts with its counter at 0.
     */
    @Override
    public Object clone() {
        return new Quantifier(min, max);
    }
}

View File

@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Base class for quantifiers.
*
* @author Nikolay A. Kuznetsov
*/
abstract class QuantifierSet extends AbstractSet {
// Node that this quantifier is applied to.
protected AbstractSet innerSet;
/**
 * Creates a quantifier node.
 *
 * @param innerSet node the quantifier is applied to
 * @param next node passed to the superclass constructor as the continuation
 * @param type value handed to setType(int)
 */
public QuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
super(next);
this.innerSet = innerSet;
setType(type);
}
/**
* Returns the innerSet.
*/
public AbstractSet getInnerSet() {
return innerSet;
}
/**
* Sets an inner set.
*
* @param innerSet
* The innerSet to set.
*/
public void setInnerSet(AbstractSet innerSet) {
this.innerSet = innerSet;
}
/**
 * Delegates to the inner set first and, if that returns false, to the
 * next node.
 */
@Override
public boolean first(AbstractSet set) {
return innerSet.first(set) || next.first(set);
}
/**
 * Always reports true for quantifier nodes.
 */
@Override
public boolean hasConsumed(MatchResultImpl mr) {
return true;
}
/**
* This method is used for traversing nodes after the first stage of
* compilation.
*
* Marks this node as visited, then handles next and innerSet in that order.
* For each of them that has not been visited yet, processBackRefReplacement()
* is called; a non-null result replaces the reference held here (the old
* node is flagged as visited first), and the traversal continues into the
* node that is now referenced.
*/
@Override
public void processSecondPass() {
this.isSecondPassVisited = true;
if (next != null) {
if (!next.isSecondPassVisited) {
/*
* Add here code to do during the pass
*/
JointSet set = next.processBackRefReplacement();
if (set != null) {
// Flag the node being replaced so it is not processed again
// when reached through another reference.
next.isSecondPassVisited = true;
next = set;
}
/*
* End code to do during the pass
*/
next.processSecondPass();
}
}
if (innerSet != null) {
if (!innerSet.isSecondPassVisited) {
/*
* Add here code to do during the pass
*/
JointSet set = innerSet.processBackRefReplacement();
if (set != null) {
innerSet.isSecondPassVisited = true;
innerSet = set;
}
/*
* End code to do during the pass
*/
innerSet.processSecondPass();
} else {
/*
* We reach node through innerSet but it is already traversed.
* You can see this situation for GroupQuantifierSet.innerset if
* we compile smth like "(a)+ when GroupQuantifierSet ==
* GroupQuantifierSet.innerset.fSet.next
*/
/*
* Add here code to do during the pass
*/
// SingleSet.processBackRefReplacement() stores its replacement in the
// 'next' field, so an already-visited back-referenced SingleSet is
// swapped for that stored replacement here.
if (innerSet instanceof SingleSet && ((FSet)((JointSet)innerSet).fSet).isBackReferenced) {
innerSet = innerSet.next;
}
/*
* End code to do during the pass
*/
}
}
}
}

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Represents node accepting single character from the given char class.
*
* @author Nikolay A. Kuznetsov
*/
class RangeSet extends LeafSet {
    private AbstractCharClass chars;
    private boolean alt = false;

    /**
     * Creates a node for the given character class, followed by {@code next}.
     * The class instance and its {@code alt} flag are captured at construction.
     */
    public RangeSet(AbstractCharClass cs, AbstractSet next) {
        super(next);
        chars = cs.getInstance();
        alt = cs.alt;
    }

    /** Creates a node for the given character class without a successor. */
    public RangeSet(AbstractCharClass cc) {
        chars = cc.getInstance();
        alt = cc.alt;
    }

    /**
     * Tests the single char at {@code strIndex} against the character class.
     *
     * @return 1 if the char belongs to the class, -1 otherwise
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        if (chars.contains(testString.charAt(strIndex))) {
            return 1;
        }
        return -1;
    }

    @Override
    protected String getName() {
        String prefix = alt ? "^ " : " ";
        return "range:" + prefix + chars.toString();
    }

    /**
     * Decides by node kind whether this range and {@code set} may overlap:
     * char and range nodes are checked via
     * {@code AbstractCharClass.intersects}, a {@code SupplCharSet} never
     * overlaps, and any other node kind is assumed to overlap.
     */
    @Override
    public boolean first(AbstractSet set) {
        if (set instanceof CharSet) {
            return AbstractCharClass.intersects(chars, ((CharSet) set).getChar());
        }
        if (set instanceof RangeSet) {
            return AbstractCharClass.intersects(chars, ((RangeSet) set).chars);
        }
        if (set instanceof SupplRangeSet) {
            return AbstractCharClass.intersects(chars, ((SupplRangeSet) set).getChars());
        }
        return !(set instanceof SupplCharSet);
    }

    /** Returns the character class this node tests against. */
    protected AbstractCharClass getChars() {
        return chars;
    }
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Reluctant version of "?" quantifier set over group.
*
* @author Nikolay A. Kuznetsov
*/
class RelAltGroupQuantifierSet extends AltGroupQuantifierSet {

    /** Forwards all arguments to the superclass constructor. */
    public RelAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Tries to continue with the next node first and only enters the group
     * when that fails. If the group reports that it has not consumed anything,
     * only the next node is tried.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (innerSet.hasConsumed(matchResult)) {
            int skipped = next.matches(stringIndex, testString, matchResult);
            return skipped >= 0
                    ? skipped
                    : innerSet.matches(stringIndex, testString, matchResult);
        }
        return next.matches(stringIndex, testString, matchResult);
    }
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Reluctant version of composite (i.e. {n,m}) quantifier node over group.
*
* @author Nikolay A. Kuznetsov
*/
class RelCompositeGroupQuantifierSet extends CompositeGroupQuantifierSet {

    /** Forwards all arguments to the superclass constructor. */
    public RelCompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, AbstractSet next, int type,
            int setCounter) {
        super(quant, innerSet, next, type, setCounter);
    }

    /**
     * Reluctant bounded repetition over a group. The number of times the group
     * has been entered is kept in the match result under {@code setCounter}.
     * Once the minimum is reached the next node is tried before the group is
     * entered again; at the maximum only the next node is tried.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int entered = matchResult.getEnterCounter(setCounter);

        if (!innerSet.hasConsumed(matchResult)) {
            return next.matches(stringIndex, testString, matchResult);
        }

        // Upper bound reached: the group may not be entered again.
        if (entered >= quantifier.max()) {
            matchResult.setEnterCounter(setCounter, 0);
            return next.matches(stringIndex, testString, matchResult);
        }

        // Enough repetitions done: prefer leaving the loop.
        if (entered >= quantifier.min()) {
            int viaNext = next.matches(stringIndex, testString, matchResult);
            if (viaNext >= 0) {
                matchResult.setEnterCounter(setCounter, 0);
                return viaNext;
            }
        }

        // Either more repetitions are required or leaving the loop failed.
        matchResult.setEnterCounter(setCounter, entered + 1);
        return innerSet.matches(stringIndex, testString, matchResult);
    }
}

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* This class represents ?? quantifier over leaf sets.
*
* @author Nikolay A. Kuznetsov
*/
class ReluctantAltQuantifierSet extends AltQuantifierSet {

    /** Forwards all arguments to the superclass constructor. */
    public ReluctantAltQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Tries to match the rest of the pattern without the optional leaf first
     * and falls back to the inner set only when that fails.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int withoutLeaf = next.matches(stringIndex, testString, matchResult);
        if (withoutLeaf < 0) {
            return innerSet.matches(stringIndex, testString, matchResult);
        }
        return withoutLeaf;
    }
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Reluctant version of composite(i.e. {n,m}) quantifier set over leaf nodes.
*
* @author Nikolay A. Kuznetsov
*/
class ReluctantCompositeQuantifierSet extends CompositeQuantifierSet {

    /** Forwards all arguments to the superclass constructor. */
    public ReluctantCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, AbstractSet next, int type) {
        super(quant, innerSet, next, type);
    }

    /**
     * Consumes the leaf {@code min} times, then alternates between trying the
     * rest of the pattern and consuming one more leaf, until the rest matches,
     * the leaf refuses, the input runs out, or the repetition count exceeds
     * {@code max}.
     *
     * @return the result of the next node on success, -1 otherwise
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        final int required = quantifier.min();
        final int allowed = quantifier.max();
        int position = stringIndex;
        int taken = 0;

        // Mandatory repetitions.
        while (taken < required) {
            if (position + leaf.charCount() > matchResult.getRightBound()) {
                matchResult.hitEnd = true;
                return -1;
            }
            int consumed = leaf.accepts(position, testString);
            if (consumed < 1) {
                return -1;
            }
            position += consumed;
            taken++;
        }

        // Reluctant part: prefer the continuation, extend only on failure.
        for (;;) {
            int outcome = next.matches(position, testString, matchResult);
            if (outcome >= 0) {
                return outcome;
            }
            if (position + leaf.charCount() > matchResult.getRightBound()) {
                return -1;
            }
            int consumed = leaf.accepts(position, testString);
            if (consumed < 1) {
                return -1;
            }
            position += consumed;
            taken++;
            if (taken > allowed) {
                return -1;
            }
        }
    }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Reluctant version of the group quantifier set.
*
* @author Nikolay A. Kuznetsov
*/
class ReluctantGroupQuantifierSet extends GroupQuantifierSet {

    /** Forwards all arguments to the superclass constructor. */
    public ReluctantGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Tries the next node first and enters the group only when that fails. If
     * the group reports that it has not consumed anything, only the next node
     * is tried.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (innerSet.hasConsumed(matchResult)) {
            int viaNext = next.matches(stringIndex, testString, matchResult);
            return viaNext >= 0
                    ? viaNext
                    : innerSet.matches(stringIndex, testString, matchResult);
        }
        return next.matches(stringIndex, testString, matchResult);
    }
}

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* This class represents [+*]? constructs over LeafSets.
*
* @see LeafSet
* @author Nikolay A. Kuznetsov
*/
class ReluctantQuantifierSet extends LeafQuantifierSet {

    /** Forwards all arguments to the superclass constructor. */
    public ReluctantQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Alternates between trying the rest of the pattern and consuming one more
     * leaf, until the rest matches, the leaf refuses, or the leaf no longer
     * fits before the right bound.
     *
     * @return the result of the next node on success, -1 otherwise
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int position = stringIndex;
        for (;;) {
            int outcome = next.matches(position, testString, matchResult);
            if (outcome >= 0) {
                return outcome;
            }
            if (position + leaf.charCount() > matchResult.getRightBound()) {
                return -1;
            }
            int consumed = leaf.accepts(position, testString);
            if (consumed < 1) {
                return -1;
            }
            position += consumed;
        }
    }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Represents node matching the start of input: index 0, or the left bound
* of the region when anchoring bounds are in use.
*
* @author Nikolay A. Kuznetsov
*/
final class SOLSet extends AbstractSet {

    /**
     * Succeeds at index 0, or at the left bound when anchoring bounds are in
     * effect; consumes no input itself.
     *
     * @return the result of the next node, or -1 when not at such a position
     */
    @Override
    public int matches(int strIndex, CharSequence testString,
            MatchResultImpl matchResult) {
        boolean atStart = strIndex == 0
                || (matchResult.hasAnchoringBounds() && strIndex == matchResult.getLeftBound());
        if (!atStart) {
            return -1;
        }
        return next.matches(strIndex, testString, matchResult);
    }

    /** This node never consumes input. */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return false;
    }

    @Override
    protected String getName() {
        return "<SOL>";
    }
}

View File

@ -0,0 +1,228 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* This class represents nodes constructed with character sequences. For
* example, lets consider regular expression: ".*word.*". During regular
* expression compilation phase character sequence w-o-r-d, will be represented
* with single node for the entire word.
*
* During the match phase, the Boyer-Moore algorithm will be used for fast
* searching.
*
* Please follow the next link for more details about mentioned algorithm:
* http://portal.acm.org/citation.cfm?id=359859
*
* @author Nikolay A. Kuznetsov
*/
class SequenceSet extends LeafSet {
    private String string = null;
    private IntHash leftToRight;
    private IntHash rightToLeft;

    /**
     * Creates a node for the given literal and precomputes the skip tables
     * used by the forward ({@code indexOf}) and backward ({@code lastIndexOf})
     * searches.
     */
    SequenceSet(StringBuffer substring) {
        this.string = substring.toString();
        charCount = substring.length();
        leftToRight = new IntHash(charCount);
        rightToLeft = new IntHash(charCount);
        for (int j = 0; j < charCount - 1; j++) {
            // Forward table: distance from position j to the last char.
            leftToRight.put(string.charAt(j), charCount - j - 1);
            // Backward table: distance from that position to the first char.
            rightToLeft.put(string.charAt(charCount - j - 1), charCount - j - 1);
        }
    }

    /**
     * @return the literal's length if the input contains it at
     *         {@code strIndex}, -1 otherwise
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        return startsWith(testString, strIndex) ? charCount : -1;
    }

    /**
     * Finds the first occurrence of the literal at or after {@code strIndex}
     * (within the right bound) from which the rest of the pattern matches.
     *
     * @return start index of that occurrence, or -1
     */
    @Override
    public int find(int strIndex, CharSequence testString,
            MatchResultImpl matchResult) {
        int strLength = matchResult.getRightBound();
        while (strIndex <= strLength) {
            strIndex = indexOf(testString, strIndex, strLength);
            if (strIndex < 0) {
                return -1;
            }
            if (next.matches(strIndex + charCount, testString, matchResult) >= 0) {
                return strIndex;
            }
            strIndex++;
        }
        return -1;
    }

    /**
     * Finds the last occurrence of the literal between {@code strIndex} and
     * {@code lastIndex} from which the rest of the pattern matches.
     *
     * @return start index of that occurrence, or -1
     */
    @Override
    public int findBack(int strIndex, int lastIndex, CharSequence testString,
            MatchResultImpl matchResult) {
        while (lastIndex >= strIndex) {
            lastIndex = lastIndexOf(testString, strIndex, lastIndex);
            if (lastIndex < 0) {
                return -1;
            }
            if (next.matches(lastIndex + charCount, testString, matchResult) >= 0) {
                return lastIndex;
            }
            lastIndex--;
        }
        return -1;
    }

    @Override
    public String getName() {
        return "sequence: " + string; //$NON-NLS-1$
    }

    /**
     * Checks whether the given node could match at the beginning of this
     * literal; node kinds that are not recognised are assumed to be able to.
     */
    @Override
    public boolean first(AbstractSet set) {
        if (set instanceof CharSet) {
            return ((CharSet) set).getChar() == string.charAt(0);
        } else if (set instanceof RangeSet) {
            return ((RangeSet) set).accepts(0, string.substring(0, 1)) > 0;
        } else if (set instanceof SupplRangeSet) {
            return ((SupplRangeSet) set).contains(string.charAt(0))
                    || ((string.length() > 1) && ((SupplRangeSet) set).contains(Character
                            .toCodePoint(string.charAt(0), string.charAt(1))));
        } else if ((set instanceof SupplCharSet)) {
            return (string.length() > 1)
                    ? ((SupplCharSet) set).getCodePoint()
                            == Character.toCodePoint(string.charAt(0),
                                    string.charAt(1))
                    : false;
        }
        return true;
    }

    /**
     * Forward search for the literal in {@code str}. Only occurrences that
     * start at or after {@code from} and end at or before {@code to} are
     * considered.
     *
     * @return start index of the first occurrence, or -1
     */
    protected int indexOf(CharSequence str, int from, int to) {
        int last = string.charAt(charCount - 1);
        int i = from;
        while (i <= to - charCount) {
            // Compare the char aligned with the literal's last char and use it
            // to decide how far the window can be shifted.
            char ch = str.charAt(i + charCount - 1);
            if (ch == last && startsWith(str, i)) {
                return i;
            }
            i += leftToRight.get(ch);
        }
        return -1;
    }

    /**
     * Backward search for the literal in {@code str}, starting at
     * {@code from} (moved back if the literal would not fit before the end of
     * {@code str}) and going down to {@code to}.
     *
     * @return start index of the last occurrence, or -1
     */
    protected int lastIndexOf(CharSequence str, int to, int from) {
        int first = string.charAt(0);
        int size = str.length();
        int delta;
        int i = ((delta = size - from - charCount) > 0) ? from : from + delta;
        while (i >= to) {
            char ch = str.charAt(i);
            if (ch == first && startsWith(str, i)) {
                return i;
            }
            i -= rightToLeft.get(ch);
        }
        return -1;
    }

    /** Tests whether {@code str} contains the literal starting at {@code from}. */
    protected boolean startsWith(CharSequence str, int from) {
        for (int i = 0; i < charCount; i++) {
            if (str.charAt(i + from) != string.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Small open-addressing map from int keys to shift distances. Looking up
     * a key that was never stored yields the maximum shift given at
     * construction.
     */
    static class IntHash {
        int[] table, values;
        int mask;
        int size; // <-maximum shift
        // A 0 entry in 'table' means "empty slot", so key 0 (the NUL char)
        // cannot be stored there and is kept separately.
        private boolean hasZeroKey;
        private int zeroValue;

        public IntHash(int size) {
            while (size >= mask) {
                mask = (mask << 1) | 1;
            }
            mask = (mask << 1) | 1;
            table = new int[mask + 1];
            values = new int[mask + 1];
            this.size = size;
        }

        /** Stores or overwrites the value for the given key. */
        public void put(int key, int value) {
            if (key == 0) {
                hasZeroKey = true;
                zeroValue = value;
                return;
            }
            int i = 0;
            int hashCode = key & mask;
            for (;;) {
                if (table[hashCode] == 0 // empty
                        || table[hashCode] == key) { // rewrite
                    table[hashCode] = key;
                    values[hashCode] = value;
                    return;
                }
                i++;
                i &= mask;
                hashCode += i;
                hashCode &= mask;
            }
        }

        /** Returns the stored value for the key, or the maximum shift if absent. */
        public int get(int key) {
            if (key == 0) {
                return hasZeroKey ? zeroValue : size;
            }
            int hashCode = key & mask;
            int i = 0;
            int storedKey;
            for (;;) {
                storedKey = table[hashCode];
                if (storedKey == 0) { // empty
                    return size;
                }
                if (storedKey == key) {
                    return values[hashCode];
                }
                i++;
                i &= mask;
                hashCode += i;
                hashCode &= mask;
            }
        }
    }
}

View File

@ -0,0 +1,128 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
* Group node over subexpression w/o alternations.
*
* @author Nikolay A. Kuznetsov
*/
class SingleSet extends JointSet {
    protected AbstractSet kid;

    /**
     * Creates a group around {@code child}; the group index is taken from the
     * given closing node.
     */
    public SingleSet(AbstractSet child, FSet fSet) {
        this.kid = child;
        this.fSet = fSet;
        this.groupIndex = fSet.getGroupIndex();
    }

    /**
     * Records the current index as the group start and matches the child. If
     * the child fails, the previously recorded group start is restored.
     *
     * @return the child's result on success, -1 otherwise
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int previousStart = matchResult.getStart(groupIndex);
        matchResult.setStart(groupIndex, stringIndex);
        int outcome = kid.matches(stringIndex, testString, matchResult);
        if (outcome < 0) {
            matchResult.setStart(groupIndex, previousStart);
            return -1;
        }
        return outcome;
    }

    /** Delegates to the child and records the found index as the group start. */
    @Override
    public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int found = kid.find(stringIndex, testString, matchResult);
        if (found < 0) {
            return found;
        }
        matchResult.setStart(groupIndex, found);
        return found;
    }

    /** Delegates to the child and records the found index as the group start. */
    @Override
    public int findBack(int stringIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
        int found = kid.findBack(stringIndex, lastIndex, testString, matchResult);
        if (found < 0) {
            return found;
        }
        matchResult.setStart(groupIndex, found);
        return found;
    }

    @Override
    public boolean first(AbstractSet set) {
        return kid.first(set);
    }

    /**
     * Creates the back-referenced replacement for this set.
     */
    @Override
    public JointSet processBackRefReplacement() {
        BackReferencedSingleSet replacement = new BackReferencedSingleSet(this);
        /*
         * The replacement is also remembered in the next field. Not every
         * reference to this set can be updated at the current point of the
         * traversal, so later visitors pick the replacement up from here. See
         * the processSecondPass() methods of QuantifierSet and AbstractSet.
         */
        next = replacement;
        return replacement;
    }

    /**
     * Second compilation pass: marks this node as visited, then processes the
     * closing node and the child if they have not been visited yet. A child
     * that yields a back-reference replacement is swapped for it before the
     * traversal continues.
     */
    @Override
    public void processSecondPass() {
        this.isSecondPassVisited = true;

        if (fSet != null && !fSet.isSecondPassVisited) {
            fSet.processSecondPass();
        }

        if (kid == null || kid.isSecondPassVisited) {
            return;
        }
        JointSet replacement = kid.processBackRefReplacement();
        if (replacement != null) {
            // Flag the replaced node so it is not traversed again later.
            kid.isSecondPassVisited = true;
            kid = replacement;
        }
        kid.processSecondPass();
    }
}

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Common base class for special tokens such as character classes and
 * quantifiers.
 *
 * @author Nikolay A. Kuznetsov
 */
abstract class SpecialToken {
    /** Type flag of a token representing a character class. */
    public static final int TOK_CHARCLASS = 0x1;

    /** Type flag of a token representing a quantifier. */
    public static final int TOK_QUANTIFIER = 0x2;

    /**
     * Reports the kind of this token.
     *
     * @return the token type: {@link #TOK_CHARCLASS} for a character class,
     *         {@link #TOK_QUANTIFIER} for a quantifier.
     */
    public abstract int getType();
}

View File

@ -0,0 +1,196 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
*
* COPYRIGHT AND PERMISSION NOTICE
*
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
* hereby granted, free of charge, to any person obtaining a copy of the
* Unicode data files and any associated documentation (the "Data Files")
* or Unicode software and any associated documentation (the "Software")
* to deal in the Data Files or Software without restriction, including without
* limitation the rights to use, copy, modify, merge, publish, distribute,
* and/or sell copies of the Data Files or Software, and to permit persons
* to whom the Data Files or Software are furnished to do so, provided that
* (a) the above copyright notice(s) and this permission notice appear with
* all copies of the Data Files or Software, (b) both the above copyright
* notice(s) and this permission notice appear in associated documentation,
* and (c) there is clear notice in each modified Data File or in the Software
* as well as in the documentation associated with the Data File(s) or Software
* that the data or software has been modified.
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in these Data Files or Software without prior written
* authorization of the copyright holder.
*
* 2. Additional terms from the Database:
*
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
*
* Disclaimer
*
* The Unicode Character Database is provided as is by Unicode, Inc.
* No claims are made as to fitness for any particular purpose. No warranties
* of any kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been purchased
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
* will be exchange of defective media within 90 days of receipt. This disclaimer
* is applicable for all other data files accompanying the Unicode Character Database,
* some of which have been compiled by the Unicode Consortium, and some of which
* have been supplied by other sources.
*
* Limitations on Rights to Redistribute This Data
*
* Recipient is granted the right to make copies in any form for internal
* distribution and to freely use the information supplied in the creation of
* products supporting the UnicodeTM Standard. The files in
* the Unicode Character Database can be redistributed to third parties or other
* organizations (whether for profit or not) as long as this notice and the disclaimer
* notice are retained. Information can be extracted from these files and used
* in documentation or programs, as long as there is an accompanying notice
* indicating the source.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents node accepting single supplementary codepoint.
 */
class SupplCharSet extends LeafSet {
    /*
     * UTF-16 encoding of this supplementary codepoint
     */
    private char high = 0;
    private char low = 0;

    // int value of this supplementary codepoint
    private int ch;

    /**
     * Creates a node for the given codepoint.
     *
     * @param ch
     *            the codepoint to accept; it is expected to be supplementary,
     *            i.e. to be encoded by two UTF-16 chars.
     */
    public SupplCharSet(int ch) {
        charCount = 2;
        this.ch = ch;
        char[] chUTF16 = Character.toChars(ch);
        high = chUTF16[0];
        /*
         * we suppose that SupplCharSet is build over supplementary codepoints
         * only
         */
        low = chUTF16[1];
    }

    /**
     * Checks whether the two chars starting at {@code strIndex} are the
     * surrogate pair of this codepoint.
     *
     * @return 2 (the number of chars consumed) on success, -1 otherwise.
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        char first = testString.charAt(strIndex++);
        char second = testString.charAt(strIndex);
        return ((this.high == first) && (this.low == second)) ? 2 : -1;
    }

    /**
     * Searches forward from {@code strIndex} for an occurrence of this
     * codepoint that is followed by a match of the next node.
     *
     * @return the index of the high surrogate of the occurrence found, or -1.
     */
    @Override
    public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (testString instanceof String) {
            String testStr = (String)testString;
            int strLength = matchResult.getRightBound();
            while (strIndex < strLength) {
                strIndex = testStr.indexOf(high, strIndex);
                if (strIndex < 0) {
                    return -1;
                }
                strIndex++;
                if (strIndex < strLength && low == testStr.charAt(strIndex)
                        && next.matches(strIndex + 1, testString, matchResult) >= 0) {
                    return strIndex - 1;
                }
                /*
                 * Do not step over the char at strIndex: it may itself be the
                 * high surrogate that starts the real occurrence (input like
                 * <high, high, low>), so the next search resumes right here.
                 */
            }
            return -1;
        }
        return super.find(strIndex, testString, matchResult);
    }

    /**
     * Searches backward, between {@code lastIndex} and {@code strIndex}, for
     * an occurrence of this codepoint that is followed by a match of the
     * next node.
     *
     * @return the index of the high surrogate of the occurrence found, or -1.
     */
    @Override
    public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
        if (testString instanceof String) {
            String testStr = (String)testString;
            while (lastIndex >= strIndex) {
                lastIndex = testStr.lastIndexOf(low, lastIndex);
                // step from the low surrogate to the expected high surrogate
                lastIndex--;
                if (lastIndex < 0 || lastIndex < strIndex) {
                    return -1;
                }
                if ((high == testStr.charAt(lastIndex)) && next.matches(lastIndex + 2, testString, matchResult) >= 0) {
                    return lastIndex;
                }
                /*
                 * Do not step over the char at lastIndex: it may itself be the
                 * low surrogate that ends the real occurrence (input like
                 * <high, low, low>), so the next search resumes right here.
                 */
            }
            return -1;
        }
        return super.findBack(strIndex, lastIndex, testString, matchResult);
    }

    @Override
    protected String getName() {
        return "" + high + low;
    }

    /**
     * @return the int value of the codepoint this node accepts.
     */
    protected int getCodePoint() {
        return ch;
    }

    /**
     * Tells whether the given set could start with this codepoint. Single
     * char sets ({@code CharSet}, {@code RangeSet}) never can; for set types
     * not recognized here the answer is conservatively {@code true}.
     */
    @Override
    public boolean first(AbstractSet set) {
        if (set instanceof SupplCharSet) {
            return ((SupplCharSet)set).getCodePoint() == ch;
        } else if (set instanceof SupplRangeSet) {
            return ((SupplRangeSet)set).contains(ch);
        } else if (set instanceof CharSet) {
            return false;
        } else if (set instanceof RangeSet) {
            return false;
        }
        return true;
    }
}

View File

@ -0,0 +1,173 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
*
* COPYRIGHT AND PERMISSION NOTICE
*
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
* hereby granted, free of charge, to any person obtaining a copy of the
* Unicode data files and any associated documentation (the "Data Files")
* or Unicode software and any associated documentation (the "Software")
* to deal in the Data Files or Software without restriction, including without
* limitation the rights to use, copy, modify, merge, publish, distribute,
* and/or sell copies of the Data Files or Software, and to permit persons
* to whom the Data Files or Software are furnished to do so, provided that
* (a) the above copyright notice(s) and this permission notice appear with
* all copies of the Data Files or Software, (b) both the above copyright
* notice(s) and this permission notice appear in associated documentation,
* and (c) there is clear notice in each modified Data File or in the Software
* as well as in the documentation associated with the Data File(s) or Software
* that the data or software has been modified.
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in these Data Files or Software without prior written
* authorization of the copyright holder.
*
* 2. Additional terms from the Database:
*
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
*
* Disclaimer
*
* The Unicode Character Database is provided as is by Unicode, Inc.
* No claims are made as to fitness for any particular purpose. No warranties
* of any kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been purchased
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
* will be exchange of defective media within 90 days of receipt. This disclaimer
* is applicable for all other data files accompanying the Unicode Character Database,
* some of which have been compiled by the Unicode Consortium, and some of which
* have been supplied by other sources.
*
* Limitations on Rights to Redistribute This Data
*
* Recipient is granted the right to make copies in any form for internal
* distribution and to freely use the information supplied in the creation of
* products supporting the UnicodeTM Standard. The files in
* the Unicode Character Database can be redistributed to third parties or other
* organizations (whether for profit or not) as long as this notice and the disclaimer
* notice are retained. Information can be extracted from these files and used
* in documentation or programs, as long as there is an accompanying notice
* indicating the source.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Node that accepts one character out of a given char class. The character
 * may be supplementary (encoded by 2 chars) or belong to the basic
 * multilingual plane (encoded by 1 char).
 */
class SupplRangeSet extends JointSet {
    protected AbstractCharClass chars;
    protected boolean alt = false;

    public SupplRangeSet(AbstractCharClass cs, AbstractSet next) {
        this.chars = cs.getInstance();
        this.alt = cs.alt;
        this.next = next;
    }

    public SupplRangeSet(AbstractCharClass cc) {
        this.chars = cc.getInstance();
        this.alt = cc.alt;
    }

    /**
     * Tries to consume one character of the class at {@code stringIndex} and
     * then match the next node. The single char reading is attempted first;
     * the surrogate pair reading is attempted when that one does not succeed.
     *
     * @return the result of the next node's match, or -1 on failure.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int rightBound = matchResult.getRightBound();
        if (stringIndex >= rightBound) {
            return -1;
        }

        char first = testString.charAt(stringIndex);
        int afterFirst = stringIndex + 1;
        if (contains(first)) {
            int result = next.matches(afterFirst, testString, matchResult);
            // NOTE(review): success is tested with "> 0" here, not ">= 0" as
            // elsewhere in this package - confirm that this is intended.
            if (result > 0) {
                return result;
            }
        }

        if (afterFirst >= rightBound) {
            return -1;
        }

        char second = testString.charAt(afterFirst);
        if (Character.isSurrogatePair(first, second) && contains(Character.toCodePoint(first, second))) {
            return next.matches(afterFirst + 1, testString, matchResult);
        }
        return -1;
    }

    @Override
    protected String getName() {
        String prefix = alt ? "^ " : " ";
        return "range:" + prefix + chars.toString();
    }

    /**
     * @return whether the underlying char class contains the given codepoint.
     */
    public boolean contains(int ch) {
        return chars.contains(ch);
    }

    /**
     * Tells whether the given set could start with a character of this
     * class; for set types not recognized here the answer is conservatively
     * {@code true}.
     */
    @Override
    public boolean first(AbstractSet set) {
        if (set instanceof SupplCharSet) {
            int codePoint = ((SupplCharSet)set).getCodePoint();
            return AbstractCharClass.intersects(chars, codePoint);
        }
        if (set instanceof CharSet) {
            return AbstractCharClass.intersects(chars, ((CharSet)set).getChar());
        }
        if (set instanceof SupplRangeSet) {
            return AbstractCharClass.intersects(chars, ((SupplRangeSet)set).chars);
        }
        if (set instanceof RangeSet) {
            return AbstractCharClass.intersects(chars, ((RangeSet)set).getChars());
        }
        return true;
    }

    protected AbstractCharClass getChars() {
        return chars;
    }

    @Override
    public AbstractSet getNext() {
        return this.next;
    }

    @Override
    public void setNext(AbstractSet next) {
        this.next = next;
    }

    /** This node always reports that input has been consumed. */
    @Override
    public boolean hasConsumed(MatchResultImpl mr) {
        return true;
    }
}

View File

@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * The outcome of a successful match of a {@link TPattern} against an input
 * string. A result consists of groups: one for each pair of parentheses in
 * the regular expression, plus group 0 which stands for the whole regular
 * expression. For every group the start index, the end index and the matched
 * text can be queried.
 *
 * @see TMatcher
 * @see TMatcher#toMatchResult()
 *
 * @author Nikolay A. Kuznetsov
 */
public interface TMatchResult {
    /**
     * Returns the index of the first character of the text matched by the
     * whole regular expression.
     *
     * @return the character index.
     */
    int start();

    /**
     * Returns the index of the first character of the text matched by the
     * given group.
     *
     * @param group
     *            the group index; 0 stands for the whole pattern.
     *
     * @return the character index.
     */
    int start(int group);

    /**
     * Returns the index of the first character after the text matched by the
     * whole regular expression.
     *
     * @return the character index.
     */
    int end();

    /**
     * Returns the index of the first character after the text matched by the
     * given group.
     *
     * @param group
     *            the group index; 0 stands for the whole pattern.
     *
     * @return the character index.
     */
    int end(int group);

    /**
     * Returns the text matched by the whole regular expression.
     *
     * @return the text.
     */
    String group();

    /**
     * Returns the text matched by the given group of the regular expression.
     *
     * @param group
     *            the group index; 0 stands for the whole pattern.
     *
     * @return the text that matched the group.
     */
    String group(int group);

    /**
     * Returns the number of groups in the result, which always equals the
     * number of groups in the original regular expression.
     *
     * @return the number of groups.
     */
    int groupCount();
}

View File

@ -0,0 +1,699 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
import java.util.ArrayList;
/**
* Provides a means of matching regular expressions against a given input,
* finding occurrences of regular expressions in a given input, or replacing
* parts of a given input. A {@code Matcher} instance has an associated
* {@link TPattern} instance and an input text. A typical use case is to
* iteratively find all occurrences of the {@code Pattern}, until the end of the
* input is reached, as the following example illustrates:
*
* <p/>
*
* <pre>
* Pattern p = Pattern.compile(&quot;[A-Za-z]+&quot;);
*
* Matcher m = p.matcher(&quot;Hello, Android!&quot;);
* while (m.find()) {
* System.out.println(m.group()); // prints &quot;Hello&quot; and &quot;Android&quot;
* }
* </pre>
*
* <p/>
*
* The {@code Matcher} has a state that results from the previous operations.
* For example, it knows whether the most recent attempt to find the
* {@code Pattern} was successful and at which position the next attempt would
* resume the search. Depending on the application's needs, it may become
* necessary to explicitly {@link #reset()} this state from time to time.
*/
public final class TMatcher implements TMatchResult {
// Match modes handed to the match result via setMode(): MODE_FIND is used by
// find()/lookingAt(), MODE_MATCH by matches().
static int MODE_FIND = 1 << 0;
static int MODE_MATCH = 1 << 1;
// pattern returned by pattern()
private TPattern pat = null;
// first node of the compiled pattern; searches and matches begin here
private AbstractSet start = null;
// input sequence being examined
private CharSequence string = null;
// holder of the state and the group bounds of the current match
private MatchResultImpl matchResult = null;
// bounds of the region; reset() sets them to the whole input
private int leftBound = -1;
private int rightBound = -1;
// replacements
// input position up to which appendReplacement() has already copied text
private int appendPos = 0;
// replacement template seen by the last processReplacement() call
private String replacement = null;
// expansion of the template produced for the last appendReplacement() call
private String processedRepl = null;
// literal pieces and lazily evaluated group references of the template;
// null while the template contains no '$' reference
private ArrayList<Object> replacementParts = null;
/**
 * Appends to the given {@link StringBuffer} the literal input lying between
 * the previous match and the current one, followed by the replacement for
 * the current match. Used together with {@link #find()} and
 * {@link #appendTail(StringBuffer)}, it allows walking through the input and
 * substituting every occurrence of the {@code Pattern}.
 *
 * @param buffer
 *            the {@code StringBuffer} to append to.
 * @param replacement
 *            the replacement text.
 * @return the {@code Matcher} itself.
 * @throws IllegalStateException
 *             if no successful match has been made.
 */
public TMatcher appendReplacement(StringBuffer buffer, String replacement) {
    // The template is expanded first, so its errors surface before the
    // buffer is touched.
    processedRepl = processReplacement(replacement);
    CharSequence literalPart = string.subSequence(appendPos, start());
    buffer.append(literalPart).append(processedRepl);
    appendPos = end();
    return this;
}
/**
 * Expands a replacement template against the current match.
 * <p>
 * A backslash makes the following character literal; {@code $d}, where
 * {@code d} is a single digit, is substituted by the text of group
 * {@code d}. A referenced group that has no text (its {@code group(int)}
 * value is {@code null}) contributes an empty string.
 * <p>
 * The parsed template is remembered: when the same template is passed again
 * it is not parsed anew, only its group references are re-evaluated against
 * the current match.
 *
 * @param replacement
 *            the replacement template.
 * @return the expanded replacement text.
 * @throws IndexOutOfBoundsException
 *             if the template ends right after a {@code \} or a {@code $},
 *             or if it refers to a group index that is out of range.
 * @throws IllegalArgumentException
 *             if {@code $} is not followed by a digit, or if evaluating the
 *             referenced group fails for another reason.
 */
private String processReplacement(String replacement) {
    if (this.replacement != null && this.replacement.equals(replacement)) {
        // Same template as on the previous call: reuse the parsed form.
        if (replacementParts == null) {
            // no group references - the expansion is a constant
            return processedRepl;
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < replacementParts.size(); i++) {
            // group reference parts re-read the current match in toString()
            sb.append(replacementParts.get(i));
        }
        return sb.toString();
    }

    this.replacement = replacement;
    char[] repl = replacement.toCharArray();
    StringBuilder res = new StringBuilder();
    replacementParts = null;
    int index = 0;
    // start of the literal text in res that is not yet stored as a part
    int replacementPos = 0;
    boolean nextBackSlashed = false;
    while (index < repl.length) {
        if (repl[index] == '\\' && !nextBackSlashed) {
            nextBackSlashed = true;
            index++;
        }
        if (nextBackSlashed) {
            res.append(repl[index]);
            nextBackSlashed = false;
        } else if (repl[index] == '$') {
            if (replacementParts == null) {
                replacementParts = new ArrayList<>();
            }
            try {
                final int gr = Integer.parseInt(new String(repl, ++index, 1));
                if (replacementPos != res.length()) {
                    replacementParts.add(res.subSequence(replacementPos, res.length()));
                    replacementPos = res.length();
                }
                replacementParts.add(new Object() {
                    @Override
                    public String toString() {
                        // a group without text must not be rendered as "null"
                        String text = group(gr);
                        return text != null ? text : "";
                    }
                });
                String group = group(gr);
                if (group != null) {
                    replacementPos += group.length();
                    res.append(group);
                }
            } catch (IndexOutOfBoundsException iob) {
                throw iob;
            } catch (Exception e) {
                throw new IllegalArgumentException("");
            }
        } else {
            res.append(repl[index]);
        }
        index++;
    }
    if (replacementParts != null && replacementPos != res.length()) {
        // trailing literal text after the last group reference
        replacementParts.add(res.subSequence(replacementPos, res.length()));
    }
    return res.toString();
}
/**
 * Supplies a new input and resets the {@code Matcher}. The region becomes
 * the whole input and the results of a previous find are lost, so the next
 * attempt to find an occurrence of the {@link TPattern} starts at the
 * beginning of the input.
 *
 * @param input
 *            the new input sequence.
 *
 * @return the {@code Matcher} itself.
 * @throws NullPointerException
 *             if {@code input} is {@code null}.
 */
public TMatcher reset(CharSequence input) {
    if (input != null) {
        string = input;
        return reset();
    }
    throw new NullPointerException("");
}
/**
 * Resets the {@code Matcher}. The region becomes the whole input and the
 * results of a previous find are lost, so the next attempt to find an
 * occurrence of the {@link TPattern} starts at the beginning of the input.
 *
 * @return the {@code Matcher} itself.
 */
public TMatcher reset() {
    leftBound = 0;
    rightBound = string.length();
    matchResult.reset(string, leftBound, rightBound);
    matchResult.previousMatch = -1;
    // forget the append/replacement state as well
    appendPos = 0;
    replacement = null;
    return this;
}
/**
 * Resets this matcher and restricts it to a region of the input. Only
 * characters inside the region are considered for a match.
 *
 * @param start
 *            the first character of the region.
 * @param end
 *            the first character after the end of the region.
 * @return the {@code Matcher} itself.
 * @throws IndexOutOfBoundsException
 *             unless {@code 0 <= start <= end <= input length}.
 */
public TMatcher region(int start, int end) {
    // 0 <= start <= end <= length; the remaining comparisons follow from these
    boolean outOfRange = start < 0 || start > end || end > string.length();
    if (outOfRange) {
        throw new IndexOutOfBoundsException(start + ", " + end);
    }
    leftBound = start;
    rightBound = end;
    matchResult.reset(null, start, end);
    appendPos = 0;
    replacement = null;
    return this;
}
/**
 * Appends the remainder of the input, i.e. everything after the position
 * reached by the last {@link #appendReplacement(StringBuffer, String)}, to
 * the given {@link StringBuffer}. Used together with {@link #find()} and
 * {@code appendReplacement}, it completes a walk through the input that
 * replaces all matches of the {@code Pattern}.
 *
 * @param buffer
 *            the {@code StringBuffer} to append to.
 * @return the {@code StringBuffer}.
 * @throws IllegalStateException
 *             if no successful match has been made.
 */
public StringBuffer appendTail(StringBuffer buffer) {
    CharSequence tail = string.subSequence(appendPos, string.length());
    return buffer.append(tail);
}
/**
 * Replaces the first occurrence of this matcher's pattern in the input with
 * the given string. The matcher is reset first.
 *
 * @param replacement
 *            the replacement text.
 * @return the modified input string, or the unchanged input when the
 *         pattern does not occur in it.
 */
public String replaceFirst(String replacement) {
    reset();
    if (!find()) {
        return string.toString();
    }
    StringBuffer result = new StringBuffer();
    appendReplacement(result, replacement);
    appendTail(result);
    return result.toString();
}
/**
 * Replaces every occurrence of this matcher's pattern in the input with the
 * given string. The matcher is reset first.
 *
 * @param replacement
 *            the replacement text.
 * @return the modified input string.
 */
public String replaceAll(String replacement) {
    StringBuffer result = new StringBuffer();
    for (reset(); find();) {
        appendReplacement(result, replacement);
    }
    appendTail(result);
    return result.toString();
}
/**
 * Gives access to the {@link TPattern} this matcher currently works with.
 *
 * @return the {@code Pattern} instance.
 */
public TPattern pattern() {
    return this.pat;
}
/**
 * Returns the text that matched a given group of the regular expression.
 *
 * @param group
 *            the group index, from 0 to {@code groupCount()} inclusive,
 *            with 0 representing the whole pattern.
 * @return the text that matched the group.
 * @throws IndexOutOfBoundsException
 *             if {@code group} is negative or greater than
 *             {@code groupCount()}.
 * @throws IllegalStateException
 *             if no successful match has been made.
 */
@Override
public String group(int group) {
    int count = matchResult.groupCount();
    if (group < 0 || group > count) {
        // the upper bound is inclusive, hence the closing ']'
        throw new IndexOutOfBoundsException("Index " + group + " is out of range [0; " + count + "]");
    }
    return matchResult.group(group);
}
/**
 * Returns the text matched by the whole regular expression, i.e. group 0.
 *
 * @return the text.
 * @throws IllegalStateException
 *             if no successful match has been made.
 */
@Override
public String group() {
    final int wholeMatch = 0;
    return group(wholeMatch);
}
/**
 * Looks for the next occurrence of the {@link TPattern} in the input,
 * beginning the search at the given character of the input.
 *
 * @param start
 *            the index in the input at which the find operation is to
 *            begin.
 * @return true if (and only if) a match has been found.
 * @throws IndexOutOfBoundsException
 *             if {@code start} is negative or greater than the length of
 *             the input.
 */
public boolean find(int start) {
    int inputLength = string.length();
    if (start < 0 || start > inputLength) {
        throw new IndexOutOfBoundsException(String.valueOf(start));
    }

    int foundAt = findAt(start);
    boolean found = foundAt >= 0 && matchResult.isValid();
    if (!found) {
        // forget the search position, so that a later find() starts afresh
        matchResult.startIndex = -1;
        return false;
    }
    matchResult.finalizeMatch();
    return true;
}
/**
 * Prepares the match result for a search in find mode and runs the search
 * from the first node of the pattern.
 *
 * @param startIndex
 *            the index the search begins at.
 * @return the index reported by the search; when it is -1 the match result
 *         is additionally flagged as having hit the end.
 */
private int findAt(int startIndex) {
    matchResult.reset();
    matchResult.setMode(TMatcher.MODE_FIND);
    matchResult.setStartIndex(startIndex);

    int position = start.find(startIndex, string, matchResult);
    if (position != -1) {
        return position;
    }
    matchResult.hitEnd = true;
    return position;
}
/**
 * Looks for the next occurrence of the {@link TPattern} in the input. After
 * a previous find the search continues from the first character following
 * that match; otherwise it begins at the start of the region (position 0
 * when no region has been set).
 *
 * @return true if (and only if) a match has been found.
 */
public boolean find() {
    int limit = string.length();
    if (!hasTransparentBounds()) {
        limit = rightBound;
    }

    boolean continuing = matchResult.startIndex >= 0 && matchResult.mode() == TMatcher.MODE_FIND;
    if (!continuing) {
        return find(leftBound);
    }

    matchResult.startIndex = matchResult.end();
    if (matchResult.end() == matchResult.start()) {
        // the previous match was empty: step forward to avoid finding it again
        matchResult.startIndex++;
    }
    return matchResult.startIndex <= limit && find(matchResult.startIndex);
}
/**
 * Returns the index of the first character of the text matched by the given
 * group, as recorded in the current match result.
 *
 * @param group
 *            the group index; 0 represents the whole pattern.
 * @return the character index.
 * @throws IllegalStateException
 *             if no successful match has been made.
 */
@Override
public int start(int group) {
    return this.matchResult.start(group);
}
/**
 * Returns the index of the first character after the text matched by the
 * given group, as recorded in the current match result.
 *
 * @param group
 *            the group index; 0 represents the whole pattern.
 * @return the character index.
 * @throws IllegalStateException
 *             if no successful match has been made.
 */
@Override
public int end(int group) {
    return this.matchResult.end(group);
}
/**
 * Attempts to match the {@link TPattern} against the entire region (the
 * entire input when no region has been set). The attempt starts at the left
 * bound and runs in match mode.
 *
 * @return true if (and only if) the {@code Pattern} matches the entire
 *         region.
 */
public boolean matches() {
    final int mode = TMatcher.MODE_MATCH;
    return lookingAt(leftBound, mode);
}
/**
 * Produces a literal replacement string for the given text: every backslash
 * and every dollar sign gets a backslash in front of it.
 *
 * @param s
 *            the input string.
 * @return the input string with all backslashes and dollar signs escaped;
 *         the very same string when there is nothing to escape.
 */
public static String quoteReplacement(String s) {
    // nothing to quote - hand the argument back untouched
    if (s.indexOf('\\') < 0 && s.indexOf('$') < 0) {
        return s;
    }

    int length = s.length();
    StringBuilder quoted = new StringBuilder(length * 2);
    for (int pos = 0; pos < length; pos++) {
        char current = s.charAt(pos);
        if (current == '$' || current == '\\') {
            quoted.append('\\');
        }
        quoted.append(current);
    }
    return quoted.toString();
}
/**
 * Matches the input, from position <code>index</code>, against the node
 * <code>set</code>. The outcome is stored in <code>matchResult</code>,
 * which is finalized when the match succeeds.
 *
 * @return true if (and only if) the match succeeded.
 */
private boolean runMatch(AbstractSet set, int index, MatchResultImpl matchResult) {
    boolean matched = set.matches(index, string, matchResult) >= 0;
    if (matched) {
        matchResult.finalizeMatch();
    }
    return matched;
}
/**
 * Attempts to match the {@link TPattern} starting at the beginning of the
 * region (the beginning of the input when no region has been set). Unlike
 * {@link #matches()}, the {@code Pattern} does not have to match the whole
 * region.
 *
 * @return true if (and only if) the {@code Pattern} matches.
 */
public boolean lookingAt() {
    final int mode = TMatcher.MODE_FIND;
    return lookingAt(leftBound, mode);
}
/**
 * Resets the match result, configures it with the given mode and start
 * index, and runs a match from the first node of the pattern.
 *
 * @return true if (and only if) the match succeeded.
 */
private boolean lookingAt(int startIndex, int mode) {
    MatchResultImpl result = this.matchResult;
    result.reset();
    result.setMode(mode);
    result.setStartIndex(startIndex);
    return runMatch(this.start, startIndex, result);
}
/**
 * Returns the index of the first character of the text matched by the whole
 * regular expression, i.e. the start of group 0.
 *
 * @return the character index.
 * @throws IllegalStateException
 *             if no successful match has been made.
 */
@Override
public int start() {
    final int wholeMatch = 0;
    return start(wholeMatch);
}
/**
 * Returns the number of groups in the results, as reported by the current
 * match result; it always equals the number of groups in the original
 * regular expression.
 *
 * @return the number of groups.
 */
@Override
public int groupCount() {
    return this.matchResult.groupCount();
}
/**
 * Returns the index of the first character after the text matched by the
 * whole regular expression, i.e. the end of group 0.
 *
 * @return the character index.
 * @throws IllegalStateException
 *             if no successful match has been made.
 */
@Override
public int end() {
    final int wholeMatch = 0;
    return end(wholeMatch);
}
/**
 * Converts the current match into a separate {@link TMatchResult} instance,
 * obtained by cloning the internal match result. The new object is
 * unaffected when the state of this matcher changes.
 *
 * @return the new {@code MatchResult}.
 * @throws IllegalStateException
 *             if no successful match has been made.
 */
public TMatchResult toMatchResult() {
    return matchResult.cloneImpl();
}
/**
 * Switches anchoring bounds on or off for this matcher. With anchoring
 * bounds enabled, the start and end of the region match the '^' and '$'
 * meta-characters; otherwise they do not. Anchoring bounds are enabled by
 * default.
 *
 * @param value
 *            the new value for anchoring bounds.
 * @return the {@code Matcher} itself.
 */
public TMatcher useAnchoringBounds(boolean value) {
    this.matchResult.useAnchoringBounds(value);
    return this;
}
/**
 * Tells whether anchoring bounds are enabled for this matcher. With
 * anchoring bounds enabled, the start and end of the region match the '^'
 * and '$' meta-characters; otherwise they do not. Anchoring bounds are
 * enabled by default.
 *
 * @return true if (and only if) the {@code Matcher} uses anchoring bounds.
 */
public boolean hasAnchoringBounds() {
    return this.matchResult.hasAnchoringBounds();
}
/**
 * Switches transparent bounds on or off for this matcher. With transparent
 * bounds enabled, the parts of the input outside the region are subject to
 * lookahead and lookbehind; otherwise they are not. Transparent bounds are
 * disabled by default.
 *
 * @param value
 *            the new value for transparent bounds.
 * @return the {@code Matcher} itself.
 */
public TMatcher useTransparentBounds(boolean value) {
    this.matchResult.useTransparentBounds(value);
    return this;
}
/**
 * Tells whether transparent bounds are currently enabled. With transparent
 * bounds enabled, the parts of the input outside the region are subject to
 * lookahead and lookbehind; with them disabled they are not. Transparent
 * bounds are disabled by default.
 *
 * @return true if (and only if) the {@code Matcher} uses transparent
 *         bounds.
 */
public boolean hasTransparentBounds() {
    return this.matchResult.hasTransparentBounds();
}
/**
 * Returns the start of this matcher's region, that is, the index of the
 * first character that is considered for a match. The value is the left
 * bound stored in the match state.
 *
 * @return the start of the region.
 */
public int regionStart() {
    return this.matchResult.getLeftBound();
}
/**
 * Returns the end of this matcher's region, that is, the index of the
 * first character that is no longer considered for a match. The value is
 * the right bound stored in the match state.
 *
 * @return the end of the region.
 */
public int regionEnd() {
    return this.matchResult.getRightBound();
}
/**
 * Tells whether additional input could turn a successful match into an
 * unsuccessful one. The answer is read from the {@code requireEnd} flag of
 * the match state.
 *
 * @return true if (and only if) more input might change a successful match
 *         into an unsuccessful one.
 */
public boolean requireEnd() {
    return this.matchResult.requireEnd;
}
/**
 * Tells whether the last match hit the end of the input. The answer is
 * read from the {@code hitEnd} flag of the match state.
 *
 * @return true if (and only if) the last match hit the end of the input.
 */
public boolean hitEnd() {
    return this.matchResult.hitEnd;
}
/**
 * Sets a new pattern for the {@code Matcher}. The match state is replaced
 * by a fresh one sized for the new pattern, so the results (groups) of a
 * previous find are lost. The end position of the previous match and the
 * current mode are copied into the new state; the region bounds passed to
 * it are this matcher's {@code leftBound} and {@code rightBound} fields.
 *
 * @param pattern
 *            the new {@code Pattern}; must not be {@code null}.
 *
 * @return the {@code Matcher} itself.
 * @throws IllegalArgumentException
 *             if {@code pattern} is {@code null}.
 */
public TMatcher usePattern(TPattern pattern) {
    if (pattern == null) {
        throw new IllegalArgumentException("Pattern cannot be null");
    }
    // Read everything that must survive before the old state is discarded.
    int startIndex = matchResult.getPreviousMatchEnd();
    int mode = matchResult.mode();
    this.pat = pattern;
    this.start = pattern.start;
    matchResult = new MatchResultImpl(this.string, leftBound, rightBound, pattern.groupCount(),
            pattern.compCount(), pattern.consCount());
    // Presumably lets the next search continue after the previous match
    // instead of restarting; confirm against MatchResultImpl.
    matchResult.setStartIndex(startIndex);
    matchResult.setMode(mode);
    return this;
}
/**
 * Creates a matcher that applies {@code pat} to the input {@code cs}. The
 * region initially spans the whole input, from 0 to {@code cs.length()}.
 *
 * @param pat
 *            the compiled pattern to match with.
 * @param cs
 *            the character sequence to match against.
 */
TMatcher(TPattern pat, CharSequence cs) {
    this.pat = pat;
    this.start = pat.start;
    this.string = cs;
    // The default region covers the complete input.
    this.leftBound = 0;
    this.rightBound = cs.length();
    this.matchResult = new MatchResultImpl(cs, this.leftBound, this.rightBound, pat.groupCount(),
            pat.compCount(), pat.consCount());
}
/**
 * Builds a diagnostic description of this matcher: the pattern, the
 * current region bounds and the start of the last match. The last-match
 * part is left empty when {@code start()} throws
 * {@link IllegalStateException}.
 *
 * @return the textual description.
 */
@Override
public String toString() {
    String lastMatch;
    try {
        lastMatch = Integer.toString(start());
    } catch (IllegalStateException ignored) {
        // Deliberate best-effort: no match position to report.
        lastMatch = "";
    }
    String region = matchResult.getLeftBound() + "," + matchResult.getRightBound();
    return "Regex[pattern=" + pat + " region=" + region + " lastmatch=" + lastMatch + "]";
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,125 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
import java.util.Arrays;
/**
 * Encapsulates a syntax error that occurred during the compilation of a
 * {@link TPattern}. Might include a detailed description, the original regular
 * expression, and the index at which the error occurred.
 *
 * @see TPattern#compile(String)
 * @see TPattern#compile(java.lang.String,int)
 *
 * @author Nikolay A. Kuznetsov
 */
public class TPatternSyntaxException extends IllegalArgumentException {
    private static final long serialVersionUID = -3864639126226059218L;

    /**
     * Holds the description of the syntax error, or null if the description is
     * not known.
     */
    private final String desc;

    /**
     * Holds the syntactically incorrect regular expression, or null if the
     * regular expression is not known.
     */
    private final String pattern;

    /**
     * Holds the index around which the error occurred, or -1, in case it is
     * unknown.
     */
    private final int index;

    /**
     * Creates a new PatternSyntaxException for a given message, pattern, and
     * error index.
     *
     * @param description
     *            the description of the syntax error, or {@code null} if the
     *            description is not known.
     * @param pattern
     *            the syntactically incorrect regular expression, or
     *            {@code null} if the regular expression is not known.
     * @param index
     *            the character index around which the error occurred, or -1 if
     *            the index is not known.
     */
    public TPatternSyntaxException(String description, String pattern, int index) {
        this.desc = description;
        this.pattern = pattern;
        this.index = index;
    }

    /**
     * Returns the syntactically incorrect regular expression.
     *
     * @return the regular expression, or {@code null} if it is not known.
     */
    public String getPattern() {
        return pattern;
    }

    /**
     * Returns a detailed error message for the exception.
     * <p>
     * When no pattern is available (it is {@code null} or empty) the message
     * is just the description. Otherwise the message is multi-line: the
     * description followed by {@code " near index N"}, the pattern on the next
     * line, and on a third line a {@code ^} marker placed under the offending
     * index. The index suffix and the marker line are omitted when the index
     * is negative (unknown).
     *
     * @return the error message.
     */
    @Override
    public String getMessage() {
        StringBuilder message = new StringBuilder();
        // Appending keeps a null description rendered as the text "null".
        message.append(desc);
        if (pattern == null || pattern.length() == 0) {
            return message.toString();
        }
        boolean indexKnown = index >= 0;
        if (indexKnown) {
            message.append(" near index ").append(index);
        }
        message.append('\n').append(pattern);
        if (indexKnown) {
            // Pad with spaces so that the caret lines up with the error index.
            char[] filler = new char[index];
            Arrays.fill(filler, ' ');
            message.append('\n').append(new String(filler)).append('^');
        }
        return message.toString();
    }

    /**
     * Returns the description of the syntax error, or {@code null} if the
     * description is not known.
     *
     * @return the description.
     */
    public String getDescription() {
        return desc;
    }

    /**
     * Returns the character index around which the error occurred, or -1 if the
     * index is not known.
     *
     * @return the index.
     */
    public int getIndex() {
        return index;
    }
}

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Unicode case insensitive back reference (i.e. \1-9) node.
 *
 * @author Nikolay A. Kuznetsov
 */
class UCIBackReferenceSet extends CIBackReferenceSet {
    /** Index of the referenced group; only used by {@link #getName()}. */
    int groupIndex;

    /**
     * Creates the node.
     *
     * @param groupIndex
     *            index of the group this node refers back to.
     * @param consCounter
     *            slot passed on to the superclass; it is used by
     *            {@code matches} to record how many characters were consumed.
     */
    public UCIBackReferenceSet(int groupIndex, int consCounter) {
        super(groupIndex, consCounter);
        // Without this assignment the field keeps its default value and
        // getName() always reports group 0.
        this.groupIndex = groupIndex;
    }

    /**
     * Compares the text at {@code stringIndex} with the string obtained from
     * {@code getString(matchResult)} (presumably the text captured by the
     * referenced group), character by character, after folding every
     * character with {@code toLowerCase(toUpperCase(c))}.
     *
     * @return the result of the next node when the text matches, otherwise -1.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        String group = getString(matchResult);
        if (group == null || (stringIndex + group.length()) > matchResult.getRightBound()) {
            return -1;
        }
        for (int i = 0; i < group.length(); i++) {
            char expected = Character.toLowerCase(Character.toUpperCase(group.charAt(i)));
            char actual = Character.toLowerCase(Character.toUpperCase(testString.charAt(stringIndex + i)));
            if (expected != actual) {
                return -1;
            }
        }
        matchResult.setConsumed(consCounter, group.length());
        return next.matches(stringIndex + group.length(), testString, matchResult);
    }

    @Override
    public String getName() {
        return "UCI back reference: " + this.groupIndex; //$NON-NLS-1$
    }
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Leaf node that accepts exactly one character, compared in a Unicode case
 * insensitive manner.
 *
 * @author Nikolay A. Kuznetsov
 */
class UCICharSet extends LeafSet {
    /** The expected character, already folded via upper case to lower case. */
    private char ch;

    /**
     * @param ch
     *            the character to accept; it is stored in folded form.
     */
    public UCICharSet(char ch) {
        char upper = Character.toUpperCase(ch);
        this.ch = Character.toLowerCase(upper);
    }

    /**
     * Folds the character at {@code strIndex} the same way as the stored one
     * and compares the two.
     *
     * @return 1 when the characters are equal after folding, otherwise -1.
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        char candidate = Character.toLowerCase(Character.toUpperCase(testString.charAt(strIndex)));
        if (candidate == this.ch) {
            return 1;
        }
        return -1;
    }

    @Override
    protected String getName() {
        return "UCI " + ch;
    }
}

View File

@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents the Unicode case insensitive canonical decomposition of a
 * Unicode character. Is used when the CANON_EQ flag of the Pattern class is
 * specified.
 * <p>
 * This class is only a stub: it adds nothing to {@link DecomposedCharSet}
 * beyond forwarding the constructor arguments.
 */
class UCIDecomposedCharSet extends DecomposedCharSet {

    /**
     * Forwards both arguments unchanged to the superclass constructor.
     */
    public UCIDecomposedCharSet(int[] decomp, int decomposedCharLength) {
        super(decomp, decomposedCharLength);
    }
}

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents a node accepting a single character from the given char class.
 * Note, this class contains normalized characters for Unicode case; ASCII
 * case is supported through adding both symbols to the range.
 *
 * @author Nikolay A. Kuznetsov
 */
class UCIRangeSet extends LeafSet {
    /** The character class the folded input character is looked up in. */
    private AbstractCharClass chars;

    /** Copy of the char class's {@code alt} flag; only used by {@link #getName()}. */
    private boolean alt = false;

    /**
     * @param cs
     *            the character class to test against.
     * @param next
     *            the node passed on to the superclass constructor.
     */
    public UCIRangeSet(AbstractCharClass cs, AbstractSet next) {
        super(next);
        this.alt = cs.alt;
        this.chars = cs.getInstance();
    }

    /**
     * @param cc
     *            the character class to test against.
     */
    public UCIRangeSet(AbstractCharClass cc) {
        this.alt = cc.alt;
        this.chars = cc.getInstance();
    }

    /**
     * Folds the character at {@code strIndex} (upper case, then lower case)
     * and checks it against the character class.
     *
     * @return 1 when the folded character belongs to the class, otherwise -1.
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        char folded = Character.toLowerCase(Character.toUpperCase(testString.charAt(strIndex)));
        if (chars.contains(folded)) {
            return 1;
        }
        return -1;
    }

    @Override
    protected String getName() {
        String prefix = alt ? "^ " : " ";
        return "UCI range:" + prefix + chars.toString();
    }
}

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Node accepting substrings in a Unicode case insensitive manner.
 *
 * @author Nikolay A. Kuznetsov
 */
class UCISequenceSet extends LeafSet {
    /** The expected text with every character folded (upper case, then lower case). */
    private String string;

    /**
     * Stores a folded copy of {@code substring} and sets {@code charCount} to
     * its length.
     *
     * @param substring
     *            the literal text this node has to match.
     */
    UCISequenceSet(StringBuffer substring) {
        StringBuilder folded = new StringBuilder();
        int index = 0;
        while (index < substring.length()) {
            char original = substring.charAt(index++);
            folded.append(Character.toLowerCase(Character.toUpperCase(original)));
        }
        this.string = folded.toString();
        this.charCount = this.string.length();
    }

    /**
     * Compares the input starting at {@code strIndex} with the stored text,
     * folding each input character first.
     *
     * @return the length of the stored text on success, otherwise -1.
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        final int length = string.length();
        for (int offset = 0; offset < length; offset++) {
            char candidate = Character.toLowerCase(Character.toUpperCase(testString.charAt(strIndex + offset)));
            if (candidate != string.charAt(offset)) {
                return -1;
            }
        }
        return length;
    }

    @Override
    public String getName() {
        return "UCI sequence: " + string; //$NON-NLS-1$
    }
}

View File

@ -0,0 +1,110 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
*
* COPYRIGHT AND PERMISSION NOTICE
*
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
* hereby granted, free of charge, to any person obtaining a copy of the
* Unicode data files and any associated documentation (the "Data Files")
* or Unicode software and any associated documentation (the "Software")
* to deal in the Data Files or Software without restriction, including without
* limitation the rights to use, copy, modify, merge, publish, distribute,
* and/or sell copies of the Data Files or Software, and to permit persons
* to whom the Data Files or Software are furnished to do so, provided that
* (a) the above copyright notice(s) and this permission notice appear with
* all copies of the Data Files or Software, (b) both the above copyright
* notice(s) and this permission notice appear in associated documentation,
* and (c) there is clear notice in each modified Data File or in the Software
* as well as in the documentation associated with the Data File(s) or Software
* that the data or software has been modified.
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in these Data Files or Software without prior written
* authorization of the copyright holder.
*
* 2. Additional terms from the Database:
*
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
*
* Disclaimer
*
* The Unicode Character Database is provided as is by Unicode, Inc.
* No claims are made as to fitness for any particular purpose. No warranties
* of any kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been purchased
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
* will be exchange of defective media within 90 days of receipt. This disclaimer
* is applicable for all other data files accompanying the Unicode Character Database,
* some of which have been compiled by the Unicode Consortium, and some of which
* have been supplied by other sources.
*
* Limitations on Rights to Redistribute This Data
*
* Recipient is granted the right to make copies in any form for internal
* distribution and to freely use the information supplied in the creation of
* products supporting the UnicodeTM Standard. The files in
* the Unicode Character Database can be redistributed to third parties or other
* organizations (whether for profit or not) as long as this notice and the disclaimer
* notice are retained. Information can be extracted from these files and used
* in documentation or programs, as long as there is an accompanying notice
* indicating the source.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents a node accepting a single supplementary code point in a Unicode
 * case insensitive manner.
 */
class UCISupplCharSet extends LeafSet {
    /** The expected supplementary code point, folded (upper case, then lower case). */
    private int ch;

    /**
     * Sets {@code charCount} to 2 and stores the folded code point.
     *
     * @param ch
     *            the code point to accept.
     */
    public UCISupplCharSet(int ch) {
        charCount = 2;
        int upper = Character.toUpperCase(ch);
        this.ch = Character.toLowerCase(upper);
    }

    /**
     * Combines the two chars at {@code strIndex} and {@code strIndex + 1} into
     * a code point, folds it and compares it with the stored code point.
     *
     * @return 2 when the code points are equal after folding, otherwise -1.
     */
    @Override
    public int accepts(int strIndex, CharSequence testString) {
        char high = testString.charAt(strIndex);
        char low = testString.charAt(strIndex + 1);
        int candidate = Character.toLowerCase(Character.toUpperCase(Character.toCodePoint(high, low)));
        if (candidate == this.ch) {
            return 2;
        }
        return -1;
    }

    @Override
    protected String getName() {
        return "UCI " + new String(Character.toChars(ch));
    }
}

View File

@ -0,0 +1,110 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
*
* COPYRIGHT AND PERMISSION NOTICE
*
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
* hereby granted, free of charge, to any person obtaining a copy of the
* Unicode data files and any associated documentation (the "Data Files")
* or Unicode software and any associated documentation (the "Software")
* to deal in the Data Files or Software without restriction, including without
* limitation the rights to use, copy, modify, merge, publish, distribute,
* and/or sell copies of the Data Files or Software, and to permit persons
* to whom the Data Files or Software are furnished to do so, provided that
* (a) the above copyright notice(s) and this permission notice appear with
* all copies of the Data Files or Software, (b) both the above copyright
* notice(s) and this permission notice appear in associated documentation,
* and (c) there is clear notice in each modified Data File or in the Software
* as well as in the documentation associated with the Data File(s) or Software
* that the data or software has been modified.
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in these Data Files or Software without prior written
* authorization of the copyright holder.
*
* 2. Additional terms from the Database:
*
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
*
* Disclaimer
*
* The Unicode Character Database is provided as is by Unicode, Inc.
* No claims are made as to fitness for any particular purpose. No warranties
* of any kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been purchased
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
* will be exchange of defective media within 90 days of receipt. This disclaimer
* is applicable for all other data files accompanying the Unicode Character Database,
* some of which have been compiled by the Unicode Consortium, and some of which
* have been supplied by other sources.
*
* Limitations on Rights to Redistribute This Data
*
* Recipient is granted the right to make copies in any form for internal
* distribution and to freely use the information supplied in the creation of
* products supporting the UnicodeTM Standard. The files in
* the Unicode Character Database can be redistributed to third parties or other
* organizations (whether for profit or not) as long as this notice and the disclaimer
* notice are retained. Information can be extracted from these files and used
* in documentation or programs, as long as there is an accompanying notice
* indicating the source.
*/
package org.teavm.classlib.java.util.regex;
/**
 * Represents a node accepting a single character from the given char class
 * in a Unicode case insensitive manner. This character can be supplementary
 * (2 chars to represent) or from the basic multilingual plane (1 char to
 * represent).
 */
class UCISupplRangeSet extends SupplRangeSet {

    /** Forwards both arguments unchanged to the superclass constructor. */
    public UCISupplRangeSet(AbstractCharClass cs, AbstractSet next) {
        super(cs, next);
    }

    /** Forwards the char class unchanged to the superclass constructor. */
    public UCISupplRangeSet(AbstractCharClass cc) {
        super(cc);
    }

    /**
     * Folds the code point (upper case, then lower case) before looking it up
     * in the inherited character class.
     */
    @Override
    public boolean contains(int ch) {
        int folded = Character.toLowerCase(Character.toUpperCase(ch));
        return chars.contains(folded);
    }

    @Override
    protected String getName() {
        String prefix = alt ? "^ " : " ";
        return "UCI range:" + prefix + chars.toString();
    }
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Unix line terminator, accepting only \n.
 *
 * @author Nikolay A. Kuznetsov
 */
final class UEOLSet extends AbstractSet {
    /** Slot in the match result where the number of consumed characters is stored. */
    private int consCounter;

    public UEOLSet(int counter) {
        this.consCounter = counter;
    }

    /**
     * Succeeds at the right limit of the input (consuming nothing) or directly
     * before a final {@code '\n'} (consuming it). The right limit is the right
     * bound of the match result when anchoring bounds are on, otherwise the
     * length of the input.
     *
     * @return the result of the next node, or -1 when this is not an end of line.
     */
    @Override
    public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        int limit;
        if (matchResult.hasAnchoringBounds()) {
            limit = matchResult.getRightBound();
        } else {
            limit = testString.length();
        }

        if (strIndex >= limit) {
            matchResult.setConsumed(consCounter, 0);
            return next.matches(strIndex, testString, matchResult);
        }

        // A single '\n' directly before the limit is the final line terminator.
        boolean lastChar = (limit - strIndex) == 1;
        if (lastChar && testString.charAt(strIndex) == '\n') {
            matchResult.setConsumed(consCounter, 1);
            return next.matches(strIndex + 1, testString, matchResult);
        }
        return -1;
    }

    /**
     * Reads the consumed count of this node, resets the slot to -1 and reports
     * whether the value read was anything other than zero.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        int consumed = matchResult.getConsumed(consCounter);
        matchResult.setConsumed(consCounter, -1);
        return consumed != 0;
    }

    @Override
    protected String getName() {
        return "<EOL>"; //$NON-NLS-1$
    }
}

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Unix style multiline end-of-line node.
 *
 * @author Nikolay A. Kuznetsov
 */
class UMultiLineEOLSet extends AbstractSet {
    /** Slot in the match result where the number of consumed characters is stored. */
    private int consCounter;

    public UMultiLineEOLSet(int counter) {
        this.consCounter = counter;
    }

    /**
     * Succeeds at the right limit of the input (consuming nothing) or at any
     * {@code '\n'} (consuming it). The right limit is the right bound of the
     * match result when anchoring bounds are on, otherwise the length of the
     * input.
     *
     * @return the result of the next node, or -1 when this is not an end of line.
     */
    @Override
    public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
        int limit = matchResult.hasAnchoringBounds() ? matchResult.getRightBound() : testString.length();
        int remaining = limit - strIndex;

        if (remaining <= 0) {
            matchResult.setConsumed(consCounter, 0);
            return next.matches(strIndex, testString, matchResult);
        }
        if (testString.charAt(strIndex) == '\n') {
            matchResult.setConsumed(consCounter, 1);
            return next.matches(strIndex + 1, testString, matchResult);
        }
        return -1;
    }

    /**
     * Reads the consumed count of this node, resets the slot to -1 and reports
     * whether the value read was anything other than zero.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        int consumed = matchResult.getConsumed(consCounter);
        matchResult.setConsumed(consCounter, -1);
        return consumed != 0;
    }

    @Override
    protected String getName() {
        return "<Unix MultiLine $>";
    }
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Unicode category (i.e. Ll, Lu).
 *
 * @author Nikolay A. Kuznetsov
 */
class UnicodeCategory extends AbstractCharClass {
    /** The general category value compared against {@code Character.getType}. */
    protected int category;

    /**
     * @param category
     *            the general category this class accepts.
     */
    public UnicodeCategory(int category) {
        this.category = category;
    }

    /**
     * Tests whether the code point belongs to the category. The result is
     * inverted when the inherited {@code alt} flag is set.
     *
     * @param ch
     *            the code point to test; may be a supplementary code point.
     * @return {@code true} if the code point is accepted.
     */
    @Override
    public boolean contains(int ch) {
        // Use the int overload: narrowing to char would classify a
        // supplementary code point by an unrelated BMP character.
        return alt ^ (category == Character.getType(ch));
    }
}

View File

@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Unicode category scope (i.e IsL, IsM, ...)
 *
 * @author Nikolay A. Kuznetsov
 */
class UnicodeCategoryScope extends UnicodeCategory {

    /**
     * @param category
     *            bit mask of accepted categories: bit {@code n} is tested for
     *            a code point whose {@code Character.getType} value is
     *            {@code n}.
     */
    public UnicodeCategoryScope(int category) {
        super(category);
    }

    /**
     * Tests whether the bit for the code point's general category is set in
     * the mask. The result is inverted when the inherited {@code alt} flag is
     * set.
     *
     * @param ch
     *            the code point to test; may be a supplementary code point.
     * @return {@code true} if the code point is accepted.
     */
    @Override
    public boolean contains(int ch) {
        // Use the int overload: narrowing to char would classify a
        // supplementary code point by an unrelated BMP character.
        int type = Character.getType(ch);
        boolean inScope = ((category >> type) & 1) != 0;
        return alt ^ inScope;
    }
}

View File

@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Greedy quantifier node for the case where there is no intersection with the
 * next node, so normal quantifiers can be treated as greedy and possessive.
 *
 * @author Nikolay A. Kuznetsov
 */
class UnifiedQuantifierSet extends LeafQuantifierSet {

    /** Forwards all arguments unchanged to the superclass constructor. */
    public UnifiedQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
        super(innerSet, next, type);
    }

    /**
     * Builds this node from an existing leaf quantifier, taking over its inner
     * set, next node and type, and makes the inner set point to this node.
     */
    public UnifiedQuantifierSet(LeafQuantifierSet quant) {
        super((LeafSet) quant.getInnerSet(), quant.getNext(), quant.getType());
        innerSet.setNext(this);
    }

    /**
     * Consumes as many repetitions of the leaf as fit before the right bound,
     * without ever giving any back, and then hands over to the next node.
     */
    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int position = stringIndex;
        while (position + leaf.charCount() <= matchResult.getRightBound()
                && leaf.accepts(position, testString) > 0) {
            position += leaf.charCount();
        }
        return next.matches(position, testString, matchResult);
    }

    /**
     * Lets the next node find its position first and then walks backwards from
     * there, one leaf width at a time, for as long as the leaf accepts the
     * input and the position does not fall below {@code stringIndex}.
     *
     * @return the earliest position reached, or -1 if the next node finds nothing.
     */
    @Override
    public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        int found = next.find(stringIndex, testString, matchResult);
        if (found < 0) {
            return -1;
        }
        int candidate = found - leaf.charCount();
        while (candidate >= stringIndex && leaf.accepts(candidate, testString) > 0) {
            found = candidate;
            candidate -= leaf.charCount();
        }
        return found;
    }
}

View File

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Kuznetsov
*/
package org.teavm.classlib.java.util.regex;
/**
 * Word boundary node. Compares the character at the current index with the one
 * before it; a boundary exists when exactly one of the two is a non-word
 * character. The node matches when that outcome equals {@code positive}.
 *
 * @author Nikolay A. Kuznetsov
 */
class WordBoundary extends AbstractSet {
    boolean positive;

    public WordBoundary(boolean positive) {
        this.positive = positive;
    }

    @Override
    public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
        // A blank stands in for "no character" at either end of the input.
        char current = ' ';
        if (stringIndex < matchResult.getRightBound()) {
            current = testString.charAt(stringIndex);
        }
        char previous = ' ';
        if (stringIndex != 0) {
            previous = testString.charAt(stringIndex - 1);
        }
        int leftBound = matchResult.hasTransparentBounds() ? 0 : matchResult.getLeftBound();

        boolean currentIsSpace = current == ' ' || isSpace(current, stringIndex, leftBound, testString);
        boolean previousIsSpace = previous == ' ' || isSpace(previous, stringIndex - 1, leftBound, testString);

        boolean atBoundary = currentIsSpace != previousIsSpace;
        if (atBoundary == positive) {
            return next.matches(stringIndex, testString, matchResult);
        }
        return -1;
    }

    /**
     * Always {@code false}: the node only inspects its surroundings and never
     * advances the string index.
     */
    @Override
    public boolean hasConsumed(MatchResultImpl matchResult) {
        return false;
    }

    @Override
    protected String getName() {
        return "WordBoundary"; //$NON-NLS-1$
    }

    /**
     * Decides whether {@code ch} counts as a non-word character. Letters,
     * digits and underscore are word characters. A non-spacing mark takes the
     * classification of the nearest preceding character that is not itself a
     * non-spacing mark, searching no further left than {@code leftBound}.
     */
    private boolean isSpace(char ch, int index, int leftBound, CharSequence testString) {
        if (ch == '_' || Character.isLetterOrDigit(ch)) {
            return false;
        }
        if (Character.getType(ch) != Character.NON_SPACING_MARK) {
            return true;
        }
        for (int pos = index - 1; pos >= leftBound; pos--) {
            char base = testString.charAt(pos);
            if (Character.isLetterOrDigit(base)) {
                return false;
            }
            if (Character.getType(base) != Character.NON_SPACING_MARK) {
                return true;
            }
        }
        return true;
    }
}

View File

@ -0,0 +1,234 @@
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import junit.framework.TestCase;
/**
* Tests Matcher methods
*/
@SuppressWarnings("nls")
public class Matcher2Test extends TestCase {
/** Matcher.toString() must never return null, even before any match attempt. */
public void test_toString() {
    Matcher matcher = Pattern.compile("foo").matcher("bar");
    assertNotNull(matcher.toString());
}
/**
 * After a failed match every match-state accessor must throw
 * IllegalStateException, and usePattern(null) must be rejected.
 */
public void testErrorConditions() throws PatternSyntaxException {
    Pattern pattern = Pattern.compile("foo");
    Matcher matcher = pattern.matcher("bar");
    assertFalse(matcher.matches());

    // Accessors without a group argument
    try {
        matcher.start();
        fail("IllegalStateException expected");
    } catch (IllegalStateException expected) {
        // no match available
    }
    try {
        matcher.end();
        fail("IllegalStateException expected");
    } catch (IllegalStateException expected) {
        // no match available
    }
    try {
        matcher.group();
        fail("IllegalStateException expected");
    } catch (IllegalStateException expected) {
        // no match available
    }

    // Accessors taking a group index
    try {
        matcher.start(1);
        fail("IllegalStateException expected");
    } catch (IllegalStateException expected) {
        // no match available
    }
    try {
        matcher.end(1);
        fail("IllegalStateException expected");
    } catch (IllegalStateException expected) {
        // no match available
    }
    try {
        matcher.group(1);
        fail("IllegalStateException expected");
    } catch (IllegalStateException expected) {
        // no match available
    }

    // regression test for HARMONY-2418
    try {
        matcher.usePattern(null);
        fail("IllegalArgumentException expected");
    } catch (IllegalArgumentException expected) {
        // PASSED
    }
}
/**
 * Checks match cursors for two successive matches, and that invalid group
 * indices raise IndexOutOfBoundsException while a match is available but
 * IllegalStateException once find() has failed.
 */
public void testErrorConditions2() throws PatternSyntaxException {
    // Indices that do not denote a group of the two-group pattern below
    int[] invalidGroups = { 3, -1 };
    Pattern pattern = Pattern.compile("(foo[0-9])(bar[a-z])");
    Matcher matcher = pattern.matcher("foo1barzfoo2baryfoozbar5");

    // First match spans [0, 8)
    assertTrue(matcher.find());
    assertEquals(0, matcher.start());
    assertEquals(8, matcher.end());
    assertEquals(0, matcher.start(1));
    assertEquals(4, matcher.end(1));
    assertEquals(4, matcher.start(2));
    assertEquals(8, matcher.end(2));
    for (int group : invalidGroups) {
        try {
            matcher.start(group);
            fail("IndexOutOfBoundsException expected");
        } catch (IndexOutOfBoundsException expected) {
            // invalid group index
        }
        try {
            matcher.end(group);
            fail("IndexOutOfBoundsException expected");
        } catch (IndexOutOfBoundsException expected) {
            // invalid group index
        }
        try {
            matcher.group(group);
            fail("IndexOutOfBoundsException expected");
        } catch (IndexOutOfBoundsException expected) {
            // invalid group index
        }
    }

    // Second match spans [8, 16)
    assertTrue(matcher.find());
    assertEquals(8, matcher.start());
    assertEquals(16, matcher.end());
    assertEquals(8, matcher.start(1));
    assertEquals(12, matcher.end(1));
    assertEquals(12, matcher.start(2));
    assertEquals(16, matcher.end(2));
    for (int group : invalidGroups) {
        try {
            matcher.start(group);
            fail("IndexOutOfBoundsException expected");
        } catch (IndexOutOfBoundsException expected) {
            // invalid group index
        }
        try {
            matcher.end(group);
            fail("IndexOutOfBoundsException expected");
        } catch (IndexOutOfBoundsException expected) {
            // invalid group index
        }
        try {
            matcher.group(group);
            fail("IndexOutOfBoundsException expected");
        } catch (IndexOutOfBoundsException expected) {
            // invalid group index
        }
    }

    // No third match: the missing match state is reported first
    assertFalse(matcher.find());
    for (int group : invalidGroups) {
        try {
            matcher.start(group);
            fail("IllegalStateException expected");
        } catch (IllegalStateException expected) {
            // no match available
        }
        try {
            matcher.end(group);
            fail("IllegalStateException expected");
        } catch (IllegalStateException expected) {
            // no match available
        }
        try {
            matcher.group(group);
            fail("IllegalStateException expected");
        } catch (IllegalStateException expected) {
            // no match available
        }
    }
}
/*
 * Regression test for HARMONY-997: a replacement string consisting of a
 * single backslash has nothing to escape and must be rejected.
 */
public void testReplacementBackSlash() {
    Matcher matcher = Pattern.compile("me").matcher("replace me");
    String danglingEscape = "\\";
    try {
        matcher.replaceAll(danglingEscape);
        fail("IndexOutOfBoundsException should be thrown");
    } catch (IndexOutOfBoundsException expected) {
        // dangling escape detected
    }
}
}

View File

@ -0,0 +1,772 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
import static org.junit.Assert.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.junit.Test;
@SuppressWarnings("nls")
public class MatcherTest {
// Patterns shared by testPattern and testMatchesMisc; the rows of posSeq in
// testMatchesMisc are index-aligned with this array.
String[] testPatterns = {
"(a|b)*abb",
"(1*2*3*4*)*567",
"(a|b|c|d)*aab",
"(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*",
"(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*",
"(a|b)*(a|b)*A(a|b)*lice.*",
"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|"
+ "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do" };
// The pattern at index i declares i + 1 capturing groups; testGroupCount and
// testGroupint rely on that ordering.
String[] groupPatterns = { "(a|b)*aabb", "((a)|b)*aabb", "((a|b)*)a(abb)",
"(((a)|(b))*)aabb", "(((a)|(b))*)aa(b)b", "(((a)|(b))*)a(a(b)b)" };
@Test
public void testRegionsIntInt() {
    Matcher matcher = Pattern.compile("x*").matcher("axxxxxa");
    assertFalse(matcher.matches());

    // Restricting the region to the run of x's makes the match succeed
    matcher.region(1, 6);
    assertEquals(1, matcher.regionStart());
    assertEquals(6, matcher.regionEnd());
    assertTrue(matcher.matches());

    // start > end, negative start, start past the input, end past the input
    int[][] invalidRegions = { { 1, 0 }, { -1, 2 }, { 10, 11 }, { 1, 10 } };
    for (int[] bounds : invalidRegions) {
        try {
            matcher.region(bounds[0], bounds[1]);
            fail("expected an IOOBE");
        } catch (IndexOutOfBoundsException expected) {
            // region rejected
        }
    }
}
@Test
public void testAppendReplacement() {
    Matcher matcher = Pattern.compile("XX").matcher("Today is XX-XX-XX ...");
    StringBuffer out = new StringBuffer();
    int occurrence = 0;
    while (matcher.find()) {
        // 0, 11, 22: the occurrence index repeated as both digits
        matcher.appendReplacement(out, Integer.toString(occurrence * 10 + occurrence));
        occurrence++;
    }
    matcher.appendTail(out);
    assertEquals("Today is 0-11-22 ...", out.toString());
}
@Test
public void testAppendReplacementRef() {
    Matcher matcher = Pattern.compile("xx (rur|\\$)").matcher("xx $ equals to xx rur.");
    StringBuffer out = new StringBuffer();
    int amount = 1;
    while (matcher.find()) {
        // "$1" in the replacement refers back to the captured currency token
        matcher.appendReplacement(out, Integer.toString(amount) + " $1");
        amount *= 30;
    }
    matcher.appendTail(out);
    assertEquals("1 $ equals to 30 rur.", out.toString());
}
@Test
public void testReplaceAll() {
    // Every run matching a*b is replaced by a single dash
    Matcher matcher = Pattern.compile("a*b").matcher("aabfooaabfooabfoob");
    String replaced = matcher.replaceAll("-");
    assertEquals("-foo-foo-foo-", replaced);
}
@Test
public void testResetCharSequence() {
    Matcher matcher = Pattern.compile("abcd").matcher("abcd");
    assertTrue(matcher.matches());

    // Resetting onto different input changes the outcome
    matcher.reset("efgh");
    assertFalse(matcher.matches());

    try {
        matcher.reset(null);
        fail("expected a NPE");
    } catch (NullPointerException expected) {
        // null input rejected
    }
}
@Test
public void testAppendSlashes() {
    // Replacing each backslash with an escaped backslash must reproduce the input
    String input = "one\\cat\\two\\cats\\in\\the\\yard";
    Matcher matcher = Pattern.compile("\\\\").matcher(input);
    StringBuffer out = new StringBuffer();
    for (boolean found = matcher.find(); found; found = matcher.find()) {
        matcher.appendReplacement(out, "\\\\");
    }
    matcher.appendTail(out);
    assertEquals("one\\cat\\two\\cats\\in\\the\\yard", out.toString());
}
@Test
public void testReplaceFirst() {
    // Only the first occurrence of "dog" is replaced
    Matcher matcher = Pattern.compile("dog").matcher("zzzdogzzzdogzzz");
    String replaced = matcher.replaceFirst("cat");
    assertEquals("zzzcatzzzdogzzz", replaced);
}
@Test
public void testPattern() {
    // The matcher must hand back the very pattern that created it
    for (int i = 0; i < testPatterns.length; i++) {
        Pattern compiled = Pattern.compile(testPatterns[i]);
        assertEquals(compiled, compiled.matcher("aaa").pattern());
    }
    // ... and that pattern must print as the original expression
    for (int i = 0; i < testPatterns.length; i++) {
        String regex = testPatterns[i];
        Pattern viaMatcher = Pattern.compile(regex).matcher("aaa").pattern();
        assertEquals(regex, viaMatcher.toString());
    }
}
/*
* Class under test for Matcher reset()
*
* Placeholder: the body is empty and the method carries no @Test annotation,
* so it currently verifies nothing.
*/
public void testReset() {
}
/**
 * Checks Matcher.group(int): invalid indices must raise
 * IndexOutOfBoundsException, and valid indices must return the expected
 * captured text for each entry of groupPatterns.
 */
@Test
public void testGroupint() {
    String positiveTestString = "ababababbaaabb";

    // test IndexOutOfBoundsException
    for (int i = 0; i < groupPatterns.length; i++) {
        Pattern test = Pattern.compile(groupPatterns[i]);
        Matcher mat = test.matcher(positiveTestString);
        mat.matches();
        // groupPatterns[i] declares i + 1 groups, so i + 2 is the first index
        // past the end. Each index gets its own try block: sharing one block
        // would stop at the first exception and leave the rest unchecked.
        int[] invalidGroups = { i + 2, i + 100, -1, -100 };
        for (int group : invalidGroups) {
            try {
                mat.group(group);
                fail("IndexOutBoundsException expected for group " + group
                        + " of " + groupPatterns[i]);
            } catch (IndexOutOfBoundsException iobe) {
                // invalid group index rejected
            }
        }
    }

    String[][] groupResults = { { "a" }, { "a", "a" },
            { "ababababba", "a", "abb" }, { "ababababba", "a", "a", "b" },
            { "ababababba", "a", "a", "b", "b" },
            { "ababababba", "a", "a", "b", "abb", "b" }, };
    for (int i = 0; i < groupPatterns.length; i++) {
        Pattern test = Pattern.compile(groupPatterns[i]);
        Matcher mat = test.matcher(positiveTestString);
        mat.matches();
        for (int j = 0; j < groupResults[i].length; j++) {
            assertEquals("i: " + i + " j: " + j, groupResults[i][j], mat.group(j + 1));
        }
    }
}
@Test
public void testGroup() {
    String matchingInput = "ababababbaaabb";
    String nonMatchingInput = "gjhfgdsjfhgcbv";

    // On a full match, group() is the whole input and equals group(0)
    for (int i = 0; i < groupPatterns.length; i++) {
        Matcher matcher = Pattern.compile(groupPatterns[i]).matcher(matchingInput);
        matcher.matches();
        assertEquals(matchingInput, matcher.group());
        assertEquals(matcher.group(0), matcher.group());
    }

    // On a failed match, group() has nothing to report
    for (int i = 0; i < groupPatterns.length; i++) {
        Matcher matcher = Pattern.compile(groupPatterns[i]).matcher(nonMatchingInput);
        matcher.matches();
        try {
            matcher.group();
            fail("IllegalStateException expected for <false> matches result");
        } catch (IllegalStateException expected) {
            // no match available
        }
    }
}
@Test
public void testGroupPossessive() {
    // The outer group keeps the text of its last possessive iteration
    Matcher matcher = Pattern.compile("((a)|(b))++c").matcher("aac");
    matcher.matches();
    String outerGroup = matcher.group(1);
    assertEquals("a", outerGroup);
}
/*
* Class under test for boolean find(int)
*
* Placeholder: empty body and no @Test annotation, so nothing is verified.
*/
public void testFindint() {
}
/*
* Class under test for int start(int)
*
* Placeholder: empty body and no @Test annotation, so nothing is verified.
*/
public void testStartint() {
}
/*
* Class under test for int end(int)
*
* Placeholder: empty body and no @Test annotation, so nothing is verified.
*/
public void testEndint() {
}
@Test
public void testMatchesMisc() {
    // Row i lists inputs that testPatterns[i] must match in full
    String[][] positiveInputs = {
            { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
            { "213567", "12324567", "1234567", "213213567",
                    "21312312312567", "444444567" },
            { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
            { "213234567", "3458", "0987654", "7689546432", "0398576",
                    "98432", "5" },
            {
                    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
                            + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
            { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
                    "abbbAbbbliceaaa", "Alice" },
            { "a123", "bnxnvgds156", "for", "while", "if", "struct" }
    };
    for (int row = 0; row < testPatterns.length; row++) {
        String regex = testPatterns[row];
        Pattern compiled = Pattern.compile(regex);
        for (String input : positiveInputs[row]) {
            boolean matched = compiled.matcher(input).matches();
            assertTrue("Incorrect match: " + regex + " vs " + input, matched);
        }
    }
}
/**
 * Checks bounded quantifiers on single characters and on groups. Each input
 * row alternates an input that must match with one that must not.
 */
@Test
public void testMatchesQuantifiers() {
    String[] testPatternsSingles = { "a{5}", "a{2,4}", "a{3,}" };
    String[] testPatternsMultiple = { "((a)|(b)){1,2}abb",
            "((a)|(b)){2,4}", "((a)|(b)){3,}" };
    String[][] stringSingles = { { "aaaaa", "aaa" },
            { "aa", "a", "aaa", "aaaaaa", "aaaa", "aaaaa" },
            { "aaa", "a", "aaaa", "aa" }, };
    String[][] stringMultiples = { { "ababb", "aba" },
            { "ab", "b", "bab", "ababa", "abba", "abababbb" },
            { "aba", "b", "abaa", "ba" }, };

    for (int i = 0; i < testPatternsSingles.length; i++) {
        assertAlternatingMatches(testPatternsSingles[i], stringSingles[i]);
    }
    for (int i = 0; i < testPatternsMultiple.length; i++) {
        assertAlternatingMatches(testPatternsMultiple[i], stringMultiples[i]);
    }
}

/**
 * Asserts that even-indexed inputs match the expression in full and that
 * odd-indexed inputs do not. The loop is bounded by the row's own length so
 * that every pair is checked, not just the first one.
 */
private void assertAlternatingMatches(String regex, String[] inputs) {
    Pattern pat = Pattern.compile(regex);
    for (int j = 0; j + 1 < inputs.length; j += 2) {
        String positive = inputs[j];
        String negative = inputs[j + 1];
        assertTrue("Match expected, but failed: " + pat.pattern()
                + " : " + positive, pat.matcher(positive).matches());
        assertFalse("Match failure expected, but match succeed: "
                + pat.pattern() + " : " + negative,
                pat.matcher(negative).matches());
    }
}
@Test
public void testQuantVsGroup() {
    // Bounded d-runs interleaved with starred alternations of a and c
    String input = "dacaacaacaaddaaacaacaaddd";
    Matcher matcher = Pattern.compile("(d{1,3})((a|c)*)(d{1,3})((a|c)*)(d{1,3})").matcher(input);
    matcher.matches();

    assertEquals("dacaacaacaaddaaacaacaaddd", matcher.group());
    assertEquals("d", matcher.group(1));
    assertEquals("acaacaacaa", matcher.group(2));
    assertEquals("dd", matcher.group(4));
    assertEquals("aaacaacaa", matcher.group(5));
    assertEquals("ddd", matcher.group(7));
}
// Placeholder for lookingAt() coverage: empty body and no @Test annotation,
// so nothing is verified.
public void testLookingAt() {
}
/*
 * Class under test for boolean find()
 *
 * Walks all successive matches and checks their positions. The @Test
 * annotation is required for the JUnit 4 runner to execute this method, and
 * the match counts are asserted so the loops cannot pass without iterating.
 */
@Test
public void testFind() {
    String testPattern = "(abb)";
    String testString = "cccabbabbabbabbabb";
    Pattern pat = Pattern.compile(testPattern);
    Matcher mat = pat.matcher(testString);
    int start = 3;
    int end = 6;
    int found = 0;
    while (mat.find()) {
        assertEquals(start, mat.start(1));
        assertEquals(end, mat.end(1));
        start = end;
        end += 3;
        found++;
    }
    // "abb" occurs five times back to back after the "ccc" prefix
    assertEquals(5, found);

    testPattern = "(\\d{1,3})";
    testString = "aaaa123456789045";
    Pattern pat2 = Pattern.compile(testPattern);
    Matcher mat2 = pat2.matcher(testString);
    start = 4;
    int length = 3;
    found = 0;
    while (mat2.find()) {
        assertEquals(testString.substring(start, start + length), mat2.group(1));
        start += length;
        found++;
    }
    // twelve digits consumed greedily three at a time
    assertEquals(4, found);
}
@Test
public void testSEOLsymbols() {
    // Anchors combined with escaped bracket characters
    boolean matched = Pattern.compile("^a\\(bb\\[$").matcher("a(bb[").matches();
    assertTrue(matched);
}
/*
* Class under test for int start()
*
* Placeholder: empty body and no @Test annotation, so nothing is verified.
*/
public void testStart() {
}
@Test
public void testGroupCount() {
    // groupPatterns is ordered so that entry i declares i + 1 groups
    int expectedGroups = 0;
    for (String regex : groupPatterns) {
        expectedGroups++;
        Matcher matcher = Pattern.compile(regex).matcher("ababababbaaabb");
        matcher.matches();
        assertEquals(expectedGroups, matcher.groupCount());
    }
}
@Test
public void testRelactantQuantifiers() {
    // The inner b* must give back one 'b' so the trailing literal can match
    Matcher matcher = Pattern.compile("(ab*)*b").matcher("abbbb");
    if (!matcher.matches()) {
        fail("Match expected: (ab*)*b vs abbbb");
    } else {
        assertEquals("abbb", matcher.group(1));
    }
}
@Test
public void testEnhancedFind() {
    // a* may match nothing, so the first hit is the lone trailing 'b'
    Matcher matcher = Pattern.compile("a*b").matcher("foob");
    matcher.find();
    String hit = matcher.group();
    assertEquals("b", hit);
}
@Test
public void testPosCompositeGroup() {
    // Possessive bounded repetition of a group
    Pattern mustMatch = Pattern.compile("(aa|bb){1,3}+cc");
    Pattern mustNotMatch = Pattern.compile("(aa|bb){1,3}+bb");
    String[] accepted = { "aabbcc", "aacc", "bbaabbcc" };
    String[] rejected = { "aabb", "bb", "bbaabb" };

    for (int i = 0; i < accepted.length; i++) {
        assertTrue(mustMatch.matcher(accepted[i]).matches());
    }
    for (int i = 0; i < rejected.length; i++) {
        assertFalse(mustNotMatch.matcher(rejected[i]).matches());
    }
    // With a fourth pair present the three-repetition cap leaves "bb" for the tail
    assertTrue(Pattern.matches("(aa|bb){1,3}+bb", "aabbaabb"));
}
@Test
public void testPosAltGroup() {
    // Possessive optional group
    Pattern mustMatch = Pattern.compile("(aa|bb)?+cc");
    Pattern mustNotMatch = Pattern.compile("(aa|bb)?+bb");
    String[] accepted = { "aacc", "bbcc", "cc" };
    String[] rejected = { "bb", "aa" };

    for (int i = 0; i < accepted.length; i++) {
        String input = accepted[i];
        assertTrue(mustMatch.toString() + " vs: " + input, mustMatch.matcher(input).matches());
    }
    for (int i = 0; i < rejected.length; i++) {
        assertFalse(mustNotMatch.matcher(rejected[i]).matches());
    }
    assertTrue(Pattern.matches("(aa|bb)?+bb", "aabb"));
}
@Test
public void testRelCompGroup() {
    // A reluctant {min,3}? takes exactly min repetitions when .* can absorb the rest
    StringBuilder expected = new StringBuilder();
    for (int min = 0; min < 4; min++) {
        Pattern compiled = Pattern.compile("((aa|bb){" + min + ",3}?).*cc");
        Matcher matcher = compiled.matcher("aaaaaacc");
        assertTrue(compiled.toString() + " vs: " + "aaaaaacc", matcher.matches());
        assertEquals(expected.toString(), matcher.group(1));
        expected.append("aa");
    }
}
@Test
public void testRelAltGroup() {
    // With .* available, the reluctant optional group stays empty
    Pattern lenient = Pattern.compile("((aa|bb)??).*cc");
    Matcher lenientMatcher = lenient.matcher("aacc");
    assertTrue(lenient.toString() + " vs: " + "aacc", lenientMatcher.matches());
    assertEquals("", lenientMatcher.group(1));

    // Without it, the group has to take "aa" for the whole input to match
    Pattern strict = Pattern.compile("((aa|bb)??)cc");
    Matcher strictMatcher = strict.matcher("aacc");
    assertTrue(strict.toString() + " vs: " + "aacc", strictMatcher.matches());
    assertEquals("aa", strictMatcher.group(1));
}
@Test
public void testIgnoreCase() {
    String mixedCase = "aAebbAEaEdebbedEccEdebbedEaedaebEbdCCdbBDcdcdADa";

    // Multi-character alternatives
    Pattern pairs = Pattern.compile("(aa|bb)*", Pattern.CASE_INSENSITIVE);
    assertTrue(pairs.matcher("aAbb").matches());

    // Single-character alternatives
    Pattern alternatives = Pattern.compile("(a|b|c|d|e)*", Pattern.CASE_INSENSITIVE);
    assertTrue(alternatives.matcher(mixedCase).matches());

    // Character range
    Pattern range = Pattern.compile("[a-e]*", Pattern.CASE_INSENSITIVE);
    assertTrue(range.matcher(mixedCase).matches());
}
@Test
public void testQuoteReplacement() {
    // Backslash and dollar sign are the two characters that get escaped
    String quoted = Matcher.quoteReplacement("\\aaCC$1");
    assertEquals("\\\\aaCC\\$1", quoted);
}
@Test
public void testOverFlow() {
    // A starred group that can match empty must terminate
    Matcher nested = Pattern.compile("(a*)*").matcher("aaa");
    assertTrue(nested.matches());
    assertEquals("", nested.group(1));

    // Back references under quantifiers
    assertTrue(Pattern.matches("(1+)\\1+", "11"));
    assertTrue(Pattern.matches("(1+)(2*)\\2+", "11"));

    Matcher optionalRef = Pattern.compile("(1+)\\1*").matcher("11");
    assertTrue(optionalRef.matches());
    assertEquals("11", optionalRef.group(1));

    Matcher altRef = Pattern.compile("((1+)|(2+))(\\2+)").matcher("11");
    assertTrue(altRef.matches());
    assertEquals("1", altRef.group(2));
    assertEquals("1", altRef.group(1));
    assertEquals("1", altRef.group(4));
    assertNull(altRef.group(3));
}
@Test
public void testUnicode() {
    // Hexadecimal, Unicode and octal escapes for 'a', each followed by a literal 'a'
    String[] escapedA = { "\\x61a", "\\u0061a", "\\0141a" };
    for (String regex : escapedA) {
        assertTrue(Pattern.matches(regex, "aa"));
    }
    // An octal escape takes at most \077 here, leaving the final '7' literal
    assertTrue(Pattern.matches("\\0777", "?7"));
}
@Test
public void testUnicodeCategory() {
    String lower = "k";
    String upper = "K";

    // Plain and negated categories
    assertTrue(Pattern.matches("\\p{Ll}", lower)); // Unicode lower case
    assertTrue(Pattern.matches("\\P{Ll}", upper)); // Unicode non-lower case
    assertTrue(Pattern.matches("\\p{Lu}", upper)); // Unicode upper case
    assertTrue(Pattern.matches("\\P{Lu}", lower)); // Unicode non-upper case

    // Category intersections
    assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Lu}]]", lower));
    assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Ll}]]", upper));
    assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Lu}]]", upper));
    assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Ll}]]", lower));

    // Categories combined with explicit ranges
    assertFalse(Pattern.matches("[\\p{L}&&[^a-z]]", lower));
    assertTrue(Pattern.matches("[\\p{L}&&[^a-z]]", upper));
    assertTrue(Pattern.matches("[\\p{Lu}a-z]", lower));
    assertTrue(Pattern.matches("[a-z\\p{Lu}]", lower));
    assertFalse(Pattern.matches("[\\p{Lu}a-d]", lower));
    assertTrue(Pattern.matches("[a-d\\p{Lu}]", upper));
    // assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Lu}&&[^K]]]", "K"));
    assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Lu}&&[^G]]]", upper));
}
@Test
public void testSplitEmpty() {
    // Splitting empty input on an empty pattern yields one empty token
    String[] tokens = Pattern.compile("").split("", -1);
    assertEquals(1, tokens.length);
    assertEquals("", tokens[0]);
}
@Test
public void testFindDollar() {
    // '$' also matches just before a final line terminator
    Pattern endAnchored = Pattern.compile("a$");
    Matcher matcher = endAnchored.matcher("a\n");
    assertTrue(matcher.find());
    assertEquals("a", matcher.group());
}
@Test
public void testMatchesRegionChanged() {
    // Regression for HARMONY-610: matches() must honour the region, which
    // here excludes the surrounding blanks
    Matcher matcher = Pattern.compile("\\w+").matcher(" word ");
    matcher.region(1, 5);
    boolean matched = matcher.matches();
    assertTrue(matched);
}
/**
 * Regression for HARMONY-3145: every code point must match \p{all} and none
 * may match \P{all}. The compiled patterns are reused for each code point
 * instead of recompiling the expression on every iteration.
 */
@Test
public void testAllCodePoints() {
    int[] codePoint = new int[1];

    Pattern p = Pattern.compile("(\\p{all})+");
    int cnt = 0;
    for (int i = 0; i < 0x110000; i++) {
        codePoint[0] = i;
        String s = new String(codePoint, 0, 1);
        if (!p.matcher(s).matches()) {
            cnt++;
        }
    }
    // no code point may be rejected
    assertEquals(0, cnt);

    p = Pattern.compile("(\\P{all})+");
    cnt = 0;
    for (int i = 0; i < 0x110000; i++) {
        codePoint[0] = i;
        String s = new String(codePoint, 0, 1);
        if (!p.matcher(s).matches()) {
            cnt++;
        }
    }
    // every code point must be rejected
    assertEquals(0x110000, cnt);
}
@Test
public void testFindRegionChanged() {
    // Regression for HARMONY-625: find() must stop at the region end
    Pattern anything = Pattern.compile("(?s).*");

    Matcher whole = anything.matcher("abcde");
    whole.find();
    assertEquals("abcde", whole.group());

    Matcher prefix = anything.matcher("abcde");
    prefix.region(0, 2);
    prefix.find();
    assertEquals("ab", prefix.group());
}
@Test
public void testFindRegionChanged2() {
    // Regression for HARMONY-713: the only 'c' lies outside the region
    Matcher matcher = Pattern.compile("c").matcher("aabb.c");
    matcher.region(0, 3);
    boolean found = matcher.find();
    assertFalse(found);
}
@Test
public void testPatternMatcher() throws Exception {
    // Two adjacent non-capturing groups
    Matcher matcher = Pattern.compile("(?:\\d+)(?:pt)").matcher("14pt");
    assertTrue(matcher.matches());
}
@Test
public void test3360() {
    // None of these punctuation characters is whitespace
    String punctuation = "!\"#%&'(),-./";
    Matcher matcher = Pattern.compile("\\s").matcher(punctuation);
    assertFalse(matcher.find());
}
@Test
public void testGeneralPunctuationCategory() {
    // Each sample must be recognised as a member of the P category
    String[] samples = { ",", "!", "\"", "#", "%", "&", "'", "(", ")", "-", ".",
            "/" };
    Pattern punctuation = Pattern.compile("\\p{P}");
    for (String sample : samples) {
        assertTrue(punctuation.matcher(sample).find());
    }
}
/**
 * Checks Matcher.hitEnd() after find() for several expressions. Every case
 * carries a distinct label so a failure can be traced to a single call.
 */
@Test
public void testHitEndAfterFind() {
    hitEndTest(true, "#01.0", "r((ege)|(geg))x", "regexx", false);
    hitEndTest(true, "#01.1", "r((ege)|(geg))x", "regex", false);
    hitEndTest(true, "#01.2", "r((ege)|(geg))x", "rege", true);
    // was labelled "#01.2" as well, which made the two cases indistinguishable
    hitEndTest(true, "#01.3", "r((ege)|(geg))x", "xregexx", false);

    hitEndTest(true, "#02.0", "regex", "rexreger", true);
    hitEndTest(true, "#02.1", "regex", "raxregexr", false);

    String floatRegex = getHexFloatRegex();
    hitEndTest(true, "#03.0", floatRegex, Double.toHexString(-1.234d), true);
    hitEndTest(true, "#03.1", floatRegex, "1 ABC"
            + Double.toHexString(Double.NaN) + "buhuhu", false);
    hitEndTest(true, "#03.2", floatRegex, Double.toHexString(-0.0) + "--",
            false);
    hitEndTest(true, "#03.3", floatRegex, "--"
            + Double.toHexString(Double.MIN_VALUE) + "--", false);

    hitEndTest(true, "#04.0", "(\\d+) fish (\\d+) fish (\\w+) fish (\\d+)",
            "1 fish 2 fish red fish 5", true);
    hitEndTest(true, "#04.1", "(\\d+) fish (\\d+) fish (\\w+) fish (\\d+)",
            "----1 fish 2 fish red fish 5----", false);
}
@Test
public void testToString() {
    // The textual form of a matcher must mention its pattern
    Matcher matcher = Pattern.compile("(\\d{1,3})").matcher("aaaa123456789045");
    String description = matcher.toString();
    assertTrue("The result doesn't contain pattern info",
            description.contains("(\\d{1,3})"));
}
/**
 * Runs find() or matches() on the input and asserts the resulting hitEnd()
 * value.
 *
 * @param callFind {@code true} to use find(), {@code false} to use matches()
 * @param testNo label used as the assertion message
 * @param regex expression to compile
 * @param input text to scan
 * @param hit expected hitEnd() value
 */
private void hitEndTest(boolean callFind, String testNo, String regex,
        String input, boolean hit) {
    Matcher matcher = Pattern.compile(regex).matcher(input);
    if (!callFind) {
        matcher.matches();
    } else {
        matcher.find();
    }
    boolean reachedEnd = matcher.hitEnd();
    assertTrue(testNo, reachedEnd == hit);
}
/**
 * Builds an expression accepting either a hexadecimal floating-point literal
 * or one of the spellings of Infinity / NaN.
 */
private String getHexFloatRegex() {
    String hexLiteral = "(-|\\+)?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP](-|\\+)?[0-9]+)?";
    String specialValue = "((-|\\+)?Infinity)|([nN]a[nN])";
    return "((" + hexLiteral + ")|(" + specialValue + "))";
}
}

View File

@ -0,0 +1,111 @@
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import junit.framework.TestCase;
/**
* Tests Pattern compilation modes and modes triggered in pattern strings
*/
@SuppressWarnings("nls")
public class ModeTest extends TestCase {
/**
 * Case sensitivity: the default mode, the CASE_INSENSITIVE flag and the
 * embedded (?i) flag.
 */
public void testCase() throws PatternSyntaxException {
    // Default mode: the upper-case letters keep "cAT" from being captured
    Matcher sensitive = Pattern.compile("([a-z]+)[0-9]+").matcher("cAT123#dog345");
    assertTrue(sensitive.find());
    assertEquals("dog", sensitive.group(1));
    assertFalse(sensitive.find());

    // Flag passed to compile()
    Matcher byFlag = Pattern.compile("([a-z]+)[0-9]+", Pattern.CASE_INSENSITIVE)
            .matcher("cAt123#doG345");
    assertTrue(byFlag.find());
    assertEquals("cAt", byFlag.group(1));
    assertTrue(byFlag.find());
    assertEquals("doG", byFlag.group(1));
    assertFalse(byFlag.find());

    // Flag embedded in the expression
    Matcher embedded = Pattern.compile("(?i)([a-z]+)[0-9]+").matcher("cAt123#doG345");
    assertTrue(embedded.find());
    assertEquals("cAt", embedded.group(1));
    assertTrue(embedded.find());
    assertEquals("doG", embedded.group(1));
    assertFalse(embedded.find());
}
/**
 * Anchor behaviour: the default mode, the MULTILINE flag and the embedded
 * (?m) flag.
 */
public void testMultiline() throws PatternSyntaxException {
    String lines = "foo1bar\nfoo2foo3\nbarfoo4";

    // Default mode: '^' anchors at the input start only
    Pattern startAnchored = Pattern.compile("^foo");
    Matcher matcher = startAnchored.matcher("foobar");
    assertTrue(matcher.find());
    assertTrue(matcher.start() == 0 && matcher.end() == 3);
    assertFalse(matcher.find());
    matcher = startAnchored.matcher("barfoo");
    assertFalse(matcher.find());

    // Default mode: '$' anchors at the input end only
    Pattern endAnchored = Pattern.compile("foo$");
    matcher = endAnchored.matcher("foobar");
    assertFalse(matcher.find());
    matcher = endAnchored.matcher("barfoo");
    assertTrue(matcher.find());
    assertTrue(matcher.start() == 3 && matcher.end() == 6);
    assertFalse(matcher.find());

    // MULTILINE flag: '^' matches at every line start
    matcher = Pattern.compile("^foo([0-9]*)", Pattern.MULTILINE).matcher(lines);
    assertTrue(matcher.find());
    assertEquals("1", matcher.group(1));
    assertTrue(matcher.find());
    assertEquals("2", matcher.group(1));
    assertFalse(matcher.find());

    // MULTILINE flag: '$' matches at every line end
    matcher = Pattern.compile("foo([0-9]*)$", Pattern.MULTILINE).matcher(lines);
    assertTrue(matcher.find());
    assertEquals("3", matcher.group(1));
    assertTrue(matcher.find());
    assertEquals("4", matcher.group(1));
    assertFalse(matcher.find());

    // Embedded (?m): same results as the flag for '^'
    matcher = Pattern.compile("(?m)^foo([0-9]*)").matcher(lines);
    assertTrue(matcher.find());
    assertEquals("1", matcher.group(1));
    assertTrue(matcher.find());
    assertEquals("2", matcher.group(1));
    assertFalse(matcher.find());

    // Embedded (?m): same results as the flag for '$'
    matcher = Pattern.compile("(?m)foo([0-9]*)$").matcher(lines);
    assertTrue(matcher.find());
    assertEquals("3", matcher.group(1));
    assertTrue(matcher.find());
    assertEquals("4", matcher.group(1));
    assertFalse(matcher.find());
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,67 @@
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
import java.util.regex.Pattern;
import junit.framework.TestCase;
/**
* Test boundary and error conditions in java.util.regex.Pattern
*/
@SuppressWarnings("nls")
public class PatternErrorTest extends TestCase {
/**
 * Boundary conditions of Pattern.compile: null input, empty input, and the
 * accepted flag values.
 */
public void testCompileErrors() throws Exception {
    // A null expression must be rejected with NullPointerException
    try {
        Pattern.compile(null);
        fail("NullPointerException expected");
    } catch (NullPointerException expected) {
        // null rejected
    }

    // An empty expression is legal
    Pattern.compile("");
    // note: invalid regex syntax checked in PatternSyntaxExceptionTest

    // No flags at all is legal
    int flags = 0;
    Pattern.compile("foo", flags);

    // All valid flags together are legal
    flags = flags | Pattern.UNIX_LINES | Pattern.CASE_INSENSITIVE
            | Pattern.MULTILINE | Pattern.CANON_EQ | Pattern.COMMENTS
            | Pattern.DOTALL | Pattern.UNICODE_CASE;
    Pattern.compile("foo", flags);

    // add invalid flags - should get IllegalArgumentException
    // regression test for HARMONY-4248
    flags |= 0xFFFFFFFF;
    // TODO investigate, why this fails and uncomment
    /*
    try {
    Pattern.compile("foo", flags);
    fail("Expected IllegalArgumentException to be thrown");
    } catch (IllegalArgumentException e) {
    // This is the expected exception
    }*/
}
}

View File

@ -0,0 +1,61 @@
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.teavm.classlib.java.util.regex;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import junit.framework.TestCase;
/**
 * Checks the index and pattern reported by {@link PatternSyntaxException}
 * when {@code Pattern.compile} is given a malformed regular expression.
 */
@SuppressWarnings("nls")
public class PatternSyntaxExceptionTest extends TestCase {
    /**
     * An opening parenthesis without its closing counterpart must be
     * reported at index 1.
     */
    public void testCase() {
        // TOFIX: Commented out assertEquals tests...
        // TOFIX: should we match exception strings?
        // assertEquals("Unclosed group", e.getDescription());
        // assertEquals("Unclosed group near index 1\n(\n ^",
        // e.getMessage());
        assertSyntaxError("(", 1);
    }

    /**
     * A character class ending in a dangling range must be reported at
     * index 3.
     */
    public void testCase2() {
        // TOFIX: Commented out assertEquals tests...
        // TOFIX: should we match exception strings?
        // assertEquals("Illegal character range", e.getDescription());
        // assertEquals("Illegal character range near index 3\n[4-\n ^",
        // e.getMessage());
        assertSyntaxError("[4-", 3);
    }

    /**
     * Compiles {@code regex} and requires a {@link PatternSyntaxException}
     * whose index equals {@code expectedIndex} and whose pattern equals
     * {@code regex}; fails the test if compilation succeeds.
     */
    private static void assertSyntaxError(String regex, int expectedIndex) {
        try {
            Pattern.compile(regex);
        } catch (PatternSyntaxException syntaxError) {
            assertEquals(expectedIndex, syntaxError.getIndex());
            assertEquals(regex, syntaxError.getPattern());
            return;
        }
        fail("PatternSyntaxException expected");
    }
}

Some files were not shown because too many files have changed in this diff Show More