mirror of
https://github.com/Eaglercraft-TeaVM-Fork/eagler-teavm.git
synced 2024-12-22 16:14:10 -08:00
Merge branch 'regex'
This commit is contained in:
commit
b93c71ad41
|
@ -118,6 +118,7 @@
|
|||
<argument>java.util</argument>
|
||||
<argument>java.util.logging</argument>
|
||||
<argument>java.util.concurrent</argument>
|
||||
<argument>java.util.regex</argument>
|
||||
<argument>-output</argument>
|
||||
<argument>${project.build.directory}/jcl-report</argument>
|
||||
</arguments>
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.teavm.classlib.java.util.TArrays;
|
|||
import org.teavm.classlib.java.util.TComparator;
|
||||
import org.teavm.classlib.java.util.THashMap;
|
||||
import org.teavm.classlib.java.util.TMap;
|
||||
import org.teavm.classlib.java.util.regex.TPattern;
|
||||
import org.teavm.dependency.PluggableDependency;
|
||||
import org.teavm.javascript.ni.InjectedBy;
|
||||
import org.teavm.javascript.ni.Rename;
|
||||
|
@ -161,6 +162,18 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
|||
}
|
||||
}
|
||||
|
||||
public boolean contentEquals(TStringBuffer buffer) {
|
||||
if (characters.length != buffer.length()) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < characters.length; ++i) {
|
||||
if (characters[i] != buffer.charAt(i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean contentEquals(TCharSequence charSeq) {
|
||||
if (this == charSeq) {
|
||||
return true;
|
||||
|
@ -226,6 +239,24 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
|||
return startsWith(prefix, 0);
|
||||
}
|
||||
|
||||
public boolean regionMatches(boolean ignoreCase, int toffset, String other, int ooffset, int len) {
|
||||
if (toffset < 0 || ooffset < 0 || toffset + len > length() || ooffset + len > other.length()) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < len; ++i) {
|
||||
char a = charAt(toffset++);
|
||||
char b = other.charAt(ooffset++);
|
||||
if (ignoreCase) {
|
||||
a = TCharacter.toLowerCase(a);
|
||||
b = TCharacter.toLowerCase(b);
|
||||
}
|
||||
if (a != b) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean regionMatches(int toffset, TString other, int ooffset, int len) {
|
||||
if (toffset < 0 || ooffset < 0 || toffset + len > length() || ooffset + len > other.length()) {
|
||||
return false;
|
||||
|
@ -613,4 +644,24 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
|||
}
|
||||
return interned;
|
||||
}
|
||||
|
||||
public boolean matches(String regex) {
|
||||
return TPattern.matches(regex, this.toString());
|
||||
}
|
||||
|
||||
public String[] split(String regex) {
|
||||
return TPattern.compile(regex).split(this.toString());
|
||||
}
|
||||
|
||||
public String[] split(String regex, int limit) {
|
||||
return TPattern.compile(regex).split(this.toString(), limit);
|
||||
}
|
||||
|
||||
public String replaceAll(String regex, String replacement) {
|
||||
return TPattern.compile(regex).matcher(toString()).replaceAll(replacement);
|
||||
}
|
||||
|
||||
public String replaceFirst(String regex, String replacement) {
|
||||
return TPattern.compile(regex).matcher(toString()).replaceFirst(replacement);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.java.util;
|
||||
|
||||
import org.teavm.classlib.java.lang.TRuntimeException;
|
||||
import org.teavm.classlib.java.lang.TString;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Alexey Andreev
|
||||
*/
|
||||
public class TMissingResourceException extends TRuntimeException {
|
||||
private static final long serialVersionUID = 6730397307327337970L;
|
||||
private String className;
|
||||
private String key;
|
||||
|
||||
public TMissingResourceException(String s, String className, String key) {
|
||||
super(TString.wrap(s));
|
||||
this.className = className;
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
public String getClassName() {
|
||||
return className;
|
||||
}
|
||||
|
||||
public String getKey() {
|
||||
return key;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,896 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* This class represents character classes, i.e. sets of character either
|
||||
* predefined or user defined.
|
||||
*
|
||||
* Note: this class represents a token, not a node, so it is constructed by the lexer.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
abstract class AbstractCharClass extends SpecialToken {
|
||||
protected boolean alt;
|
||||
|
||||
protected boolean altSurrogates;
|
||||
|
||||
// Character.MAX_SURROGATE - Character.MIN_SURROGATE + 1
|
||||
static int SURROGATE_CARDINALITY = 2048;
|
||||
|
||||
BitSet lowHighSurrogates = new BitSet(SURROGATE_CARDINALITY);
|
||||
|
||||
AbstractCharClass charClassWithoutSurrogates = null;
|
||||
|
||||
AbstractCharClass charClassWithSurrogates = null;
|
||||
|
||||
static PredefinedCharacterClasses charClasses = new PredefinedCharacterClasses();
|
||||
|
||||
/*
|
||||
* Indicates if this class may contain supplementary Unicode codepoints. If
|
||||
* this flag is specified it doesn't mean that this class contains
|
||||
* supplementary characters but may contain.
|
||||
*/
|
||||
protected boolean mayContainSupplCodepoints = false;
|
||||
|
||||
/**
|
||||
* Returns true if this char class contains character specified;
|
||||
*
|
||||
* @param ch
|
||||
* character to check;
|
||||
*/
|
||||
abstract public boolean contains(int ch);
|
||||
|
||||
/**
|
||||
* Returns BitSet representing this character class or <code>null</code> if
|
||||
* this character class does not have character representation;
|
||||
*
|
||||
* @return bitset
|
||||
*/
|
||||
protected BitSet getBits() {
|
||||
return null;
|
||||
}
|
||||
|
||||
protected BitSet getLowHighSurrogates() {
|
||||
return lowHighSurrogates;
|
||||
}
|
||||
|
||||
public boolean hasLowHighSurrogates() {
|
||||
return altSurrogates ? lowHighSurrogates.nextClearBit(0) < SURROGATE_CARDINALITY : lowHighSurrogates
|
||||
.nextSetBit(0) < SURROGATE_CARDINALITY;
|
||||
}
|
||||
|
||||
public boolean mayContainSupplCodepoints() {
|
||||
return mayContainSupplCodepoints;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return SpecialToken.TOK_CHARCLASS;
|
||||
}
|
||||
|
||||
public AbstractCharClass getInstance() {
|
||||
return this;
|
||||
}
|
||||
|
||||
public AbstractCharClass getSurrogates() {
|
||||
|
||||
if (charClassWithSurrogates == null) {
|
||||
final BitSet lHS = getLowHighSurrogates();
|
||||
|
||||
charClassWithSurrogates = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
int index = ch - Character.MIN_SURROGATE;
|
||||
|
||||
return ((index >= 0) && (index < AbstractCharClass.SURROGATE_CARDINALITY)) ? this.altSurrogates ^
|
||||
lHS.get(index) : false;
|
||||
}
|
||||
};
|
||||
charClassWithSurrogates.setNegative(this.altSurrogates);
|
||||
}
|
||||
|
||||
return charClassWithSurrogates;
|
||||
}
|
||||
|
||||
public AbstractCharClass getWithoutSurrogates() {
|
||||
if (charClassWithoutSurrogates == null) {
|
||||
final BitSet lHS = getLowHighSurrogates();
|
||||
final AbstractCharClass thisClass = this;
|
||||
|
||||
charClassWithoutSurrogates = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
int index = ch - Character.MIN_SURROGATE;
|
||||
|
||||
boolean containslHS = ((index >= 0) && (index < AbstractCharClass.SURROGATE_CARDINALITY)) ? this.altSurrogates ^
|
||||
lHS.get(index)
|
||||
: false;
|
||||
|
||||
return thisClass.contains(ch) && !containslHS;
|
||||
}
|
||||
};
|
||||
charClassWithoutSurrogates.setNegative(isNegative());
|
||||
charClassWithoutSurrogates.mayContainSupplCodepoints = mayContainSupplCodepoints;
|
||||
}
|
||||
|
||||
return charClassWithoutSurrogates;
|
||||
}
|
||||
|
||||
public boolean hasUCI() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets this CharClass to negative form, i.e. if they will add some
|
||||
* characters and after that set this class to negative it will accept all
|
||||
* the characters except previously set ones.
|
||||
*
|
||||
* Although this method will not alternate all the already set characters,
|
||||
* just overall meaning of the class.
|
||||
*
|
||||
* @see #contains(int)
|
||||
* @see #intersect(CharClass)
|
||||
* @see #union(CharClass)
|
||||
*/
|
||||
public AbstractCharClass setNegative(boolean value) {
|
||||
if (alt ^ value) {
|
||||
alt = !alt;
|
||||
altSurrogates = !altSurrogates;
|
||||
}
|
||||
if (!mayContainSupplCodepoints) {
|
||||
mayContainSupplCodepoints = true;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean isNegative() {
|
||||
return alt;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Static methods and predefined classes
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
public static boolean intersects(int ch1, int ch2) {
|
||||
return ch1 == ch2;
|
||||
}
|
||||
|
||||
public static boolean intersects(AbstractCharClass cc, int ch) {
|
||||
return cc.contains(ch);
|
||||
}
|
||||
|
||||
public static boolean intersects(AbstractCharClass cc1, AbstractCharClass cc2) {
|
||||
if (cc1.getBits() == null || cc2.getBits() == null)
|
||||
return true;
|
||||
return cc1.getBits().intersects(cc2.getBits());
|
||||
}
|
||||
|
||||
public static AbstractCharClass getPredefinedClass(String name, boolean negative) {
|
||||
return ((LazyCharClass)charClasses.getObject(name)).getValue(negative);
|
||||
}
|
||||
|
||||
abstract static class LazyCharClass {
|
||||
AbstractCharClass posValue = null;
|
||||
|
||||
AbstractCharClass negValue = null;
|
||||
|
||||
public AbstractCharClass getValue(boolean negative) {
|
||||
if (!negative && posValue == null) {
|
||||
posValue = computeValue();
|
||||
} else if (negative && negValue == null) {
|
||||
negValue = computeValue().setNegative(true);
|
||||
}
|
||||
if (!negative)
|
||||
return posValue;
|
||||
return negValue;
|
||||
}
|
||||
|
||||
protected abstract AbstractCharClass computeValue();
|
||||
}
|
||||
|
||||
static class LazyDigit extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new CharClass().add('0', '9');
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyNonDigit extends LazyDigit {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = super.computeValue().setNegative(true);
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazySpace extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
/* 9-13 - \t\n\x0B\f\r; 32 - ' ' */
|
||||
return new CharClass().add(9, 13).add(32);
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyNonSpace extends LazySpace {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = super.computeValue().setNegative(true);
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyWord extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new CharClass().add('a', 'z').add('A', 'Z').add('0', '9').add('_');
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyNonWord extends LazyWord {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = super.computeValue().setNegative(true);
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyLower extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new CharClass().add('a', 'z');
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyUpper extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new CharClass().add('A', 'Z');
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyASCII extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new CharClass().add(0x00, 0x7F);
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyAlpha extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new CharClass().add('a', 'z').add('A', 'Z');
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyAlnum extends LazyAlpha {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return ((CharClass)super.computeValue()).add('0', '9');
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyPunct extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
/* Punctuation !"#$%&'()*+,-./:;<=>?@ [\]^_` {|}~ */
|
||||
return new CharClass().add(0x21, 0x40).add(0x5B, 0x60).add(0x7B, 0x7E);
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyGraph extends LazyAlnum {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
/* plus punctuation */
|
||||
return ((CharClass)super.computeValue()).add(0x21, 0x40).add(0x5B, 0x60).add(0x7B, 0x7E);
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyPrint extends LazyGraph {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return ((CharClass)super.computeValue()).add(0x20);
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyBlank extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new CharClass().add(' ').add('\t');
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyCntrl extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new CharClass().add(0x00, 0x1F).add(0x7F);
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyXDigit extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new CharClass().add('0', '9').add('a', 'f').add('A', 'F');
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyRange extends LazyCharClass {
|
||||
int start, end;
|
||||
|
||||
public LazyRange(int start, int end) {
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new CharClass().add(start, end);
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazySpecialsBlock extends LazyCharClass {
|
||||
@Override
|
||||
public AbstractCharClass computeValue() {
|
||||
return new CharClass().add(0xFEFF, 0xFEFF).add(0xFFF0, 0xFFFD);
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyCategoryScope extends LazyCharClass {
|
||||
int category;
|
||||
|
||||
boolean mayContainSupplCodepoints;
|
||||
|
||||
boolean containsAllSurrogates;
|
||||
|
||||
public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints) {
|
||||
this.mayContainSupplCodepoints = mayContainSupplCodepoints;
|
||||
this.category = cat;
|
||||
}
|
||||
|
||||
public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints, boolean containsAllSurrogates) {
|
||||
this.containsAllSurrogates = containsAllSurrogates;
|
||||
this.mayContainSupplCodepoints = mayContainSupplCodepoints;
|
||||
this.category = cat;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new UnicodeCategoryScope(category);
|
||||
if (containsAllSurrogates) {
|
||||
chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
|
||||
}
|
||||
|
||||
chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyCategory extends LazyCharClass {
|
||||
int category;
|
||||
|
||||
boolean mayContainSupplCodepoints;
|
||||
|
||||
boolean containsAllSurrogates;
|
||||
|
||||
public LazyCategory(int cat, boolean mayContainSupplCodepoints) {
|
||||
this.mayContainSupplCodepoints = mayContainSupplCodepoints;
|
||||
this.category = cat;
|
||||
}
|
||||
|
||||
public LazyCategory(int cat, boolean mayContainSupplCodepoints, boolean containsAllSurrogates) {
|
||||
this.containsAllSurrogates = containsAllSurrogates;
|
||||
this.mayContainSupplCodepoints = mayContainSupplCodepoints;
|
||||
this.category = cat;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new UnicodeCategory(category);
|
||||
if (containsAllSurrogates) {
|
||||
chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
|
||||
}
|
||||
chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;
|
||||
;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaLowerCase extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isLowerCase(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaUpperCase extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isUpperCase(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaWhitespace extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isWhitespace(ch);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaMirrored extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
// TODO implement this method and uncomment
|
||||
// return Character.isMirrored(ch);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaDefined extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isDefined(ch);
|
||||
}
|
||||
};
|
||||
chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaDigit extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isDigit(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaIdentifierIgnorable extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isIdentifierIgnorable(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaISOControl extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isISOControl(ch);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaJavaIdentifierPart extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isJavaIdentifierPart(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaJavaIdentifierStart extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isJavaIdentifierStart(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaLetter extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isLetter(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaLetterOrDigit extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isLetterOrDigit(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaSpaceChar extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isSpaceChar(ch);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaTitleCase extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
return new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isTitleCase(ch);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaUnicodeIdentifierPart extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isUnicodeIdentifierPart(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
static class LazyJavaUnicodeIdentifierStart extends LazyCharClass {
|
||||
@Override
|
||||
protected AbstractCharClass computeValue() {
|
||||
AbstractCharClass chCl = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return Character.isUnicodeIdentifierStart(ch);
|
||||
}
|
||||
};
|
||||
|
||||
chCl.mayContainSupplCodepoints = true;
|
||||
return chCl;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* character classes generated from http://www.unicode.org/reports/tr18/
|
||||
* http://www.unicode.org/Public/4.1.0/ucd/Blocks.txt
|
||||
*/
|
||||
static final class PredefinedCharacterClasses {
|
||||
static LazyCharClass space = new LazySpace();
|
||||
|
||||
static LazyCharClass digit = new LazyDigit();
|
||||
|
||||
static final Object[][] contents = {
|
||||
{ "Lower", new LazyLower() }, //$NON-NLS-1$
|
||||
{ "Upper", new LazyUpper() }, //$NON-NLS-1$
|
||||
{ "ASCII", new LazyASCII() }, //$NON-NLS-1$
|
||||
{ "Alpha", new LazyAlpha() }, //$NON-NLS-1$
|
||||
{ "Digit", digit }, //$NON-NLS-1$
|
||||
{ "Alnum", new LazyAlnum() }, //$NON-NLS-1$
|
||||
{ "Punct", new LazyPunct() }, //$NON-NLS-1$
|
||||
{ "Graph", new LazyGraph() }, //$NON-NLS-1$
|
||||
{ "Print", new LazyPrint() }, //$NON-NLS-1$
|
||||
{ "Blank", new LazyBlank() }, //$NON-NLS-1$
|
||||
{ "Cntrl", new LazyCntrl() }, //$NON-NLS-1$
|
||||
{ "XDigit", new LazyXDigit() }, //$NON-NLS-1$
|
||||
{ "javaLowerCase", new LazyJavaLowerCase() }, //$NON-NLS-1$
|
||||
{ "javaUpperCase", new LazyJavaUpperCase() }, //$NON-NLS-1$
|
||||
{ "javaWhitespace", new LazyJavaWhitespace() }, //$NON-NLS-1$
|
||||
{ "javaMirrored", new LazyJavaMirrored() }, //$NON-NLS-1$
|
||||
{ "javaDefined", new LazyJavaDefined() }, //$NON-NLS-1$
|
||||
{ "javaDigit", new LazyJavaDigit() }, //$NON-NLS-1$
|
||||
{ "javaIdentifierIgnorable", new LazyJavaIdentifierIgnorable() }, //$NON-NLS-1$
|
||||
{ "javaISOControl", new LazyJavaISOControl() }, //$NON-NLS-1$
|
||||
{ "javaJavaIdentifierPart", new LazyJavaJavaIdentifierPart() }, //$NON-NLS-1$
|
||||
{ "javaJavaIdentifierStart", new LazyJavaJavaIdentifierStart() }, //$NON-NLS-1$
|
||||
{ "javaLetter", new LazyJavaLetter() }, //$NON-NLS-1$
|
||||
{ "javaLetterOrDigit", new LazyJavaLetterOrDigit() }, //$NON-NLS-1$
|
||||
{ "javaSpaceChar", new LazyJavaSpaceChar() }, //$NON-NLS-1$
|
||||
{ "javaTitleCase", new LazyJavaTitleCase() }, //$NON-NLS-1$
|
||||
{ "javaUnicodeIdentifierPart", new LazyJavaUnicodeIdentifierPart() }, //$NON-NLS-1$
|
||||
{ "javaUnicodeIdentifierStart", new LazyJavaUnicodeIdentifierStart() }, //$NON-NLS-1$
|
||||
{ "Space", space }, //$NON-NLS-1$
|
||||
{ "w", new LazyWord() }, //$NON-NLS-1$
|
||||
{ "W", new LazyNonWord() }, //$NON-NLS-1$
|
||||
{ "s", space }, //$NON-NLS-1$
|
||||
{ "S", new LazyNonSpace() }, //$NON-NLS-1$
|
||||
{ "d", digit }, //$NON-NLS-1$
|
||||
{ "D", new LazyNonDigit() }, //$NON-NLS-1$
|
||||
{ "BasicLatin", new LazyRange(0x0000, 0x007F) }, //$NON-NLS-1$
|
||||
{ "Latin-1Supplement", new LazyRange(0x0080, 0x00FF) }, //$NON-NLS-1$
|
||||
{ "LatinExtended-A", new LazyRange(0x0100, 0x017F) }, //$NON-NLS-1$
|
||||
{ "LatinExtended-B", new LazyRange(0x0180, 0x024F) }, //$NON-NLS-1$
|
||||
{ "IPAExtensions", new LazyRange(0x0250, 0x02AF) }, //$NON-NLS-1$
|
||||
{ "SpacingModifierLetters", new LazyRange(0x02B0, 0x02FF) }, //$NON-NLS-1$
|
||||
{ "CombiningDiacriticalMarks", new LazyRange(0x0300, 0x036F) }, //$NON-NLS-1$
|
||||
{ "Greek", new LazyRange(0x0370, 0x03FF) }, //$NON-NLS-1$
|
||||
{ "Cyrillic", new LazyRange(0x0400, 0x04FF) }, //$NON-NLS-1$
|
||||
{ "CyrillicSupplement", new LazyRange(0x0500, 0x052F) }, //$NON-NLS-1$
|
||||
{ "Armenian", new LazyRange(0x0530, 0x058F) }, //$NON-NLS-1$
|
||||
{ "Hebrew", new LazyRange(0x0590, 0x05FF) }, //$NON-NLS-1$
|
||||
{ "Arabic", new LazyRange(0x0600, 0x06FF) }, //$NON-NLS-1$
|
||||
{ "Syriac", new LazyRange(0x0700, 0x074F) }, //$NON-NLS-1$
|
||||
{ "ArabicSupplement", new LazyRange(0x0750, 0x077F) }, //$NON-NLS-1$
|
||||
{ "Thaana", new LazyRange(0x0780, 0x07BF) }, //$NON-NLS-1$
|
||||
{ "Devanagari", new LazyRange(0x0900, 0x097F) }, //$NON-NLS-1$
|
||||
{ "Bengali", new LazyRange(0x0980, 0x09FF) }, //$NON-NLS-1$
|
||||
{ "Gurmukhi", new LazyRange(0x0A00, 0x0A7F) }, //$NON-NLS-1$
|
||||
{ "Gujarati", new LazyRange(0x0A80, 0x0AFF) }, //$NON-NLS-1$
|
||||
{ "Oriya", new LazyRange(0x0B00, 0x0B7F) }, //$NON-NLS-1$
|
||||
{ "Tamil", new LazyRange(0x0B80, 0x0BFF) }, //$NON-NLS-1$
|
||||
{ "Telugu", new LazyRange(0x0C00, 0x0C7F) }, //$NON-NLS-1$
|
||||
{ "Kannada", new LazyRange(0x0C80, 0x0CFF) }, //$NON-NLS-1$
|
||||
{ "Malayalam", new LazyRange(0x0D00, 0x0D7F) }, //$NON-NLS-1$
|
||||
{ "Sinhala", new LazyRange(0x0D80, 0x0DFF) }, //$NON-NLS-1$
|
||||
{ "Thai", new LazyRange(0x0E00, 0x0E7F) }, //$NON-NLS-1$
|
||||
{ "Lao", new LazyRange(0x0E80, 0x0EFF) }, //$NON-NLS-1$
|
||||
{ "Tibetan", new LazyRange(0x0F00, 0x0FFF) }, //$NON-NLS-1$
|
||||
{ "Myanmar", new LazyRange(0x1000, 0x109F) }, //$NON-NLS-1$
|
||||
{ "Georgian", new LazyRange(0x10A0, 0x10FF) }, //$NON-NLS-1$
|
||||
{ "HangulJamo", new LazyRange(0x1100, 0x11FF) }, //$NON-NLS-1$
|
||||
{ "Ethiopic", new LazyRange(0x1200, 0x137F) }, //$NON-NLS-1$
|
||||
{ "EthiopicSupplement", new LazyRange(0x1380, 0x139F) }, //$NON-NLS-1$
|
||||
{ "Cherokee", new LazyRange(0x13A0, 0x13FF) }, //$NON-NLS-1$
|
||||
{ "UnifiedCanadianAboriginalSyllabics", //$NON-NLS-1$
|
||||
new LazyRange(0x1400, 0x167F) },
|
||||
{ "Ogham", new LazyRange(0x1680, 0x169F) }, //$NON-NLS-1$
|
||||
{ "Runic", new LazyRange(0x16A0, 0x16FF) }, //$NON-NLS-1$
|
||||
{ "Tagalog", new LazyRange(0x1700, 0x171F) }, //$NON-NLS-1$
|
||||
{ "Hanunoo", new LazyRange(0x1720, 0x173F) }, //$NON-NLS-1$
|
||||
{ "Buhid", new LazyRange(0x1740, 0x175F) }, //$NON-NLS-1$
|
||||
{ "Tagbanwa", new LazyRange(0x1760, 0x177F) }, //$NON-NLS-1$
|
||||
{ "Khmer", new LazyRange(0x1780, 0x17FF) }, //$NON-NLS-1$
|
||||
{ "Mongolian", new LazyRange(0x1800, 0x18AF) }, //$NON-NLS-1$
|
||||
{ "Limbu", new LazyRange(0x1900, 0x194F) }, //$NON-NLS-1$
|
||||
{ "TaiLe", new LazyRange(0x1950, 0x197F) }, //$NON-NLS-1$
|
||||
{ "NewTaiLue", new LazyRange(0x1980, 0x19DF) }, //$NON-NLS-1$
|
||||
{ "KhmerSymbols", new LazyRange(0x19E0, 0x19FF) }, //$NON-NLS-1$
|
||||
{ "Buginese", new LazyRange(0x1A00, 0x1A1F) }, //$NON-NLS-1$
|
||||
{ "PhoneticExtensions", new LazyRange(0x1D00, 0x1D7F) }, //$NON-NLS-1$
|
||||
{ "PhoneticExtensionsSupplement", new LazyRange(0x1D80, 0x1DBF) }, //$NON-NLS-1$
|
||||
{ "CombiningDiacriticalMarksSupplement", //$NON-NLS-1$
|
||||
new LazyRange(0x1DC0, 0x1DFF) },
|
||||
{ "LatinExtendedAdditional", new LazyRange(0x1E00, 0x1EFF) }, //$NON-NLS-1$
|
||||
{ "GreekExtended", new LazyRange(0x1F00, 0x1FFF) }, //$NON-NLS-1$
|
||||
{ "GeneralPunctuation", new LazyRange(0x2000, 0x206F) }, //$NON-NLS-1$
|
||||
{ "SuperscriptsandSubscripts", new LazyRange(0x2070, 0x209F) }, //$NON-NLS-1$
|
||||
{ "CurrencySymbols", new LazyRange(0x20A0, 0x20CF) }, //$NON-NLS-1$
|
||||
{ "CombiningMarksforSymbols", new LazyRange(0x20D0, 0x20FF) }, //$NON-NLS-1$
|
||||
{ "LetterlikeSymbols", new LazyRange(0x2100, 0x214F) }, //$NON-NLS-1$
|
||||
{ "NumberForms", new LazyRange(0x2150, 0x218F) }, //$NON-NLS-1$
|
||||
{ "Arrows", new LazyRange(0x2190, 0x21FF) }, //$NON-NLS-1$
|
||||
{ "MathematicalOperators", new LazyRange(0x2200, 0x22FF) }, //$NON-NLS-1$
|
||||
{ "MiscellaneousTechnical", new LazyRange(0x2300, 0x23FF) }, //$NON-NLS-1$
|
||||
{ "ControlPictures", new LazyRange(0x2400, 0x243F) }, //$NON-NLS-1$
|
||||
{ "OpticalCharacterRecognition", new LazyRange(0x2440, 0x245F) }, //$NON-NLS-1$
|
||||
{ "EnclosedAlphanumerics", new LazyRange(0x2460, 0x24FF) }, //$NON-NLS-1$
|
||||
{ "BoxDrawing", new LazyRange(0x2500, 0x257F) }, //$NON-NLS-1$
|
||||
{ "BlockElements", new LazyRange(0x2580, 0x259F) }, //$NON-NLS-1$
|
||||
{ "GeometricShapes", new LazyRange(0x25A0, 0x25FF) }, //$NON-NLS-1$
|
||||
{ "MiscellaneousSymbols", new LazyRange(0x2600, 0x26FF) }, //$NON-NLS-1$
|
||||
{ "Dingbats", new LazyRange(0x2700, 0x27BF) }, //$NON-NLS-1$
|
||||
{ "MiscellaneousMathematicalSymbols-A", //$NON-NLS-1$
|
||||
new LazyRange(0x27C0, 0x27EF) },
|
||||
{ "SupplementalArrows-A", new LazyRange(0x27F0, 0x27FF) }, //$NON-NLS-1$
|
||||
{ "BraillePatterns", new LazyRange(0x2800, 0x28FF) }, //$NON-NLS-1$
|
||||
{ "SupplementalArrows-B", new LazyRange(0x2900, 0x297F) }, //$NON-NLS-1$
|
||||
{ "MiscellaneousMathematicalSymbols-B", //$NON-NLS-1$
|
||||
new LazyRange(0x2980, 0x29FF) },
|
||||
{ "SupplementalMathematicalOperators", //$NON-NLS-1$
|
||||
new LazyRange(0x2A00, 0x2AFF) },
|
||||
{ "MiscellaneousSymbolsandArrows", //$NON-NLS-1$
|
||||
new LazyRange(0x2B00, 0x2BFF) },
|
||||
{ "Glagolitic", new LazyRange(0x2C00, 0x2C5F) }, //$NON-NLS-1$
|
||||
{ "Coptic", new LazyRange(0x2C80, 0x2CFF) }, //$NON-NLS-1$
|
||||
{ "GeorgianSupplement", new LazyRange(0x2D00, 0x2D2F) }, //$NON-NLS-1$
|
||||
{ "Tifinagh", new LazyRange(0x2D30, 0x2D7F) }, //$NON-NLS-1$
|
||||
{ "EthiopicExtended", new LazyRange(0x2D80, 0x2DDF) }, //$NON-NLS-1$
|
||||
{ "SupplementalPunctuation", new LazyRange(0x2E00, 0x2E7F) }, //$NON-NLS-1$
|
||||
{ "CJKRadicalsSupplement", new LazyRange(0x2E80, 0x2EFF) }, //$NON-NLS-1$
|
||||
{ "KangxiRadicals", new LazyRange(0x2F00, 0x2FDF) }, //$NON-NLS-1$
|
||||
{ "IdeographicDescriptionCharacters", //$NON-NLS-1$
|
||||
new LazyRange(0x2FF0, 0x2FFF) },
|
||||
{ "CJKSymbolsandPunctuation", new LazyRange(0x3000, 0x303F) }, //$NON-NLS-1$
|
||||
{ "Hiragana", new LazyRange(0x3040, 0x309F) }, //$NON-NLS-1$
|
||||
{ "Katakana", new LazyRange(0x30A0, 0x30FF) }, //$NON-NLS-1$
|
||||
{ "Bopomofo", new LazyRange(0x3100, 0x312F) }, //$NON-NLS-1$
|
||||
{ "HangulCompatibilityJamo", new LazyRange(0x3130, 0x318F) }, //$NON-NLS-1$
|
||||
{ "Kanbun", new LazyRange(0x3190, 0x319F) }, //$NON-NLS-1$
|
||||
{ "BopomofoExtended", new LazyRange(0x31A0, 0x31BF) }, //$NON-NLS-1$
|
||||
{ "CJKStrokes", new LazyRange(0x31C0, 0x31EF) }, //$NON-NLS-1$
|
||||
{ "KatakanaPhoneticExtensions", new LazyRange(0x31F0, 0x31FF) }, //$NON-NLS-1$
|
||||
{ "EnclosedCJKLettersandMonths", new LazyRange(0x3200, 0x32FF) }, //$NON-NLS-1$
|
||||
{ "CJKCompatibility", new LazyRange(0x3300, 0x33FF) }, //$NON-NLS-1$
|
||||
{ "CJKUnifiedIdeographsExtensionA", //$NON-NLS-1$
|
||||
new LazyRange(0x3400, 0x4DB5) },
|
||||
{ "YijingHexagramSymbols", new LazyRange(0x4DC0, 0x4DFF) }, //$NON-NLS-1$
|
||||
{ "CJKUnifiedIdeographs", new LazyRange(0x4E00, 0x9FFF) }, //$NON-NLS-1$
|
||||
{ "YiSyllables", new LazyRange(0xA000, 0xA48F) }, //$NON-NLS-1$
|
||||
{ "YiRadicals", new LazyRange(0xA490, 0xA4CF) }, //$NON-NLS-1$
|
||||
{ "ModifierToneLetters", new LazyRange(0xA700, 0xA71F) }, //$NON-NLS-1$
|
||||
{ "SylotiNagri", new LazyRange(0xA800, 0xA82F) }, //$NON-NLS-1$
|
||||
{ "HangulSyllables", new LazyRange(0xAC00, 0xD7A3) }, //$NON-NLS-1$
|
||||
{ "HighSurrogates", new LazyRange(0xD800, 0xDB7F) }, //$NON-NLS-1$
|
||||
{ "HighPrivateUseSurrogates", new LazyRange(0xDB80, 0xDBFF) }, //$NON-NLS-1$
|
||||
{ "LowSurrogates", new LazyRange(0xDC00, 0xDFFF) }, //$NON-NLS-1$
|
||||
{ "PrivateUseArea", new LazyRange(0xE000, 0xF8FF) }, //$NON-NLS-1$
|
||||
{ "CJKCompatibilityIdeographs", new LazyRange(0xF900, 0xFAFF) }, //$NON-NLS-1$
|
||||
{ "AlphabeticPresentationForms", new LazyRange(0xFB00, 0xFB4F) }, //$NON-NLS-1$
|
||||
{ "ArabicPresentationForms-A", new LazyRange(0xFB50, 0xFDFF) }, //$NON-NLS-1$
|
||||
{ "VariationSelectors", new LazyRange(0xFE00, 0xFE0F) }, //$NON-NLS-1$
|
||||
{ "VerticalForms", new LazyRange(0xFE10, 0xFE1F) }, //$NON-NLS-1$
|
||||
{ "CombiningHalfMarks", new LazyRange(0xFE20, 0xFE2F) }, //$NON-NLS-1$
|
||||
{ "CJKCompatibilityForms", new LazyRange(0xFE30, 0xFE4F) }, //$NON-NLS-1$
|
||||
{ "SmallFormVariants", new LazyRange(0xFE50, 0xFE6F) }, //$NON-NLS-1$
|
||||
{ "ArabicPresentationForms-B", new LazyRange(0xFE70, 0xFEFF) }, //$NON-NLS-1$
|
||||
{ "HalfwidthandFullwidthForms", new LazyRange(0xFF00, 0xFFEF) }, //$NON-NLS-1$
|
||||
{ "all", new LazyRange(0x00, 0x10FFFF) }, //$NON-NLS-1$
|
||||
{ "Specials", new LazySpecialsBlock() }, //$NON-NLS-1$
|
||||
{ "Cn", new LazyCategory(Character.UNASSIGNED, true) },
|
||||
{ "IsL", new LazyCategoryScope(0x3E, true) },
|
||||
{ "Lu", new LazyCategory(Character.UPPERCASE_LETTER, true) },
|
||||
{ "Ll", new LazyCategory(Character.LOWERCASE_LETTER, true) },
|
||||
{ "Lt", new LazyCategory(Character.TITLECASE_LETTER, false) },
|
||||
{ "Lm", new LazyCategory(Character.MODIFIER_LETTER, false) },
|
||||
{ "Lo", new LazyCategory(Character.OTHER_LETTER, true) },
|
||||
{ "IsM", new LazyCategoryScope(0x1C0, true) },
|
||||
{ "Mn", new LazyCategory(Character.NON_SPACING_MARK, true) },
|
||||
{ "Me", new LazyCategory(Character.ENCLOSING_MARK, false) },
|
||||
{ "Mc", new LazyCategory(Character.COMBINING_SPACING_MARK, true) },
|
||||
{ "N", new LazyCategoryScope(0xE00, true) },
|
||||
{ "Nd", new LazyCategory(Character.DECIMAL_DIGIT_NUMBER, true) },
|
||||
{ "Nl", new LazyCategory(Character.LETTER_NUMBER, true) },
|
||||
{ "No", new LazyCategory(Character.OTHER_NUMBER, true) },
|
||||
{ "IsZ", new LazyCategoryScope(0x7000, false) },
|
||||
{ "Zs", new LazyCategory(Character.SPACE_SEPARATOR, false) },
|
||||
{ "Zl", new LazyCategory(Character.LINE_SEPARATOR, false) },
|
||||
{ "Zp", new LazyCategory(Character.PARAGRAPH_SEPARATOR, false) },
|
||||
{ "IsC", new LazyCategoryScope(0xF0000, true, true) },
|
||||
{ "Cc", new LazyCategory(Character.CONTROL, false) },
|
||||
{ "Cf", new LazyCategory(Character.FORMAT, true) },
|
||||
{ "Co", new LazyCategory(Character.PRIVATE_USE, true) },
|
||||
{ "Cs", new LazyCategory(Character.SURROGATE, false, true) },
|
||||
{
|
||||
"IsP",
|
||||
new LazyCategoryScope((1 << Character.DASH_PUNCTUATION) | (1 << Character.START_PUNCTUATION) |
|
||||
(1 << Character.END_PUNCTUATION) | (1 << Character.CONNECTOR_PUNCTUATION) |
|
||||
(1 << Character.OTHER_PUNCTUATION) | (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
|
||||
(1 << Character.FINAL_QUOTE_PUNCTUATION), true) },
|
||||
{ "Pd", new LazyCategory(Character.DASH_PUNCTUATION, false) },
|
||||
{ "Ps", new LazyCategory(Character.START_PUNCTUATION, false) },
|
||||
{ "Pe", new LazyCategory(Character.END_PUNCTUATION, false) },
|
||||
{ "Pc", new LazyCategory(Character.CONNECTOR_PUNCTUATION, false) },
|
||||
{ "Po", new LazyCategory(Character.OTHER_PUNCTUATION, true) },
|
||||
{ "IsS", new LazyCategoryScope(0x7E000000, true) },
|
||||
{ "Sm", new LazyCategory(Character.MATH_SYMBOL, true) },
|
||||
{ "Sc", new LazyCategory(Character.CURRENCY_SYMBOL, false) },
|
||||
{ "Sk", new LazyCategory(Character.MODIFIER_SYMBOL, false) },
|
||||
{ "So", new LazyCategory(Character.OTHER_SYMBOL, true) },
|
||||
{ "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION, false) },
|
||||
{ "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION, false) } };
|
||||
|
||||
public Object getObject(String name) {
|
||||
for (int i = 0; i < contents.length; ++i) {
|
||||
Object[] row = contents[i];
|
||||
if (name.equals(row[0])) {
|
||||
return row[1];
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Line terminator factory
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
abstract class AbstractLineTerminator {
|
||||
static AbstractLineTerminator unixLT = null;
|
||||
|
||||
static AbstractLineTerminator unicodeLT = null;
|
||||
|
||||
public abstract boolean isLineTerminator(int ch);
|
||||
|
||||
public abstract boolean isAfterLineTerminator(int ch1, int ch2);
|
||||
|
||||
public static AbstractLineTerminator getInstance(int flag) {
|
||||
if ((flag & TPattern.UNIX_LINES) != 0) {
|
||||
if (unixLT != null)
|
||||
return unixLT;
|
||||
unixLT = new AbstractLineTerminator() {
|
||||
@Override
|
||||
public boolean isLineTerminator(int ch) {
|
||||
return ch == '\n';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAfterLineTerminator(int ch, int ch2) {
|
||||
return ch == '\n';
|
||||
}
|
||||
};
|
||||
return unixLT;
|
||||
} else {
|
||||
if (unicodeLT != null)
|
||||
return unicodeLT;
|
||||
unicodeLT = new AbstractLineTerminator() {
|
||||
@Override
|
||||
public boolean isLineTerminator(int ch) {
|
||||
return (ch == '\n' || ch == '\r' || ch == '\u0085' || (ch | 1) == '\u2029');
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAfterLineTerminator(int ch, int ch2) {
|
||||
return (ch == '\n' || ch == '\u0085' || (ch | 1) == '\u2029') || (ch == '\r' && ch2 != '\n');
|
||||
}
|
||||
};
|
||||
return unicodeLT;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,272 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Base class for nodes representing a given regular expression. Note: all the
|
||||
* classes representing nodes have the "Set" suffix.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
abstract class AbstractSet {
|
||||
|
||||
public static final int TYPE_LEAF = 1 << 0;
|
||||
|
||||
public static final int TYPE_FSET = 1 << 1;
|
||||
|
||||
public static final int TYPE_QUANT = 1 << 3;
|
||||
|
||||
public static final int TYPE_DOTSET = 0x80000000 | '.';
|
||||
|
||||
/**
|
||||
* Next node to visit
|
||||
*/
|
||||
protected AbstractSet next;
|
||||
|
||||
/**
|
||||
* Counter for debugging purposes, represent unique node index;
|
||||
*/
|
||||
static int counter = 1;
|
||||
|
||||
protected boolean isSecondPassVisited = false;
|
||||
|
||||
protected String index = new Integer(AbstractSet.counter++).toString();
|
||||
|
||||
private int type = 0;
|
||||
|
||||
public AbstractSet() {
|
||||
}
|
||||
|
||||
public AbstractSet(AbstractSet n) {
|
||||
next = n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if this node matches in given position and recursively call next
|
||||
* node matches on positive self match. Returns positive integer if entire
|
||||
* match succeed, negative otherwise
|
||||
*
|
||||
* @param stringIndex
|
||||
* - string index to start from;
|
||||
* @param testString
|
||||
* - input string
|
||||
* @param matchResult
|
||||
* - MatchResult to sore result into
|
||||
* @return -1 if match fails or n > 0;
|
||||
*/
|
||||
public abstract int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult);
|
||||
|
||||
/**
|
||||
* Attempts to apply pattern starting from this set/stringIndex; returns
|
||||
* index this search was started from, if value is negative, this means that
|
||||
* this search didn't succeed, additional information could be obtained via
|
||||
* matchResult;
|
||||
*
|
||||
* Note: this is default implementation for find method, it's based on
|
||||
* matches, subclasses do not have to override find method unless more
|
||||
* effective find method exists for a particular node type (sequence, i.e.
|
||||
* substring, for example). Same applies for find back method.
|
||||
*
|
||||
* @param stringIndex
|
||||
* starting index
|
||||
* @param testString
|
||||
* string to search in
|
||||
* @param matchResult
|
||||
* result of the match
|
||||
* @return last searched index
|
||||
*/
|
||||
public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int length = matchResult.getRightBound();
|
||||
while (stringIndex <= length) {
|
||||
if (matches(stringIndex, testString, matchResult) >= 0) {
|
||||
return stringIndex;
|
||||
} else {
|
||||
stringIndex++;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param stringIndex
|
||||
* - an index, to finish search back (left limit)
|
||||
* @param startSearch
|
||||
* - an index to start search from (right limit)
|
||||
* @param testString
|
||||
* - test string;
|
||||
* @param matchResult
|
||||
* match result
|
||||
* @return an index to start back search next time if this search fails(new
|
||||
* left bound); if this search fails the value is negative;
|
||||
*/
|
||||
public int findBack(int stringIndex, int startSearch, CharSequence testString, MatchResultImpl matchResult) {
|
||||
while (startSearch >= stringIndex) {
|
||||
if (matches(startSearch, testString, matchResult) >= 0) {
|
||||
return startSearch;
|
||||
} else {
|
||||
startSearch--;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true, if this node has consumed any characters during positive
|
||||
* match attempt, for example node representing character always consumes
|
||||
* one character if it matches. If particular node matches empty sting this
|
||||
* method will return false;
|
||||
*
|
||||
* @param matchResult
|
||||
* @return
|
||||
*/
|
||||
public abstract boolean hasConsumed(MatchResultImpl matchResult);
|
||||
|
||||
/**
|
||||
* Returns name for the particular node type. Used for debugging purposes.
|
||||
*/
|
||||
protected abstract String getName();
|
||||
|
||||
protected void setType(int type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public int getType() {
|
||||
return this.type;
|
||||
}
|
||||
|
||||
protected String getQualifiedName() {
|
||||
return "<" + index + ":" + getName() + ">";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getQualifiedName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next.
|
||||
*/
|
||||
public AbstractSet getNext() {
|
||||
return next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets next abstract set
|
||||
*
|
||||
* @param next
|
||||
* The next to set.
|
||||
*/
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the given node intersects with this one, false otherwise.
|
||||
* This method is being used for quantifiers construction, lets consider the
|
||||
* following regular expression (a|b)*ccc.
|
||||
*
|
||||
* (a|b) does not intersects with "ccc" and thus can be quantified greedily
|
||||
* (w/o kickbacks), like *+ instead of *.
|
||||
*
|
||||
* @param set
|
||||
* - usually previous node
|
||||
*
|
||||
* @return true if the given node intersects with this one
|
||||
*/
|
||||
public boolean first(AbstractSet set) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for replacement backreferenced sets.
|
||||
*
|
||||
* @param prev
|
||||
* - node who references to this node
|
||||
* @return null if current node need not to be replaced JointSet which is
|
||||
* replacement of current node otherwise
|
||||
*/
|
||||
public JointSet processBackRefReplacement() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for traversing nodes after the first stage of
|
||||
* compilation.
|
||||
*/
|
||||
public void processSecondPass() {
|
||||
this.isSecondPassVisited = true;
|
||||
|
||||
if (next != null) {
|
||||
|
||||
if (!next.isSecondPassVisited) {
|
||||
|
||||
/*
|
||||
* Add here code to do during the pass
|
||||
*/
|
||||
JointSet set = next.processBackRefReplacement();
|
||||
|
||||
if (set != null) {
|
||||
next.isSecondPassVisited = true;
|
||||
next = set;
|
||||
}
|
||||
|
||||
/*
|
||||
* End code to do during the pass
|
||||
*/
|
||||
next.processSecondPass();
|
||||
} else {
|
||||
|
||||
/*
|
||||
* We reach node through next but it is already traversed. You
|
||||
* can see this situation for AltGroupQuantifierSet.next when we
|
||||
* reach this node through AltGroupQuantifierSet.innerset. ...
|
||||
* .next
|
||||
*/
|
||||
|
||||
/*
|
||||
* Add here code to do during the pass
|
||||
*/
|
||||
if (next instanceof SingleSet && ((FSet)((JointSet)next).fSet).isBackReferenced) {
|
||||
next = next.next;
|
||||
}
|
||||
|
||||
/*
|
||||
* End code to do during the pass
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* LookAhead FSet; its match always succeeds and returns the current index.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class AheadFSet extends FSet {
|
||||
public AheadFSet() {
|
||||
super(-1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
return stringIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "AheadFSet";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents "?" quantifier over composite sets.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class AltGroupQuantifierSet extends GroupQuantifierSet {
|
||||
public AltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (!innerSet.hasConsumed(matchResult))
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
|
||||
int nextIndex = innerSet.matches(stringIndex, testString, matchResult);
|
||||
|
||||
if (nextIndex < 0) {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
} else {
|
||||
return nextIndex;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
super.setNext(next);
|
||||
innerSet.setNext(next);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents "?" quantifier over leaf sets.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class AltQuantifierSet extends LeafQuantifierSet {
|
||||
|
||||
public AltQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int shift = 0;
|
||||
|
||||
if ((shift = innerSet.matches(stringIndex, testString, matchResult)) >= 0) {
|
||||
return shift;
|
||||
} else {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
super.setNext(next);
|
||||
innerSet.setNext(next);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class AtomicFSet extends FSet {
|
||||
|
||||
int index;
|
||||
|
||||
public AtomicFSet(int groupIndex) {
|
||||
super(groupIndex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
int gr = getGroupIndex();
|
||||
matchResult.setConsumed(gr, stringIndex - matchResult.getConsumed(gr));
|
||||
index = stringIndex;
|
||||
|
||||
return stringIndex;
|
||||
}
|
||||
|
||||
public int getIndex() {
|
||||
return index;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "AtomicFSet";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl mr) {
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* This class represent atomic group (?>X), once X matches, this match become
|
||||
* unchangeable till the end of the match.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class AtomicJointSet extends NonCapJointSet {
|
||||
public AtomicJointSet(ArrayList<AbstractSet> children, FSet fSet) {
|
||||
super(children, fSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns stringIndex+shift, the next position to match
|
||||
*/
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int start = matchResult.getConsumed(groupIndex);
|
||||
matchResult.setConsumed(groupIndex, stringIndex);
|
||||
|
||||
int size = children.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
AbstractSet e = children.get(i);
|
||||
int shift = e.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
// AtomicFset always returns true, but saves the index to run
|
||||
// this next.match() from;
|
||||
return next.matches(((AtomicFSet)fSet).getIndex(), testString, matchResult);
|
||||
}
|
||||
}
|
||||
|
||||
matchResult.setConsumed(groupIndex, start);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return next;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "NonCapJointSet"; //$NON-NLS-1$
|
||||
}
|
||||
}
|
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Back reference node, i.e. \1-9;
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class BackReferenceSet extends CIBackReferenceSet {
|
||||
|
||||
public BackReferenceSet(int groupIndex, int consCounter) {
|
||||
super(groupIndex, consCounter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
String group = getString(matchResult);
|
||||
if (group == null || (stringIndex + group.length()) > matchResult.getRightBound())
|
||||
return -1;
|
||||
int shift = testString.toString().startsWith(group, stringIndex) ? group.length() : -1;
|
||||
|
||||
if (shift < 0) {
|
||||
return -1;
|
||||
}
|
||||
matchResult.setConsumed(consCounter, shift);
|
||||
return next.matches(stringIndex + shift, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
String group = getString(matchResult);
|
||||
int strLength = matchResult.getLeftBound();
|
||||
|
||||
if (group == null || (strIndex + group.length()) > strLength)
|
||||
return -1;
|
||||
|
||||
String testStr = testString.toString();
|
||||
|
||||
while (strIndex <= strLength) {
|
||||
strIndex = testStr.indexOf(group, strIndex);
|
||||
|
||||
if (strIndex < 0)
|
||||
return -1;
|
||||
if (next.matches(strIndex + group.length(), testString, matchResult) >= 0) {
|
||||
return strIndex;
|
||||
}
|
||||
|
||||
strIndex++;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
String group = getString(matchResult);
|
||||
|
||||
if (group == null)
|
||||
return -1;
|
||||
|
||||
String testStr = testString.toString();
|
||||
|
||||
while (lastIndex >= strIndex) {
|
||||
lastIndex = testStr.lastIndexOf(group, lastIndex);
|
||||
|
||||
if (lastIndex < 0 || lastIndex < strIndex)
|
||||
return -1;
|
||||
if (next.matches(lastIndex + group.length(), testString, matchResult) >= 0) {
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
lastIndex--;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "back reference: " + this.groupIndex; //$NON-NLS-1$
|
||||
}
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay Kuznetsov
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Group node over subexpression w/o alternations. This node is used if current
|
||||
* group is referenced via backreference.
|
||||
*/
|
||||
|
||||
class BackReferencedSingleSet extends SingleSet {
|
||||
|
||||
/*
|
||||
* This class is needed only for overwriting find() and findBack() methods
|
||||
* of SingleSet class, which is being back referenced. The following example
|
||||
* explains the need for such substitution: Let's consider the pattern
|
||||
* ".*(.)\\1". Leading .* works as follows: finds line terminator and runs
|
||||
* findBack from that point. findBack method in its turn (in contrast to
|
||||
* matches) sets group boundaries on the back trace. Thus at the point we
|
||||
* try to match back reference(\\1) groups are not yet set.
|
||||
*
|
||||
* To fix this problem we replace backreferenced groups with instances of
|
||||
* this class, which will use matches instead of find; this will affect
|
||||
* performance, but ensure correctness of the match.
|
||||
*/
|
||||
|
||||
public BackReferencedSingleSet(AbstractSet child, FSet fSet) {
|
||||
super(child, fSet);
|
||||
}
|
||||
|
||||
public BackReferencedSingleSet(SingleSet node) {
|
||||
super(node.kid, ((FSet)node.fSet));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int res = 0;
|
||||
int lastIndex = matchResult.getRightBound();
|
||||
int startSearch = stringIndex;
|
||||
|
||||
for (; startSearch <= lastIndex; startSearch++) {
|
||||
int saveStart = matchResult.getStart(groupIndex);
|
||||
|
||||
matchResult.setStart(groupIndex, startSearch);
|
||||
res = kid.matches(startSearch, testString, matchResult);
|
||||
if (res >= 0) {
|
||||
res = startSearch;
|
||||
break;
|
||||
} else {
|
||||
matchResult.setStart(groupIndex, saveStart);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int findBack(int stringIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int res = 0;
|
||||
int startSearch = lastIndex;
|
||||
|
||||
for (; startSearch >= stringIndex; startSearch--) {
|
||||
int saveStart = matchResult.getStart(groupIndex);
|
||||
|
||||
matchResult.setStart(groupIndex, startSearch);
|
||||
res = kid.matches(startSearch, testString, matchResult);
|
||||
if (res >= 0) {
|
||||
res = startSearch;
|
||||
break;
|
||||
} else {
|
||||
matchResult.setStart(groupIndex, saveStart);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for replacement backreferenced sets.
|
||||
*
|
||||
* @param prev
|
||||
* - node who references to this node
|
||||
*/
|
||||
@Override
|
||||
public JointSet processBackRefReplacement() {
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* FSet for lookbehind constructs. Checks if string index saved by corresponding
|
||||
* jointSet in "consumers" equals to current index and return current string
|
||||
* index, return -1 otherwise.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class BehindFSet extends FSet {
|
||||
public BehindFSet(int groupIndex) {
|
||||
super(groupIndex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
int gr = getGroupIndex();
|
||||
int rightBound = matchResult.getConsumed(gr);
|
||||
return (rightBound == stringIndex) ? stringIndex : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "BehindFSet"; //$NON-NLS-1$
|
||||
}
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Case Insensitive back reference node;
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class CIBackReferenceSet extends JointSet {
|
||||
protected int referencedGroup;
|
||||
protected int consCounter;
|
||||
|
||||
public CIBackReferenceSet(int groupIndex, int consCounter) {
|
||||
this.referencedGroup = groupIndex;
|
||||
this.consCounter = consCounter;
|
||||
}
|
||||
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
throw new TPatternSyntaxException(strIndex + ", " + testString, "", 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
String group = getString(matchResult);
|
||||
|
||||
if (group == null || (stringIndex + group.length()) > matchResult.getRightBound())
|
||||
return -1;
|
||||
|
||||
for (int i = 0; i < group.length(); i++) {
|
||||
if (group.charAt(i) != testString.charAt(stringIndex + i) &&
|
||||
TPattern.getSupplement(group.charAt(i)) != testString.charAt(stringIndex + i)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
matchResult.setConsumed(consCounter, group.length());
|
||||
return next.matches(stringIndex + group.length(), testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
protected String getString(MatchResultImpl matchResult) {
|
||||
String res = matchResult.getGroupNoCheck(referencedGroup);
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "CI back reference: " + this.groupIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
int cons;
|
||||
boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0);
|
||||
matchResult.setConsumed(consCounter, -1);
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single character in case insensitive manner.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class CICharSet extends LeafSet {
|
||||
|
||||
private char ch;
|
||||
|
||||
private char supplement;
|
||||
|
||||
public CICharSet(char ch) {
|
||||
this.ch = ch;
|
||||
this.supplement = TPattern.getSupplement(ch);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
return (this.ch == testString.charAt(strIndex) || this.supplement == testString.charAt(strIndex)) ? 1 : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "CI " + ch;
|
||||
}
|
||||
|
||||
protected char getChar() {
|
||||
return ch;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents case insensitive
|
||||
* canonical decomposition of
|
||||
* Unicode character. Is used when
|
||||
* CANON_EQ flag of Pattern class
|
||||
* is specified.
|
||||
*/
|
||||
class CIDecomposedCharSet extends DecomposedCharSet{
|
||||
|
||||
/*
|
||||
* Just only a stub
|
||||
*/
|
||||
public CIDecomposedCharSet(int [] decomp, int decomposedCharLength) {
|
||||
super(decomp, decomposedCharLength);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* This class represents ASCII case insensitive character sequences.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class CISequenceSet extends LeafSet {
|
||||
private String string = null;
|
||||
|
||||
CISequenceSet(StringBuffer substring) {
|
||||
this.string = substring.toString();
|
||||
this.charCount = substring.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
for (int i = 0; i < string.length(); i++) {
|
||||
if (string.charAt(i) != testString.charAt(strIndex + i) &&
|
||||
TPattern.getSupplement(string.charAt(i)) != testString.charAt(strIndex + i)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return string.length();
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "CI sequence: " + string;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,604 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* User defined character classes ([abef]). See AbstractCharClass documentation
|
||||
* for more details.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class CharClass extends AbstractCharClass {
|
||||
// Flag indicates if we add supplement upper/lower case
|
||||
boolean ci = false;
|
||||
|
||||
boolean uci = false;
|
||||
|
||||
// Flag indicates if there are unicode supplements
|
||||
boolean hasUCI = false;
|
||||
|
||||
boolean invertedSurrogates = false;
|
||||
|
||||
boolean inverted = false;
|
||||
|
||||
boolean hideBits = false;
|
||||
|
||||
BitSet bits = new BitSet();
|
||||
|
||||
AbstractCharClass nonBitSet = null;
|
||||
|
||||
/**
 * Creates a character class that leaves every flag at its field default.
 */
public CharClass() {
|
||||
}
|
||||
|
||||
/**
 * Creates a character class with the given case-insensitivity flags.
 *
 * @param ci  stored in the {@code ci} field, which controls whether the
 *            upper/lower case supplement is added as well
 * @param uci stored in the {@code uci} field (presumably Unicode case
 *            insensitivity - TODO confirm)
 */
public CharClass(boolean ci, boolean uci) {
|
||||
this.ci = ci;
|
||||
this.uci = uci;
|
||||
}
|
||||
|
||||
/**
 * Creates a character class with the given case-insensitivity flags and
 * passes {@code negative} to {@code setNegative}.
 *
 * @param negative value handed to {@code setNegative}
 * @param ci       see {@link #CharClass(boolean, boolean)}
 * @param uci      see {@link #CharClass(boolean, boolean)}
 */
public CharClass(boolean negative, boolean ci, boolean uci) {
|
||||
this(ci, uci);
|
||||
setNegative(negative);
|
||||
}
|
||||
|
||||
/*
|
||||
* We can use this method safely even if nonBitSet != null due to specific
|
||||
* of range constructions in regular expressions.
|
||||
*/
|
||||
public CharClass add(int ch) {
|
||||
if (ci) {
|
||||
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
|
||||
if (!inverted) {
|
||||
bits.set(TPattern.getSupplement((char)ch));
|
||||
} else {
|
||||
bits.clear(TPattern.getSupplement((char)ch));
|
||||
}
|
||||
} else if (uci && ch > 128) {
|
||||
hasUCI = true;
|
||||
ch = Character.toLowerCase(Character.toUpperCase(ch));
|
||||
// return this;
|
||||
}
|
||||
}
|
||||
|
||||
if (Lexer.isHighSurrogate(ch) || Lexer.isLowSurrogate(ch)) {
|
||||
if (!invertedSurrogates) {
|
||||
lowHighSurrogates.set(ch - Character.MIN_SURROGATE);
|
||||
} else {
|
||||
lowHighSurrogates.clear(ch - Character.MIN_SURROGATE);
|
||||
}
|
||||
}
|
||||
|
||||
if (!inverted) {
|
||||
bits.set(ch);
|
||||
} else
|
||||
bits.clear(ch);
|
||||
|
||||
if (!mayContainSupplCodepoints && Character.isSupplementaryCodePoint(ch)) {
|
||||
mayContainSupplCodepoints = true;
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/*
|
||||
* The difference between add(AbstractCharClass) and
|
||||
* union(AbstractCharClass) is that add() is used for constructions like
|
||||
* "[^abc\\d]" (this pattern doesn't match "1") while union is used for
|
||||
* constructions like "[^abc[\\d]]" (this pattern matches "1").
|
||||
*/
|
||||
public CharClass add(final AbstractCharClass cc) {
|
||||
|
||||
if (!mayContainSupplCodepoints && cc.mayContainSupplCodepoints) {
|
||||
mayContainSupplCodepoints = true;
|
||||
}
|
||||
|
||||
if (!invertedSurrogates) {
|
||||
|
||||
// A | !B = ! ((A ^ B) & B)
|
||||
if (cc.altSurrogates) {
|
||||
lowHighSurrogates.xor(cc.getLowHighSurrogates());
|
||||
lowHighSurrogates.and(cc.getLowHighSurrogates());
|
||||
altSurrogates = !altSurrogates;
|
||||
invertedSurrogates = true;
|
||||
|
||||
// A | B
|
||||
} else {
|
||||
lowHighSurrogates.or(cc.getLowHighSurrogates());
|
||||
}
|
||||
} else {
|
||||
|
||||
// !A | !B = !(A & B)
|
||||
if (cc.altSurrogates) {
|
||||
lowHighSurrogates.and(cc.getLowHighSurrogates());
|
||||
|
||||
// !A | B = !(A & !B)
|
||||
} else {
|
||||
lowHighSurrogates.andNot(cc.getLowHighSurrogates());
|
||||
}
|
||||
}
|
||||
|
||||
if (!hideBits && cc.getBits() != null) {
|
||||
if (!inverted) {
|
||||
|
||||
// A | !B = ! ((A ^ B) & B)
|
||||
if (cc.isNegative()) {
|
||||
bits.xor(cc.getBits());
|
||||
bits.and(cc.getBits());
|
||||
alt = !alt;
|
||||
inverted = true;
|
||||
|
||||
// A | B
|
||||
} else {
|
||||
bits.or(cc.getBits());
|
||||
}
|
||||
} else {
|
||||
|
||||
// !A | !B = !(A & B)
|
||||
if (cc.isNegative()) {
|
||||
bits.and(cc.getBits());
|
||||
|
||||
// !A | B = !(A & !B)
|
||||
} else {
|
||||
bits.andNot(cc.getBits());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
final boolean curAlt = alt;
|
||||
|
||||
if (nonBitSet == null) {
|
||||
|
||||
if (curAlt && !inverted && bits.isEmpty()) {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return cc.contains(ch);
|
||||
}
|
||||
};
|
||||
// alt = true;
|
||||
} else {
|
||||
|
||||
/*
|
||||
* We keep the value of alt unchanged for constructions like
|
||||
* [^[abc]fgb] by using the formula a ^ b == !a ^ !b.
|
||||
*/
|
||||
if (curAlt) {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return !((curAlt ^ bits.get(ch)) || ((curAlt ^ inverted) ^ cc.contains(ch)));
|
||||
}
|
||||
};
|
||||
// alt = true
|
||||
} else {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return (curAlt ^ bits.get(ch)) || ((curAlt ^ inverted) ^ cc.contains(ch));
|
||||
}
|
||||
};
|
||||
// alt = false
|
||||
}
|
||||
}
|
||||
|
||||
hideBits = true;
|
||||
} else {
|
||||
final AbstractCharClass nb = nonBitSet;
|
||||
|
||||
if (curAlt) {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return !(curAlt ^ (nb.contains(ch) || cc.contains(ch)));
|
||||
}
|
||||
};
|
||||
// alt = true
|
||||
} else {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return curAlt ^ (nb.contains(ch) || cc.contains(ch));
|
||||
}
|
||||
};
|
||||
// alt = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
public CharClass add(int st, int end) {
|
||||
if (st > end)
|
||||
throw new IllegalArgumentException();
|
||||
if (!ci
|
||||
|
||||
// no intersection with surrogate characters
|
||||
&&
|
||||
(end < Character.MIN_SURROGATE || st > Character.MAX_SURROGATE)) {
|
||||
if (!inverted) {
|
||||
bits.set(st, end + 1);
|
||||
} else {
|
||||
bits.clear(st, end + 1);
|
||||
}
|
||||
} else {
|
||||
for (int i = st; i < end + 1; i++) {
|
||||
add(i);
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
// OR operation
|
||||
public void union(final AbstractCharClass clazz) {
|
||||
if (!mayContainSupplCodepoints && clazz.mayContainSupplCodepoints) {
|
||||
mayContainSupplCodepoints = true;
|
||||
}
|
||||
|
||||
if (clazz.hasUCI())
|
||||
this.hasUCI = true;
|
||||
|
||||
if (altSurrogates ^ clazz.altSurrogates) {
|
||||
|
||||
// !A | B = !(A & !B)
|
||||
if (altSurrogates) {
|
||||
lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
|
||||
|
||||
// A | !B = !((A ^ B) & B)
|
||||
} else {
|
||||
lowHighSurrogates.xor(clazz.getLowHighSurrogates());
|
||||
lowHighSurrogates.and(clazz.getLowHighSurrogates());
|
||||
altSurrogates = true;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
// !A | !B = !(A & B)
|
||||
if (altSurrogates) {
|
||||
lowHighSurrogates.and(clazz.getLowHighSurrogates());
|
||||
|
||||
// A | B
|
||||
} else {
|
||||
lowHighSurrogates.or(clazz.getLowHighSurrogates());
|
||||
}
|
||||
}
|
||||
|
||||
if (!hideBits && clazz.getBits() != null) {
|
||||
if (alt ^ clazz.isNegative()) {
|
||||
|
||||
// !A | B = !(A & !B)
|
||||
if (alt) {
|
||||
bits.andNot(clazz.getBits());
|
||||
|
||||
// A | !B = !((A ^ B) & B)
|
||||
} else {
|
||||
bits.xor(clazz.getBits());
|
||||
bits.and(clazz.getBits());
|
||||
alt = true;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
// !A | !B = !(A & B)
|
||||
if (alt) {
|
||||
bits.and(clazz.getBits());
|
||||
|
||||
// A | B
|
||||
} else {
|
||||
bits.or(clazz.getBits());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
final boolean curAlt = alt;
|
||||
|
||||
if (nonBitSet == null) {
|
||||
|
||||
if (!inverted && bits.isEmpty()) {
|
||||
if (curAlt) {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return !clazz.contains(ch);
|
||||
}
|
||||
};
|
||||
// alt = true
|
||||
} else {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return clazz.contains(ch);
|
||||
}
|
||||
};
|
||||
// alt = false
|
||||
}
|
||||
} else {
|
||||
|
||||
if (curAlt) {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return !(clazz.contains(ch) || (curAlt ^ bits.get(ch)));
|
||||
}
|
||||
};
|
||||
// alt = true
|
||||
} else {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return clazz.contains(ch) || (curAlt ^ bits.get(ch));
|
||||
}
|
||||
};
|
||||
// alt = false
|
||||
}
|
||||
}
|
||||
hideBits = true;
|
||||
} else {
|
||||
final AbstractCharClass nb = nonBitSet;
|
||||
|
||||
if (curAlt) {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return !((curAlt ^ nb.contains(ch)) || clazz.contains(ch));
|
||||
}
|
||||
};
|
||||
// alt = true
|
||||
} else {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return (curAlt ^ nb.contains(ch)) || clazz.contains(ch);
|
||||
}
|
||||
};
|
||||
// alt = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AND operation
|
||||
public void intersection(final AbstractCharClass clazz) {
|
||||
if (!mayContainSupplCodepoints && clazz.mayContainSupplCodepoints) {
|
||||
mayContainSupplCodepoints = true;
|
||||
}
|
||||
|
||||
if (clazz.hasUCI())
|
||||
this.hasUCI = true;
|
||||
|
||||
if (altSurrogates ^ clazz.altSurrogates) {
|
||||
|
||||
// !A & B = ((A ^ B) & B)
|
||||
if (altSurrogates) {
|
||||
lowHighSurrogates.xor(clazz.getLowHighSurrogates());
|
||||
lowHighSurrogates.and(clazz.getLowHighSurrogates());
|
||||
altSurrogates = false;
|
||||
|
||||
// A & !B
|
||||
} else {
|
||||
lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
|
||||
}
|
||||
} else {
|
||||
|
||||
// !A & !B = !(A | B)
|
||||
if (altSurrogates) {
|
||||
lowHighSurrogates.or(clazz.getLowHighSurrogates());
|
||||
|
||||
// A & B
|
||||
} else {
|
||||
lowHighSurrogates.and(clazz.getLowHighSurrogates());
|
||||
}
|
||||
}
|
||||
|
||||
if (!hideBits && clazz.getBits() != null) {
|
||||
|
||||
if (alt ^ clazz.isNegative()) {
|
||||
|
||||
// !A & B = ((A ^ B) & B)
|
||||
if (alt) {
|
||||
bits.xor(clazz.getBits());
|
||||
bits.and(clazz.getBits());
|
||||
alt = false;
|
||||
|
||||
// A & !B
|
||||
} else {
|
||||
bits.andNot(clazz.getBits());
|
||||
}
|
||||
} else {
|
||||
|
||||
// !A & !B = !(A | B)
|
||||
if (alt) {
|
||||
bits.or(clazz.getBits());
|
||||
|
||||
// A & B
|
||||
} else {
|
||||
bits.and(clazz.getBits());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
final boolean curAlt = alt;
|
||||
|
||||
if (nonBitSet == null) {
|
||||
|
||||
if (!inverted && bits.isEmpty()) {
|
||||
if (curAlt) {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return !clazz.contains(ch);
|
||||
}
|
||||
};
|
||||
// alt = true
|
||||
} else {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return clazz.contains(ch);
|
||||
}
|
||||
};
|
||||
// alt = false
|
||||
}
|
||||
} else {
|
||||
|
||||
if (curAlt) {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return !(clazz.contains(ch) && (curAlt ^ bits.get(ch)));
|
||||
}
|
||||
};
|
||||
// alt = true
|
||||
} else {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return clazz.contains(ch) && (curAlt ^ bits.get(ch));
|
||||
}
|
||||
};
|
||||
// alt = false
|
||||
}
|
||||
}
|
||||
hideBits = true;
|
||||
} else {
|
||||
final AbstractCharClass nb = nonBitSet;
|
||||
|
||||
if (curAlt) {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return !((curAlt ^ nb.contains(ch)) && clazz.contains(ch));
|
||||
}
|
||||
};
|
||||
// alt = true
|
||||
} else {
|
||||
nonBitSet = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return (curAlt ^ nb.contains(ch)) && clazz.contains(ch);
|
||||
}
|
||||
};
|
||||
// alt = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> if character class contains symbol specified,
|
||||
* <code>false</code> otherwise. Note: #setNegative() method changes the
|
||||
* meaning of contains method;
|
||||
*
|
||||
* @param ch
|
||||
* @return <code>true</code> if character class contains symbol specified;
|
||||
*
|
||||
* TODO: currently <code>character class</code> implementation based
|
||||
* on BitSet, but this implementation possibly will be turned to
|
||||
* combined BitSet(for first 256 symbols) and Black/Red tree for the
|
||||
* rest of UTF.
|
||||
*/
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
if (nonBitSet == null) {
|
||||
return this.alt ^ bits.get(ch);
|
||||
} else {
|
||||
return alt ^ nonBitSet.contains(ch);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitSet getBits() {
|
||||
if (hideBits)
|
||||
return null;
|
||||
return bits;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BitSet getLowHighSurrogates() {
|
||||
return lowHighSurrogates;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractCharClass getInstance() {
|
||||
|
||||
if (nonBitSet == null) {
|
||||
final BitSet bs = getBits();
|
||||
|
||||
AbstractCharClass res = new AbstractCharClass() {
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return this.alt ^ bs.get(ch);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder temp = new StringBuilder();
|
||||
for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
|
||||
temp.append(Character.toChars(i));
|
||||
temp.append('|');
|
||||
}
|
||||
|
||||
if (temp.length() > 0)
|
||||
temp.deleteCharAt(temp.length() - 1);
|
||||
|
||||
return temp.toString();
|
||||
}
|
||||
|
||||
};
|
||||
return res.setNegative(isNegative());
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging purposes only
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder temp = new StringBuilder();
|
||||
for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
|
||||
temp.append(Character.toChars(i));
|
||||
temp.append('|');
|
||||
}
|
||||
|
||||
if (temp.length() > 0)
|
||||
temp.deleteCharAt(temp.length() - 1);
|
||||
|
||||
return temp.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasUCI() {
|
||||
return hasUCI;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single character.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class CharSet extends LeafSet {
|
||||
|
||||
private char ch = 0;
|
||||
|
||||
public CharSet(char ch) {
|
||||
this.ch = ch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int charCount() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
return (this.ch == testString.charAt(strIndex)) ? 1 : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (testString instanceof String) {
|
||||
String testStr = (String)testString;
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
while (strIndex < strLength) {
|
||||
strIndex = testStr.indexOf(ch, strIndex);
|
||||
if (strIndex < 0)
|
||||
return -1;
|
||||
if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
|
||||
return strIndex;
|
||||
}
|
||||
strIndex++;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
return super.find(strIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (testString instanceof String) {
|
||||
String testStr = (String)testString;
|
||||
|
||||
while (lastIndex >= strIndex) {
|
||||
lastIndex = testStr.lastIndexOf(ch, lastIndex);
|
||||
if (lastIndex < 0 || lastIndex < strIndex) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
lastIndex--;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
return super.findBack(strIndex, lastIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "" + ch; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
protected char getChar() {
|
||||
return ch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
if (set instanceof CharSet) {
|
||||
return ((CharSet)set).getChar() == ch;
|
||||
} else if (set instanceof RangeSet) {
|
||||
return ((RangeSet)set).accepts(0, Character.toString(ch)) > 0;
|
||||
} else if (set instanceof SupplRangeSet) {
|
||||
return ((SupplRangeSet)set).contains(ch);
|
||||
} else if (set instanceof SupplCharSet) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Composite (i.e. {n,m}) quantifier node for groups ("(X){n,m}")
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class CompositeGroupQuantifierSet extends GroupQuantifierSet {
|
||||
|
||||
protected Quantifier quantifier = null;
|
||||
|
||||
int setCounter;
|
||||
|
||||
/**
|
||||
* Constructs CompositeGroupQuantifierSet
|
||||
*
|
||||
* @param quant
|
||||
* - given composite quantifier
|
||||
* @param innerSet
|
||||
* - given group
|
||||
* @param next
|
||||
* - next set after the quantifier
|
||||
*/
|
||||
public CompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, AbstractSet next, int type,
|
||||
int setCounter) {
|
||||
super(innerSet, next, type);
|
||||
this.quantifier = quant;
|
||||
this.setCounter = setCounter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int enterCounter = matchResult.getEnterCounter(setCounter);
|
||||
|
||||
if (!innerSet.hasConsumed(matchResult))
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
|
||||
// can't go inner set;
|
||||
if (enterCounter >= quantifier.max()) {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
// go inner set;
|
||||
matchResult.setEnterCounter(setCounter, ++enterCounter);
|
||||
int nextIndex = innerSet.matches(stringIndex, testString, matchResult);
|
||||
|
||||
if (nextIndex < 0) {
|
||||
matchResult.setEnterCounter(setCounter, --enterCounter);
|
||||
if (enterCounter >= quantifier.min()) {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
} else {
|
||||
matchResult.setEnterCounter(setCounter, 0);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
matchResult.setEnterCounter(setCounter, 0);
|
||||
return nextIndex;
|
||||
}
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
quantifier.resetCounter();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return quantifier.toString();
|
||||
}
|
||||
|
||||
void setQuantifier(Quantifier quant) {
|
||||
this.quantifier = quant;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Composite (i.e. {n,m}) quantifier node over the leaf nodes ("a{n,m}")
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class CompositeQuantifierSet extends LeafQuantifierSet {
|
||||
|
||||
protected Quantifier quantifier = null;
|
||||
|
||||
public CompositeQuantifierSet(Quantifier quant, LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
this.quantifier = quant;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int min = quantifier.min();
|
||||
int max = quantifier.max();
|
||||
int i = 0;
|
||||
|
||||
for (; i < min; i++) {
|
||||
|
||||
if (stringIndex + leaf.charCount() > matchResult.getRightBound()) {
|
||||
matchResult.hitEnd = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int shift = leaf.accepts(stringIndex, testString);
|
||||
if (shift < 1) {
|
||||
return -1;
|
||||
}
|
||||
stringIndex += shift;
|
||||
}
|
||||
|
||||
for (; i < max; i++) {
|
||||
int shift;
|
||||
if (stringIndex + leaf.charCount() > matchResult.getRightBound() ||
|
||||
(shift = leaf.accepts(stringIndex, testString)) < 1) {
|
||||
break;
|
||||
}
|
||||
stringIndex += shift;
|
||||
}
|
||||
|
||||
for (; i >= min; i--) {
|
||||
int shift = next.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
return shift;
|
||||
}
|
||||
stringIndex -= leaf.charCount();
|
||||
}
|
||||
return -1;
|
||||
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
quantifier.resetCounter();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return quantifier.toString();
|
||||
}
|
||||
|
||||
void setQuantifier(Quantifier quant) {
|
||||
this.quantifier = quant;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Copyright 2014 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
|
||||
*
|
||||
* COPYRIGHT AND PERMISSION NOTICE
|
||||
*
|
||||
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
|
||||
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
|
||||
* hereby granted, free of charge, to any person obtaining a copy of the
|
||||
* Unicode data files and any associated documentation (the "Data Files")
|
||||
* or Unicode software and any associated documentation (the "Software")
|
||||
* to deal in the Data Files or Software without restriction, including without
|
||||
* limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
* and/or sell copies of the Data Files or Software, and to permit persons
|
||||
* to whom the Data Files or Software are furnished to do so, provided that
|
||||
* (a) the above copyright notice(s) and this permission notice appear with
|
||||
* all copies of the Data Files or Software, (b) both the above copyright
|
||||
* notice(s) and this permission notice appear in associated documentation,
|
||||
* and (c) there is clear notice in each modified Data File or in the Software
|
||||
* as well as in the documentation associated with the Data File(s) or Software
|
||||
* that the data or software has been modified.
|
||||
|
||||
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall
|
||||
* not be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in these Data Files or Software without prior written
|
||||
* authorization of the copyright holder.
|
||||
*
|
||||
* 2. Additional terms from the Database:
|
||||
*
|
||||
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* The Unicode Character Database is provided as is by Unicode, Inc.
|
||||
* No claims are made as to fitness for any particular purpose. No warranties
|
||||
* of any kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been purchased
|
||||
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
|
||||
* will be exchange of defective media within 90 days of receipt. This disclaimer
|
||||
* is applicable for all other data files accompanying the Unicode Character Database,
|
||||
* some of which have been compiled by the Unicode Consortium, and some of which
|
||||
* have been supplied by other sources.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Data
|
||||
*
|
||||
* Recipient is granted the right to make copies in any form for internal
|
||||
* distribution and to freely use the information supplied in the creation of
|
||||
* products supporting the UnicodeTM Standard. The files in
|
||||
* the Unicode Character Database can be redistributed to third parties or other
|
||||
* organizations (whether for profit or not) as long as this notice and the disclaimer
|
||||
* notice are retained. Information can be extracted from these files and used
|
||||
* in documentation or programs, as long as there is an accompanying notice
|
||||
* indicating the source.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* This class is used to split the range that contains surrogate characters into
|
||||
* two ranges: the first consisting of these surrogate characters and the second
|
||||
* consisting of all other characters from the parent range. This class
|
||||
* represents the parent range split in such a manner.
|
||||
*/
|
||||
class CompositeRangeSet extends JointSet {
|
||||
|
||||
// range without surrogates
|
||||
AbstractSet withoutSurrogates;
|
||||
|
||||
// range containing surrogates only
|
||||
AbstractSet withSurrogates;
|
||||
|
||||
public CompositeRangeSet(AbstractSet withoutSurrogates, AbstractSet withSurrogates, AbstractSet next) {
|
||||
this.withoutSurrogates = withoutSurrogates;
|
||||
this.withSurrogates = withSurrogates;
|
||||
setNext(next);
|
||||
}
|
||||
|
||||
public CompositeRangeSet(AbstractSet withoutSurrogates, AbstractSet withSurrogates) {
|
||||
this.withoutSurrogates = withoutSurrogates;
|
||||
this.withSurrogates = withSurrogates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next.
|
||||
*/
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int shift = withoutSurrogates.matches(stringIndex, testString, matchResult);
|
||||
|
||||
if (shift < 0) {
|
||||
shift = withSurrogates.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
if (shift >= 0) {
|
||||
return shift;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets next abstract set.
|
||||
*
|
||||
* @param next
|
||||
* The next to set.
|
||||
*/
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
withSurrogates.setNext(next);
|
||||
withoutSurrogates.setNext(next);
|
||||
}
|
||||
|
||||
public AbstractSet getSurrogates() {
|
||||
return withSurrogates;
|
||||
}
|
||||
|
||||
public AbstractSet getWithoutSurrogates() {
|
||||
return withoutSurrogates;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "CompositeRangeSet: " + " <nonsurrogate> " + withoutSurrogates + " <surrogate> " + withSurrogates;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,260 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents the canonical decomposition of a Unicode character. It is used when the
|
||||
* CANON_EQ flag of Pattern class is specified.
|
||||
*/
|
||||
class DecomposedCharSet extends JointSet {
|
||||
|
||||
/**
|
||||
* Contains information about number of chars that were read for a codepoint
|
||||
* last time
|
||||
*/
|
||||
private int readCharsForCodePoint = 1;
|
||||
|
||||
/**
|
||||
* UTF-16 encoding of decomposedChar
|
||||
*/
|
||||
private String decomposedCharUTF16 = null;
|
||||
|
||||
/**
|
||||
* Decomposition of the Unicode codepoint
|
||||
*/
|
||||
private int[] decomposedChar;
|
||||
|
||||
/**
|
||||
* Length of useful part of decomposedChar decomposedCharLength <=
|
||||
* decomposedChar.length
|
||||
*/
|
||||
private int decomposedCharLength;
|
||||
|
||||
public DecomposedCharSet(int[] decomposedChar, int decomposedCharLength) {
|
||||
this.decomposedChar = decomposedChar;
|
||||
this.decomposedCharLength = decomposedCharLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next.
|
||||
*/
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets next abstract set.
|
||||
*
|
||||
* @param next
|
||||
* The next to set.
|
||||
*/
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
/*
|
||||
* All decompositions have length that is less or equal
|
||||
* Lexer.MAX_DECOMPOSITION_LENGTH
|
||||
*/
|
||||
int[] decCurCodePoint;
|
||||
int[] decCodePoint = new int[Lexer.MAX_DECOMPOSITION_LENGTH];
|
||||
int readCodePoints = 0;
|
||||
int rightBound = matchResult.getRightBound();
|
||||
int curChar;
|
||||
int i = 0;
|
||||
|
||||
if (strIndex >= rightBound) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* We read testString and decompose it gradually to compare with this
|
||||
* decomposedChar at position strIndex
|
||||
*/
|
||||
curChar = codePointAt(strIndex, testString, rightBound);
|
||||
strIndex += readCharsForCodePoint;
|
||||
decCurCodePoint = Lexer.getDecomposition(curChar);
|
||||
if (decCurCodePoint == null) {
|
||||
decCodePoint[readCodePoints++] = curChar;
|
||||
} else {
|
||||
i = decCurCodePoint.length;
|
||||
System.arraycopy(decCurCodePoint, 0, decCodePoint, 0, i);
|
||||
readCodePoints += i;
|
||||
}
|
||||
|
||||
if (strIndex < rightBound) {
|
||||
curChar = codePointAt(strIndex, testString, rightBound);
|
||||
|
||||
/*
|
||||
* Read testString until we met a decomposed char boundary and
|
||||
* decompose obtained portion of testString
|
||||
*/
|
||||
while ((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH) && !Lexer.isDecomposedCharBoundary(curChar)) {
|
||||
|
||||
if (Lexer.hasDecompositionNonNullCanClass(curChar)) {
|
||||
|
||||
/*
|
||||
* A few codepoints have decompositions and non null
|
||||
* canonical classes, we have to take them into
|
||||
* consideration, but general rule is: if canonical class !=
|
||||
* 0 then no decomposition
|
||||
*/
|
||||
decCurCodePoint = Lexer.getDecomposition(curChar);
|
||||
|
||||
/*
|
||||
* Length of such decomposition is 1 or 2. See UnicodeData
|
||||
* file http://www.unicode.org/Public/4.0-Update
|
||||
* /UnicodeData-4.0.0.txt
|
||||
*/
|
||||
if (decCurCodePoint.length == 2) {
|
||||
decCodePoint[readCodePoints++] = decCurCodePoint[0];
|
||||
decCodePoint[readCodePoints++] = decCurCodePoint[1];
|
||||
} else {
|
||||
decCodePoint[readCodePoints++] = decCurCodePoint[0];
|
||||
}
|
||||
} else {
|
||||
decCodePoint[readCodePoints++] = curChar;
|
||||
}
|
||||
|
||||
strIndex += readCharsForCodePoint;
|
||||
|
||||
if (strIndex < rightBound) {
|
||||
curChar = codePointAt(strIndex, testString, rightBound);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Some optimization since length of decomposed char is <= 3 usually
|
||||
*/
|
||||
switch (readCodePoints) {
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
break;
|
||||
|
||||
case 3:
|
||||
int i1 = Lexer.getCanonicalClass(decCodePoint[1]);
|
||||
int i2 = Lexer.getCanonicalClass(decCodePoint[2]);
|
||||
|
||||
if ((i2 != 0) && (i1 > i2)) {
|
||||
i1 = decCodePoint[1];
|
||||
decCodePoint[1] = decCodePoint[2];
|
||||
decCodePoint[2] = i1;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
decCodePoint = Lexer.getCanonicalOrder(decCodePoint, readCodePoints);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare decomposedChar with decomposed char that was just read from
|
||||
* testString
|
||||
*/
|
||||
if (readCodePoints != decomposedCharLength) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < readCodePoints; i++) {
|
||||
if (decCodePoint[i] != decomposedChar[i]) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return UTF-16 encoding of given Unicode codepoint.
|
||||
*
|
||||
* @return UTF-16 encoding
|
||||
*/
|
||||
private String getDecomposedChar() {
|
||||
if (decomposedCharUTF16 == null) {
|
||||
StringBuilder strBuff = new StringBuilder();
|
||||
|
||||
for (int i = 0; i < decomposedCharLength; i++) {
|
||||
strBuff.append(Character.toChars(decomposedChar[i]));
|
||||
}
|
||||
decomposedCharUTF16 = strBuff.toString();
|
||||
}
|
||||
return decomposedCharUTF16;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "decomposed char:" + getDecomposedChar(); //$NON-NLS-1$
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads Unicode codepoint from input.
|
||||
*
|
||||
* @param strIndex
|
||||
* - index to read codepoint at
|
||||
* @param testString
|
||||
* - input
|
||||
* @param matchResult
|
||||
* - auxiliary object
|
||||
* @return codepoint at given strIndex at testString and
|
||||
*/
|
||||
public int codePointAt(int strIndex, CharSequence testString, int rightBound) {
|
||||
|
||||
/*
|
||||
* We store information about number of codepoints we read at variable
|
||||
* readCharsForCodePoint.
|
||||
*/
|
||||
int curChar;
|
||||
|
||||
readCharsForCodePoint = 1;
|
||||
if (strIndex < rightBound - 1) {
|
||||
char high = testString.charAt(strIndex++);
|
||||
char low = testString.charAt(strIndex);
|
||||
|
||||
if (Character.isSurrogatePair(high, low)) {
|
||||
char[] curCodePointUTF16 = new char[] { high, low };
|
||||
curChar = Character.codePointAt(curCodePointUTF16, 0);
|
||||
readCharsForCodePoint = 2;
|
||||
} else {
|
||||
curChar = high;
|
||||
}
|
||||
} else {
|
||||
curChar = testString.charAt(strIndex);
|
||||
}
|
||||
|
||||
return curChar;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
return (set instanceof DecomposedCharSet) ? ((DecomposedCharSet)set).getDecomposedChar().equals(
|
||||
getDecomposedChar()) : true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Special node for ".*" construction for any character including line
|
||||
* terminators.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class DotAllQuantifierSet extends QuantifierSet {
|
||||
|
||||
public DotAllQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
if (strLength <= stringIndex) {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
return next.findBack(stringIndex, strLength, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int strLength = matchResult.getRightBound();
|
||||
if (next.findBack(stringIndex, strLength, testString, matchResult) >= 0) {
|
||||
return stringIndex;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<DotAllQuant>";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Node accepting any character including line terminators.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class DotAllSet extends JointSet {
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
if (stringIndex + 1 > strLength) {
|
||||
matchResult.hitEnd = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
char high = testString.charAt(stringIndex);
|
||||
|
||||
if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) {
|
||||
char low = testString.charAt(stringIndex + 1);
|
||||
|
||||
if (Character.isSurrogatePair(high, low)) {
|
||||
return next.matches(stringIndex + 2, testString, matchResult);
|
||||
}
|
||||
}
|
||||
return next.matches(stringIndex + 1, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "DotAll";
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return AbstractSet.TYPE_DOTSET;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Special node for ".*" construction. The main idea here is to find line
|
||||
* terminator and try to find the rest of the construction from this point.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class DotQuantifierSet extends QuantifierSet {
|
||||
|
||||
AbstractLineTerminator lt;
|
||||
|
||||
public DotQuantifierSet(AbstractSet innerSet, AbstractSet next, int type, AbstractLineTerminator lt) {
|
||||
super(innerSet, next, type);
|
||||
this.lt = lt;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
int startSearch = /* testString.toString().indexOf('\n', stringIndex); */
|
||||
findLineTerminator(stringIndex, strLength, testString);
|
||||
|
||||
if (startSearch < 0) {
|
||||
startSearch = strLength;
|
||||
}
|
||||
|
||||
if (startSearch <= stringIndex) {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
return next.findBack(stringIndex, startSearch, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
// String testStr = testString.toString();
|
||||
int strLength = matchResult.getRightBound();
|
||||
// 1. skip line terminators ???
|
||||
// //
|
||||
// we don't skip line terminators here, but return zero match instead
|
||||
// //
|
||||
|
||||
// 2. find first occurrence of the searched pattern
|
||||
// //
|
||||
int res = next.find(stringIndex, testString, matchResult);
|
||||
|
||||
// 3. Check if we have other occurrences till the end of line
|
||||
// (because .* is greedy and we need last one)
|
||||
// //
|
||||
if (res >= 0) {
|
||||
int nextSearch = findLineTerminator(res, strLength, testString);
|
||||
// testStr.indexOf('\n', res);
|
||||
if (nextSearch < 0) {
|
||||
nextSearch = strLength;
|
||||
}
|
||||
nextSearch = next.findBack(res, nextSearch, testString, matchResult);
|
||||
res = (res < nextSearch) ? nextSearch : res;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// 4. find left boundary of this search
|
||||
// //
|
||||
int leftBound = (res > 0) ? findBackLineTerminator(stringIndex, res - 1, testString)/*
|
||||
* testStr
|
||||
* .
|
||||
* lastIndexOf
|
||||
* (
|
||||
* '\n'
|
||||
* ,
|
||||
* res
|
||||
* -
|
||||
* 1
|
||||
* )
|
||||
*/
|
||||
: (res == 0) ? 0 : -1;
|
||||
res = (leftBound >= stringIndex) ? ((leftBound < res) ? leftBound + 1 : leftBound) : stringIndex;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* All line terminators are from Basic Multilingual Pane
|
||||
*/
|
||||
private int findLineTerminator(int from, int to, CharSequence testString) {
|
||||
for (int i = from; i < to; i++) {
|
||||
if (lt.isLineTerminator(testString.charAt(i))) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
private int findBackLineTerminator(int from, int to, CharSequence testString) {
|
||||
for (int i = to; i >= from; i--) {
|
||||
if (lt.isLineTerminator(testString.charAt(i))) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<DotQuant>";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Node accepting any character except line terminators;
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
final class DotSet extends JointSet {
|
||||
|
||||
AbstractLineTerminator lt;
|
||||
|
||||
public DotSet(AbstractLineTerminator lt) {
|
||||
super();
|
||||
this.lt = lt;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
if (stringIndex + 1 > strLength) {
|
||||
matchResult.hitEnd = true;
|
||||
return -1;
|
||||
}
|
||||
char high = testString.charAt(stringIndex);
|
||||
|
||||
if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) {
|
||||
char low = testString.charAt(stringIndex + 1);
|
||||
|
||||
if (Character.isSurrogatePair(high, low)) {
|
||||
return lt.isLineTerminator(Character.toCodePoint(high, low)) ? -1 : next.matches(stringIndex + 2,
|
||||
testString, matchResult);
|
||||
}
|
||||
}
|
||||
|
||||
return lt.isLineTerminator(high) ? -1 : next.matches(stringIndex + 1, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "."; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return AbstractSet.TYPE_DOTSET;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents end of input '\z', i.e. matches only character after the last one;
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class EOISet extends AbstractSet {
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int rightBound = matchResult.hasTransparentBounds() ? testString.length() : matchResult.getRightBound();
|
||||
if (stringIndex < rightBound)
|
||||
return -1;
|
||||
|
||||
matchResult.hitEnd = true;
|
||||
matchResult.requireEnd = true;
|
||||
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns false, enough for quantifiers
|
||||
*/
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "EOI";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents the end-of-line node: it matches at the right bound or just before a final line terminator.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
final class EOLSet extends AbstractSet {
|
||||
private int consCounter;
|
||||
|
||||
public EOLSet(int counter) {
|
||||
this.consCounter = counter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int rightBound = matchResult.hasAnchoringBounds() ? matchResult.getRightBound() : testString.length();
|
||||
|
||||
if (strIndex >= rightBound) {
|
||||
matchResult.setConsumed(consCounter, 0);
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
// check final line terminator;
|
||||
if ((rightBound - strIndex) == 2 && testString.charAt(strIndex) == '\r' &&
|
||||
testString.charAt(strIndex + 1) == '\n') {
|
||||
matchResult.setConsumed(consCounter, 0);
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
char ch;
|
||||
|
||||
if ((rightBound - strIndex) == 1 &&
|
||||
(((ch = testString.charAt(strIndex)) == '\n' || ch == '\r' || ch == '\u0085' || (ch | 1) == '\u2029'))) {
|
||||
matchResult.setConsumed(consCounter, 0);
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
int cons;
|
||||
boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0);
|
||||
matchResult.setConsumed(consCounter, -1);
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<EOL>";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Valid constant zero character match.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class EmptySet extends LeafSet {
|
||||
public EmptySet(AbstractSet next) {
|
||||
super(next);
|
||||
charCount = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int stringIndex, CharSequence testString) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int strLength = matchResult.getRightBound();
|
||||
int startStr = matchResult.getLeftBound();
|
||||
|
||||
while (stringIndex <= strLength) {
|
||||
|
||||
// check for supplementary codepoints
|
||||
if (stringIndex < strLength) {
|
||||
char low = testString.charAt(stringIndex);
|
||||
|
||||
if (Character.isLowSurrogate(low)) {
|
||||
|
||||
if (stringIndex > startStr) {
|
||||
char high = testString.charAt(stringIndex - 1);
|
||||
if (Character.isHighSurrogate(high)) {
|
||||
stringIndex++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (next.matches(stringIndex, testString, matchResult) >= 0) {
|
||||
return stringIndex;
|
||||
}
|
||||
stringIndex++;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int findBack(int stringIndex, int startSearch, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int strLength = matchResult.getRightBound();
|
||||
int startStr = matchResult.getLeftBound();
|
||||
|
||||
while (startSearch >= stringIndex) {
|
||||
|
||||
// check for supplementary codepoints
|
||||
if (startSearch < strLength) {
|
||||
char low = testString.charAt(startSearch);
|
||||
|
||||
if (Character.isLowSurrogate(low)) {
|
||||
|
||||
if (startSearch > startStr) {
|
||||
char high = testString.charAt(startSearch - 1);
|
||||
if (Character.isHighSurrogate(high)) {
|
||||
startSearch--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (next.matches(startSearch, testString, matchResult) >= 0) {
|
||||
return startSearch;
|
||||
}
|
||||
startSearch--;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<Empty set>";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl mr) {
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* The node which marks end of the particular group.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class FSet extends AbstractSet {
|
||||
|
||||
static PossessiveFSet posFSet = new PossessiveFSet();
|
||||
|
||||
boolean isBackReferenced = false;
|
||||
|
||||
private int groupIndex;
|
||||
|
||||
public FSet(int groupIndex) {
|
||||
this.groupIndex = groupIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int end = matchResult.getEnd(groupIndex);
|
||||
matchResult.setEnd(groupIndex, stringIndex);
|
||||
int shift = next.matches(stringIndex, testString, matchResult);
|
||||
/*
|
||||
* if(shift >=0 && matchResult.getEnd(groupIndex) == -1) {
|
||||
* matchResult.setEnd(groupIndex, stringIndex); }
|
||||
*/
|
||||
if (shift < 0)
|
||||
matchResult.setEnd(groupIndex, end);
|
||||
return shift;
|
||||
}
|
||||
|
||||
public int getGroupIndex() {
|
||||
return groupIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "fSet"; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl mr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Marks the end of the particular group and not take into account possible
|
||||
* kickbacks(required for atomic groups, for instance)
|
||||
*
|
||||
*/
|
||||
static class PossessiveFSet extends AbstractSet {
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
return stringIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "posFSet"; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl mr) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Special construction which marks end of pattern.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class FinalSet extends FSet {
|
||||
|
||||
public FinalSet() {
|
||||
super(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (matchResult.mode() == TMatcher.MODE_FIND || stringIndex == matchResult.getRightBound()) {
|
||||
matchResult.setValid();
|
||||
matchResult.setEnd(0, stringIndex);
|
||||
return stringIndex;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "FinalSet"; //$NON-NLS-1$
|
||||
}
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Default quantifier over groups, in fact this type of quantifier is generally
|
||||
* used for constructions we cant identify number of characters they consume.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class GroupQuantifierSet extends QuantifierSet {
|
||||
|
||||
public GroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
if (!innerSet.hasConsumed(matchResult))
|
||||
return next.matches(stringIndex, testString, matchResult);// return
|
||||
// -1;
|
||||
|
||||
int nextIndex = innerSet.matches(stringIndex, testString, matchResult);
|
||||
|
||||
if (nextIndex < 0) {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
} else {
|
||||
return nextIndex;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<GroupQuant>"; //$NON-NLS-1$
|
||||
}
|
||||
}
|
|
@ -0,0 +1,194 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents canonical decomposition of Hangul syllable. Is used when CANON_EQ
|
||||
* flag of Pattern class is specified.
|
||||
*/
|
||||
class HangulDecomposedCharSet extends JointSet {
|
||||
|
||||
/**
|
||||
* Decomposed Hangul syllable.
|
||||
*/
|
||||
private char[] decomposedChar;
|
||||
|
||||
/**
|
||||
* String representing syllable
|
||||
*/
|
||||
private String decomposedCharUTF16 = null;
|
||||
|
||||
/**
|
||||
* Length of useful part of decomposedChar decomposedCharLength <=
|
||||
* decomposedChar.length
|
||||
*/
|
||||
private int decomposedCharLength;
|
||||
|
||||
public HangulDecomposedCharSet(char[] decomposedChar, int decomposedCharLength) {
|
||||
this.decomposedChar = decomposedChar;
|
||||
this.decomposedCharLength = decomposedCharLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next.
|
||||
*/
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets next abstract set.
|
||||
*
|
||||
* @param next
|
||||
* The next to set.
|
||||
*/
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Give string representation of this.
|
||||
*
|
||||
* @return - string representation.
|
||||
*/
|
||||
private String getDecomposedChar() {
|
||||
return (decomposedCharUTF16 == null) ? (decomposedCharUTF16 = new String(decomposedChar)) : decomposedCharUTF16;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "decomposed Hangul syllable:" + getDecomposedChar(); //$NON-NLS-1$
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
/*
|
||||
* All decompositions for Hangul syllables have length that is less or
|
||||
* equal Lexer.MAX_DECOMPOSITION_LENGTH
|
||||
*/
|
||||
int rightBound = matchResult.getRightBound();
|
||||
int SyllIndex = 0;
|
||||
int[] decompSyllable = new int[Lexer.MAX_HANGUL_DECOMPOSITION_LENGTH];
|
||||
int[] decompCurSymb;
|
||||
char curSymb;
|
||||
|
||||
/*
|
||||
* For details about Hangul composition and decomposition see
|
||||
* http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf
|
||||
* "3.12 Conjoining Jamo Behavior"
|
||||
*/
|
||||
int LIndex = -1;
|
||||
int VIndex = -1;
|
||||
int TIndex = -1;
|
||||
|
||||
if (strIndex >= rightBound) {
|
||||
return -1;
|
||||
}
|
||||
curSymb = testString.charAt(strIndex++);
|
||||
decompCurSymb = Lexer.getHangulDecomposition(curSymb);
|
||||
|
||||
if (decompCurSymb == null) {
|
||||
|
||||
/*
|
||||
* We deal with ordinary letter or sequence of jamos at strIndex at
|
||||
* testString.
|
||||
*/
|
||||
decompSyllable[SyllIndex++] = curSymb;
|
||||
LIndex = curSymb - Lexer.LBase;
|
||||
|
||||
if ((LIndex < 0) || (LIndex >= Lexer.LCount)) {
|
||||
|
||||
/*
|
||||
* Ordinary letter, that doesn't match this
|
||||
*/
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strIndex < rightBound) {
|
||||
curSymb = testString.charAt(strIndex);
|
||||
VIndex = curSymb - Lexer.VBase;
|
||||
}
|
||||
|
||||
if ((VIndex < 0) || (VIndex >= Lexer.VCount)) {
|
||||
|
||||
/*
|
||||
* Single L jamo doesn't compose Hangul syllable, so doesn't
|
||||
* match
|
||||
*/
|
||||
return -1;
|
||||
}
|
||||
strIndex++;
|
||||
decompSyllable[SyllIndex++] = curSymb;
|
||||
|
||||
if (strIndex < rightBound) {
|
||||
curSymb = testString.charAt(strIndex);
|
||||
TIndex = curSymb - Lexer.TBase;
|
||||
}
|
||||
|
||||
if ((TIndex < 0) || (TIndex >= Lexer.TCount)) {
|
||||
|
||||
/*
|
||||
* We deal with LV syllable at testString, so compare it to this
|
||||
*/
|
||||
return ((decomposedCharLength == 2) && (decompSyllable[0] == decomposedChar[0]) && (decompSyllable[1] == decomposedChar[1])) ? next
|
||||
.matches(strIndex, testString, matchResult) : -1;
|
||||
}
|
||||
strIndex++;
|
||||
decompSyllable[SyllIndex++] = curSymb;
|
||||
|
||||
/*
|
||||
* We deal with LVT syllable at testString, so compare it to this
|
||||
*/
|
||||
return ((decomposedCharLength == 3) && (decompSyllable[0] == decomposedChar[0]) &&
|
||||
(decompSyllable[1] == decomposedChar[1]) && (decompSyllable[2] == decomposedChar[2])) ? next
|
||||
.matches(strIndex, testString, matchResult) : -1;
|
||||
} else {
|
||||
|
||||
/*
|
||||
* We deal with Hangul syllable at strIndex at testString. So we
|
||||
* decomposed it to compare with this.
|
||||
*/
|
||||
int i = 0;
|
||||
|
||||
if (decompCurSymb.length != decomposedCharLength) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (; i < decomposedCharLength; i++) {
|
||||
if (decompCurSymb[i] != decomposedChar[i]) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
return (set instanceof HangulDecomposedCharSet) ? ((HangulDecomposedCharSet)set).getDecomposedChar().equals(
|
||||
getDecomposedChar()) : true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,256 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
|
||||
*
|
||||
* COPYRIGHT AND PERMISSION NOTICE
|
||||
*
|
||||
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
|
||||
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
|
||||
* hereby granted, free of charge, to any person obtaining a copy of the
|
||||
* Unicode data files and any associated documentation (the "Data Files")
|
||||
* or Unicode software and any associated documentation (the "Software")
|
||||
* to deal in the Data Files or Software without restriction, including without
|
||||
* limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
* and/or sell copies of the Data Files or Software, and to permit persons
|
||||
* to whom the Data Files or Software are furnished to do so, provided that
|
||||
* (a) the above copyright notice(s) and this permission notice appear with
|
||||
* all copies of the Data Files or Software, (b) both the above copyright
|
||||
* notice(s) and this permission notice appear in associated documentation,
|
||||
* and (c) there is clear notice in each modified Data File or in the Software
|
||||
* as well as in the documentation associated with the Data File(s) or Software
|
||||
* that the data or software has been modified.
|
||||
|
||||
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall
|
||||
* not be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in these Data Files or Software without prior written
|
||||
* authorization of the copyright holder.
|
||||
*
|
||||
* 2. Additional terms from the Database:
|
||||
*
|
||||
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* The Unicode Character Database is provided as is by Unicode, Inc.
|
||||
* No claims are made as to fitness for any particular purpose. No warranties
|
||||
* of any kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been purchased
|
||||
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
|
||||
* will be exchange of defective media within 90 days of receipt. This disclaimer
|
||||
* is applicable for all other data files accompanying the Unicode Character Database,
|
||||
* some of which have been compiled by the Unicode Consortium, and some of which
|
||||
* have been supplied by other sources.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Data
|
||||
*
|
||||
* Recipient is granted the right to make copies in any form for internal
|
||||
* distribution and to freely use the information supplied in the creation of
|
||||
* products supporting the UnicodeTM Standard. The files in
|
||||
* the Unicode Character Database can be redistributed to third parties or other
|
||||
* organizations (whether for profit or not) as long as this notice and the disclaimer
|
||||
* notice are retained. Information can be extracted from these files and used
|
||||
* in documentation or programs, as long as there is an accompanying notice
|
||||
* indicating the source.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* This class represents high surrogate character.
|
||||
*/
|
||||
class HighSurrogateCharSet extends JointSet {
|
||||
|
||||
/*
|
||||
* Note that we can use high and low surrogate characters that don't combine
|
||||
* into supplementary code point. See
|
||||
* http://www.unicode.org/reports/tr18/#Supplementary_Characters
|
||||
*/
|
||||
|
||||
private char high;
|
||||
|
||||
public HighSurrogateCharSet(char high) {
|
||||
this.high = high;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next.
|
||||
*/
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets next abstract set.
|
||||
*
|
||||
* @param next
|
||||
* The next to set.
|
||||
*/
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
if (stringIndex + 1 > strLength) {
|
||||
matchResult.hitEnd = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
char high = testString.charAt(stringIndex);
|
||||
|
||||
if (stringIndex + 1 < strLength) {
|
||||
char low = testString.charAt(stringIndex + 1);
|
||||
|
||||
/*
|
||||
* we consider high surrogate followed by low surrogate as a
|
||||
* codepoint
|
||||
*/
|
||||
if (Character.isLowSurrogate(low)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.high == high) {
|
||||
return next.matches(stringIndex + 1, testString, matchResult);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (testString instanceof String) {
|
||||
String testStr = (String)testString;
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
while (strIndex < strLength) {
|
||||
|
||||
strIndex = testStr.indexOf(high, strIndex);
|
||||
if (strIndex < 0)
|
||||
return -1;
|
||||
|
||||
if (strIndex + 1 < strLength) {
|
||||
|
||||
/*
|
||||
* we consider high surrogate followed by low surrogate as a
|
||||
* codepoint
|
||||
*/
|
||||
if (Character.isLowSurrogate(testStr.charAt(strIndex + 1))) {
|
||||
strIndex += 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
|
||||
return strIndex;
|
||||
}
|
||||
strIndex++;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
return super.find(strIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (testString instanceof String) {
|
||||
String testStr = (String)testString;
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
while (lastIndex >= strIndex) {
|
||||
lastIndex = testStr.lastIndexOf(high, lastIndex);
|
||||
if (lastIndex < 0 || lastIndex < strIndex) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (lastIndex + 1 < strLength) {
|
||||
|
||||
/*
|
||||
* we consider high surrogate followed by low surrogate as a
|
||||
* codepoint
|
||||
*/
|
||||
if (Character.isLowSurrogate(testStr.charAt(lastIndex + 1))) {
|
||||
lastIndex--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
lastIndex--;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
return super.findBack(strIndex, lastIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "" + high;
|
||||
}
|
||||
|
||||
protected int getChar() {
|
||||
return high;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
if (set instanceof CharSet) {
|
||||
return false;
|
||||
} else if (set instanceof RangeSet) {
|
||||
return false;
|
||||
} else if (set instanceof SupplRangeSet) {
|
||||
return false;
|
||||
} else if (set instanceof SupplCharSet) {
|
||||
return false;
|
||||
} else if (set instanceof LowSurrogateCharSet) {
|
||||
return false;
|
||||
} else if (set instanceof HighSurrogateCharSet) {
|
||||
return ((HighSurrogateCharSet)set).high == this.high;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
import java.text.MessageFormat;
|
||||
|
||||
/**
 * Internationalization stub. All the messages in the java.util.regex
 * package go through this class. It should later be replaced with a real
 * internationalization utility.
 *
 * @author Nikolay A. Kuznetsov
 */
class I18n {

    /**
     * Returns {@code message} as is; no lookup or translation is performed.
     */
    public static String getMessage(String message) {
        return message;
    }

    /**
     * Formats {@code message} as a {@link MessageFormat} pattern with one
     * argument.
     */
    public static String getFormattedMessage(String message, Object arg1) {
        // arg1 is statically an Object, so the varargs call wraps it in a
        // one-element array exactly like an explicit array would.
        return MessageFormat.format(message, arg1);
    }

    /**
     * Formats {@code message} as a {@link MessageFormat} pattern with two
     * arguments.
     */
    public static String getFormattedMessage(String message, Object arg1, Object arg2) {
        return MessageFormat.format(message, arg1, arg2);
    }
}
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Hashtable implementation for int arrays.
|
||||
*/
|
||||
class IntArrHash {
|
||||
final int[] table;
|
||||
|
||||
final Object[] values;
|
||||
|
||||
final int mask;
|
||||
|
||||
public IntArrHash(int size) {
|
||||
int tmpMask = 0;
|
||||
while (size >= tmpMask) {
|
||||
tmpMask = (tmpMask << 1) | 1;
|
||||
}
|
||||
mask = (tmpMask << 1) | 1;
|
||||
table = new int[mask + 1];
|
||||
values = new Object [mask + 1];
|
||||
}
|
||||
|
||||
public void put(int key, int [] value) {
|
||||
int i = 0;
|
||||
int hashCode = key & mask;
|
||||
|
||||
for (; ; ) {
|
||||
if (table[hashCode] == 0 // empty
|
||||
|| table[hashCode] == key) { // rewrite
|
||||
table[hashCode] = key;
|
||||
values[hashCode] = value;
|
||||
return;
|
||||
}
|
||||
i++;
|
||||
i &= mask;
|
||||
|
||||
hashCode += i;
|
||||
hashCode &= mask;
|
||||
}
|
||||
}
|
||||
|
||||
public int [] get(int key) {
|
||||
int hashCode = key & mask;
|
||||
int i = 0;
|
||||
int storedKey;
|
||||
|
||||
for (; ; ) {
|
||||
storedKey = table[hashCode];
|
||||
|
||||
if (storedKey == 0) { // empty
|
||||
return null;
|
||||
}
|
||||
|
||||
if (storedKey == key) {
|
||||
return (int []) values[hashCode];
|
||||
}
|
||||
|
||||
i++;
|
||||
i &= mask;
|
||||
|
||||
hashCode += i;
|
||||
hashCode &= mask;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
 * Hashtable implementation mapping int keys to int values.
 *
 * Open addressing over a fixed-size table; a stored key of 0 in
 * {@code table} marks an empty slot, so key 0 itself is tracked in dedicated
 * fields instead of in the table. Lookups of absent keys yield the
 * {@code size} passed to the constructor. The table is never resized.
 */
class IntHash {
    int[] table;

    int[] values;

    int mask;

    int size; // maximum shift

    /** Whether a value has been stored under key 0. */
    private boolean hasZeroKey;

    /** Value stored under key 0; meaningful only when hasZeroKey is set. */
    private int zeroValue;

    /**
     * Allocates a table whose capacity is a power of two larger than
     * {@code size}.
     *
     * @param size
     *            expected number of entries; also the value returned by
     *            {@link #get(int)} for absent keys
     */
    public IntHash(int size) {
        while (size >= mask) {
            mask = (mask << 1) | 1;
        }
        mask = (mask << 1) | 1;
        table = new int[mask + 1];
        values = new int[mask + 1];
        this.size = size;
    }

    /**
     * Associates {@code value} with {@code key}, replacing any previous
     * association.
     */
    public void put(int key, int value) {
        if (key == 0) {
            // 0 is the empty-slot marker inside the table, so it cannot be
            // stored there and still be found again.
            hasZeroKey = true;
            zeroValue = value;
            return;
        }

        int i = 0;
        int hashCode = key & mask;

        for (;;) {
            if (table[hashCode] == 0 // empty
                    || table[hashCode] == key) { // rewrite
                table[hashCode] = key;
                values[hashCode] = value;
                return;
            }
            i++;
            i &= mask;

            hashCode += i;
            hashCode &= mask;
        }
    }

    /**
     * Returns the value stored under {@code key}.
     *
     * @return the stored value, or {@code size} when the key is absent
     */
    public int get(int key) {
        if (key == 0) {
            return hasZeroKey ? zeroValue : size;
        }

        int hashCode = key & mask;
        int i = 0;
        int storedKey;

        for (;;) {
            storedKey = table[hashCode];

            if (storedKey == 0) { // empty
                return size;
            }

            if (storedKey == key) {
                return values[hashCode];
            }

            i++;
            i &= mask;

            hashCode += i;
            hashCode &= mask;
        }
    }
}
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Represents group, which is alternation of other subexpression. One should
|
||||
* think about "group" in this model as JointSet opening group and corresponding
|
||||
* FSet closing group.
|
||||
*/
|
||||
class JointSet extends AbstractSet {
|
||||
|
||||
protected ArrayList<AbstractSet> children;
|
||||
|
||||
protected AbstractSet fSet;
|
||||
|
||||
protected int groupIndex;
|
||||
|
||||
protected JointSet() {
|
||||
}
|
||||
|
||||
public JointSet(ArrayList<AbstractSet> children, FSet fSet) {
|
||||
this.children = children;
|
||||
this.fSet = fSet;
|
||||
this.groupIndex = fSet.getGroupIndex();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns stringIndex+shift, the next position to match
|
||||
*/
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (children == null) {
|
||||
return -1;
|
||||
}
|
||||
int start = matchResult.getStart(groupIndex);
|
||||
matchResult.setStart(groupIndex, stringIndex);
|
||||
int size = children.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
AbstractSet e = children.get(i);
|
||||
int shift = e.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
return shift;
|
||||
}
|
||||
}
|
||||
matchResult.setStart(groupIndex, start);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
fSet.setNext(next);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return fSet.getNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "JointSet"; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
public int getGroup() {
|
||||
return groupIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
if (children != null) {
|
||||
for (Iterator<AbstractSet> i = children.iterator(); i.hasNext();) {
|
||||
if ((i.next()).first(set)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return !(matchResult.getEnd(groupIndex) >= 0 && matchResult.getStart(groupIndex) == matchResult
|
||||
.getEnd(groupIndex));
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for traversing nodes after the first stage of
|
||||
* compilation.
|
||||
*/
|
||||
@Override
|
||||
public void processSecondPass() {
|
||||
this.isSecondPassVisited = true;
|
||||
|
||||
if (fSet != null && !fSet.isSecondPassVisited) {
|
||||
fSet.processSecondPass();
|
||||
}
|
||||
|
||||
if (children != null) {
|
||||
int childrenSize = children.size();
|
||||
|
||||
for (int i = 0; i < childrenSize; i++) {
|
||||
AbstractSet child = children.get(i);
|
||||
JointSet set = child.processBackRefReplacement();
|
||||
|
||||
if (set != null) {
|
||||
child.isSecondPassVisited = true;
|
||||
children.remove(i);
|
||||
children.add(i, set);
|
||||
child = set;
|
||||
}
|
||||
|
||||
if (!child.isSecondPassVisited) {
|
||||
child.processSecondPass();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (next != null) {
|
||||
super.processSecondPass();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class LeafQuantifierSet extends QuantifierSet {
|
||||
|
||||
protected LeafSet leaf;
|
||||
|
||||
public LeafQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
this.leaf = innerSet;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int i = 0;
|
||||
int shift = 0;
|
||||
|
||||
while (stringIndex + leaf.charCount() <= matchResult.getRightBound() &&
|
||||
(shift = leaf.accepts(stringIndex, testString)) > 0) {
|
||||
stringIndex += shift;
|
||||
i++;
|
||||
}
|
||||
|
||||
for (; i >= 0; i--) {
|
||||
shift = next.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
return shift;
|
||||
}
|
||||
|
||||
stringIndex -= leaf.charCount();
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<Quant>";
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets an inner set.
|
||||
*
|
||||
* @param innerSet
|
||||
* The innerSet to set.
|
||||
*/
|
||||
@Override
|
||||
public void setInnerSet(AbstractSet innerSet) {
|
||||
if (!(innerSet instanceof LeafSet))
|
||||
throw new RuntimeException("");
|
||||
super.setInnerSet(innerSet);
|
||||
this.leaf = (LeafSet)innerSet;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Base class for nodes representing leaf tokens of the RE, those who consumes
|
||||
* fixed number of characters.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
abstract class LeafSet extends AbstractSet {
|
||||
|
||||
protected int charCount = 1;
|
||||
|
||||
public LeafSet(AbstractSet next) {
|
||||
super(next);
|
||||
setType(AbstractSet.TYPE_LEAF);
|
||||
}
|
||||
|
||||
public LeafSet() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns "shift", the number of accepted chars commonly internal function,
|
||||
* but called by quantifiers.
|
||||
*/
|
||||
public abstract int accepts(int stringIndex, CharSequence testString);
|
||||
|
||||
/**
|
||||
* Checks if we can enter this state and pass the control to the next one.
|
||||
* Return positive value if match succeeds, negative otherwise.
|
||||
*/
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
if (stringIndex + charCount() > matchResult.getRightBound()) {
|
||||
matchResult.hitEnd = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int shift = accepts(stringIndex, testString);
|
||||
if (shift < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return next.matches(stringIndex + shift, testString, matchResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns number of characters this node consumes.
|
||||
*
|
||||
* @return number of characters this node consumes.
|
||||
*/
|
||||
public int charCount() {
|
||||
return charCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl mr) {
|
||||
return true;
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,194 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
|
||||
*
|
||||
* COPYRIGHT AND PERMISSION NOTICE
|
||||
*
|
||||
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
|
||||
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
|
||||
* hereby granted, free of charge, to any person obtaining a copy of the
|
||||
* Unicode data files and any associated documentation (the "Data Files")
|
||||
* or Unicode software and any associated documentation (the "Software")
|
||||
* to deal in the Data Files or Software without restriction, including without
|
||||
* limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
* and/or sell copies of the Data Files or Software, and to permit persons
|
||||
* to whom the Data Files or Software are furnished to do so, provided that
|
||||
* (a) the above copyright notice(s) and this permission notice appear with
|
||||
* all copies of the Data Files or Software, (b) both the above copyright
|
||||
* notice(s) and this permission notice appear in associated documentation,
|
||||
* and (c) there is clear notice in each modified Data File or in the Software
|
||||
* as well as in the documentation associated with the Data File(s) or Software
|
||||
* that the data or software has been modified.
|
||||
|
||||
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall
|
||||
* not be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in these Data Files or Software without prior written
|
||||
* authorization of the copyright holder.
|
||||
*
|
||||
* 2. Additional terms from the Database:
|
||||
*
|
||||
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* The Unicode Character Database is provided as is by Unicode, Inc.
|
||||
* No claims are made as to fitness for any particular purpose. No warranties
|
||||
* of any kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been purchased
|
||||
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
|
||||
* will be exchange of defective media within 90 days of receipt. This disclaimer
|
||||
* is applicable for all other data files accompanying the Unicode Character Database,
|
||||
* some of which have been compiled by the Unicode Consortium, and some of which
|
||||
* have been supplied by other sources.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Data
|
||||
*
|
||||
* Recipient is granted the right to make copies in any form for internal
|
||||
* distribution and to freely use the information supplied in the creation of
|
||||
* products supporting the UnicodeTM Standard. The files in
|
||||
* the Unicode Character Database can be redistributed to third parties or other
|
||||
* organizations (whether for profit or not) as long as this notice and the disclaimer
|
||||
* notice are retained. Information can be extracted from these files and used
|
||||
* in documentation or programs, as long as there is an accompanying notice
|
||||
* indicating the source.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/*
|
||||
* This class is a range that contains only surrogate characters.
|
||||
*/
|
||||
class LowHighSurrogateRangeSet extends JointSet {
|
||||
|
||||
protected AbstractCharClass surrChars;
|
||||
|
||||
protected boolean alt = false;
|
||||
|
||||
public LowHighSurrogateRangeSet(AbstractCharClass surrChars, AbstractSet next) {
|
||||
this.surrChars = surrChars.getInstance();
|
||||
this.alt = surrChars.alt;
|
||||
setNext(next);
|
||||
}
|
||||
|
||||
public LowHighSurrogateRangeSet(AbstractCharClass surrChars) {
|
||||
this.surrChars = surrChars.getInstance();
|
||||
this.alt = surrChars.alt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next.
|
||||
*/
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets next abstract set.
|
||||
*
|
||||
* @param next
|
||||
* The next to set.
|
||||
*/
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns stringIndex+shift, the next position to match
|
||||
*/
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int startStr = matchResult.getLeftBound();
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
if (stringIndex + 1 > strLength) {
|
||||
matchResult.hitEnd = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
char ch = testString.charAt(stringIndex);
|
||||
|
||||
if (!surrChars.contains(ch)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (Character.isHighSurrogate(ch)) {
|
||||
|
||||
if (stringIndex + 1 < strLength) {
|
||||
char low = testString.charAt(stringIndex + 1);
|
||||
|
||||
if (Character.isLowSurrogate(low)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
} else if (Character.isLowSurrogate(ch)) {
|
||||
|
||||
if (stringIndex > startStr) {
|
||||
char high = testString.charAt(stringIndex - 1);
|
||||
|
||||
if (Character.isHighSurrogate(high)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return next.matches(stringIndex + 1, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "range:" + (alt ? "^ " : " ") + surrChars.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
if (set instanceof CharSet) {
|
||||
return false;
|
||||
} else if (set instanceof RangeSet) {
|
||||
return false;
|
||||
} else if (set instanceof SupplRangeSet) {
|
||||
return false;
|
||||
} else if (set instanceof SupplCharSet) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected AbstractCharClass getChars() {
|
||||
return surrChars;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,255 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
|
||||
*
|
||||
* COPYRIGHT AND PERMISSION NOTICE
|
||||
*
|
||||
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
|
||||
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
|
||||
* hereby granted, free of charge, to any person obtaining a copy of the
|
||||
* Unicode data files and any associated documentation (the "Data Files")
|
||||
* or Unicode software and any associated documentation (the "Software")
|
||||
* to deal in the Data Files or Software without restriction, including without
|
||||
* limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
* and/or sell copies of the Data Files or Software, and to permit persons
|
||||
* to whom the Data Files or Software are furnished to do so, provided that
|
||||
* (a) the above copyright notice(s) and this permission notice appear with
|
||||
* all copies of the Data Files or Software, (b) both the above copyright
|
||||
* notice(s) and this permission notice appear in associated documentation,
|
||||
* and (c) there is clear notice in each modified Data File or in the Software
|
||||
* as well as in the documentation associated with the Data File(s) or Software
|
||||
* that the data or software has been modified.
|
||||
|
||||
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall
|
||||
* not be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in these Data Files or Software without prior written
|
||||
* authorization of the copyright holder.
|
||||
*
|
||||
* 2. Additional terms from the Database:
|
||||
*
|
||||
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* The Unicode Character Database is provided as is by Unicode, Inc.
|
||||
* No claims are made as to fitness for any particular purpose. No warranties
|
||||
* of any kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been purchased
|
||||
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
|
||||
* will be exchange of defective media within 90 days of receipt. This disclaimer
|
||||
* is applicable for all other data files accompanying the Unicode Character Database,
|
||||
* some of which have been compiled by the Unicode Consortium, and some of which
|
||||
* have been supplied by other sources.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Data
|
||||
*
|
||||
* Recipient is granted the right to make copies in any form for internal
|
||||
* distribution and to freely use the information supplied in the creation of
|
||||
* products supporting the UnicodeTM Standard. The files in
|
||||
* the Unicode Character Database can be redistributed to third parties or other
|
||||
* organizations (whether for profit or not) as long as this notice and the disclaimer
|
||||
* notice are retained. Information can be extracted from these files and used
|
||||
* in documentation or programs, as long as there is an accompanying notice
|
||||
* indicating the source.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* This class represents low surrogate character.
|
||||
*/
|
||||
class LowSurrogateCharSet extends JointSet {
|
||||
|
||||
/*
|
||||
* Note that we can use high and low surrogate characters that don't combine
|
||||
* into supplementary code point. See
|
||||
* http://www.unicode.org/reports/tr18/#Supplementary_Characters
|
||||
*/
|
||||
private char low;
|
||||
|
||||
public LowSurrogateCharSet(char low) {
|
||||
this.low = low;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next.
|
||||
*/
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets next abstract set.
|
||||
*
|
||||
* @param next
|
||||
* The next to set.
|
||||
*/
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
if (stringIndex + 1 > matchResult.getRightBound()) {
|
||||
matchResult.hitEnd = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
char low = testString.charAt(stringIndex);
|
||||
|
||||
if (stringIndex > matchResult.getLeftBound()) {
|
||||
char high = testString.charAt(stringIndex - 1);
|
||||
|
||||
/*
|
||||
* we consider high surrogate followed by low surrogate as a
|
||||
* codepoint
|
||||
*/
|
||||
if (Character.isHighSurrogate(high)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.low == low) {
|
||||
return next.matches(stringIndex + 1, testString, matchResult);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (testString instanceof String) {
|
||||
String testStr = (String)testString;
|
||||
int startStr = matchResult.getLeftBound();
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
while (strIndex < strLength) {
|
||||
|
||||
strIndex = testStr.indexOf(low, strIndex);
|
||||
if (strIndex < 0)
|
||||
return -1;
|
||||
|
||||
if (strIndex > startStr) {
|
||||
|
||||
/*
|
||||
* we consider high surrogate followed by low surrogate as a
|
||||
* codepoint
|
||||
*/
|
||||
if (Character.isHighSurrogate(testStr.charAt(strIndex - 1))) {
|
||||
strIndex++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
|
||||
return strIndex;
|
||||
}
|
||||
strIndex++;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
return super.find(strIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (testString instanceof String) {
|
||||
int startStr = matchResult.getLeftBound();
|
||||
String testStr = (String)testString;
|
||||
|
||||
while (lastIndex >= strIndex) {
|
||||
lastIndex = testStr.lastIndexOf(low, lastIndex);
|
||||
if (lastIndex < 0 || lastIndex < strIndex) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (lastIndex > startStr) {
|
||||
|
||||
/*
|
||||
* we consider high surrogate followed by low surrogate as a
|
||||
* codepoint
|
||||
*/
|
||||
if (Character.isHighSurrogate(testStr.charAt(lastIndex - 1))) {
|
||||
lastIndex -= 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
lastIndex--;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
return super.findBack(strIndex, lastIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "" + low;
|
||||
}
|
||||
|
||||
protected int getChar() {
|
||||
return low;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
if (set instanceof CharSet) {
|
||||
return false;
|
||||
} else if (set instanceof RangeSet) {
|
||||
return false;
|
||||
} else if (set instanceof SupplRangeSet) {
|
||||
return false;
|
||||
} else if (set instanceof SupplCharSet) {
|
||||
return false;
|
||||
} else if (set instanceof HighSurrogateCharSet) {
|
||||
return false;
|
||||
} else if (set instanceof LowSurrogateCharSet) {
|
||||
return ((LowSurrogateCharSet)set).low == this.low;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,267 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Match result implementation Note: probably it might make sense to combine
|
||||
* this class with Matcher.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class MatchResultImpl implements TMatchResult {
|
||||
|
||||
private int[] groupBounds = null;
|
||||
|
||||
private int[] consumers = null;
|
||||
|
||||
private int[] compQuantCounters = null;
|
||||
|
||||
private CharSequence string = null;
|
||||
|
||||
private int groupCount = 0;
|
||||
|
||||
private boolean valid = false;
|
||||
|
||||
private int leftBound;
|
||||
|
||||
private int rightBound;
|
||||
|
||||
int startIndex;
|
||||
|
||||
private boolean transparentBounds = false;
|
||||
|
||||
private boolean anchoringBounds = false;
|
||||
|
||||
boolean hitEnd = false;
|
||||
|
||||
boolean requireEnd = false;
|
||||
|
||||
int previousMatch = -1;
|
||||
|
||||
private int mode;
|
||||
|
||||
MatchResultImpl(CharSequence string, int leftBound, int rightBound, int groupCount, int compQuantCount,
|
||||
int consumersCount) {
|
||||
this.groupCount = ++groupCount;
|
||||
this.groupBounds = new int[groupCount * 2];
|
||||
|
||||
this.consumers = new int[consumersCount];
|
||||
Arrays.fill(consumers, -1);
|
||||
|
||||
if (compQuantCount > 0)
|
||||
this.compQuantCounters = new int[compQuantCount];
|
||||
Arrays.fill(groupBounds, -1);
|
||||
reset(string, leftBound, rightBound);
|
||||
}
|
||||
|
||||
TMatchResult cloneImpl() {
|
||||
MatchResultImpl res = new MatchResultImpl(this.string, this.leftBound, this.rightBound, this.groupCount - 1, 0,
|
||||
0);
|
||||
|
||||
res.valid = valid;
|
||||
if (valid) {
|
||||
System.arraycopy(groupBounds, 0, res.groupBounds, 0, this.groupBounds.length);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public void setConsumed(int counter, int value) {
|
||||
this.consumers[counter] = value;
|
||||
}
|
||||
|
||||
public int getConsumed(int counter) {
|
||||
return this.consumers[counter];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return end(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end(int group) {
|
||||
checkGroup(group);
|
||||
return groupBounds[group * 2 + 1];
|
||||
}
|
||||
|
||||
void setStart(int group, int offset) {
|
||||
groupBounds[group * 2] = offset;
|
||||
}
|
||||
|
||||
void setEnd(int group, int offset) {
|
||||
groupBounds[group * 2 + 1] = offset;
|
||||
}
|
||||
|
||||
int getStart(int group) {
|
||||
return groupBounds[group * 2];
|
||||
}
|
||||
|
||||
int getEnd(int group) {
|
||||
return groupBounds[group * 2 + 1];
|
||||
}
|
||||
|
||||
@Override
|
||||
public String group() {
|
||||
return group(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String group(int group) {
|
||||
if (start(group) < 0)
|
||||
return null;
|
||||
return string.subSequence(start(group), end(group)).toString();
|
||||
}
|
||||
|
||||
String getGroupNoCheck(int group) {
|
||||
int st = getStart(group);
|
||||
int end = getEnd(group);
|
||||
if ((end | st | (end - st)) < 0 || end > string.length())
|
||||
return null;
|
||||
|
||||
return string.subSequence(st, end).toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int groupCount() {
|
||||
return groupCount - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return start(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start(int group) {
|
||||
checkGroup(group);
|
||||
return groupBounds[group * 2];
|
||||
}
|
||||
|
||||
/*
|
||||
* This method being called after any successful match; For now it's being
|
||||
* used to check zero group for empty match;
|
||||
*/
|
||||
public void finalizeMatch() {
|
||||
if (this.groupBounds[0] == -1) {
|
||||
this.groupBounds[0] = this.startIndex;
|
||||
this.groupBounds[1] = this.startIndex;
|
||||
}
|
||||
|
||||
previousMatch = end();
|
||||
}
|
||||
|
||||
public int getEnterCounter(int setCounter) {
|
||||
return compQuantCounters[setCounter];
|
||||
}
|
||||
|
||||
public void setEnterCounter(int setCounter, int value) {
|
||||
compQuantCounters[setCounter] = value;
|
||||
}
|
||||
|
||||
private void checkGroup(int group) {
|
||||
if (!valid) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
|
||||
if (group < 0 || group > groupCount) {
|
||||
throw new IndexOutOfBoundsException(String.valueOf(group));
|
||||
}
|
||||
}
|
||||
|
||||
void updateGroup(int index, int srtOffset, int endOffset) {
|
||||
checkGroup(index);
|
||||
groupBounds[index * 2] = srtOffset;
|
||||
groupBounds[index * 2 + 1] = endOffset;
|
||||
}
|
||||
|
||||
protected void setValid() {
|
||||
this.valid = true;
|
||||
}
|
||||
|
||||
protected boolean isValid() {
|
||||
return this.valid;
|
||||
}
|
||||
|
||||
protected void reset(CharSequence newSequence, int leftBound, int rightBound) {
|
||||
valid = false;
|
||||
mode = TMatcher.MODE_MATCH;
|
||||
Arrays.fill(groupBounds, -1);
|
||||
Arrays.fill(consumers, -1);
|
||||
|
||||
if (newSequence != null)
|
||||
this.string = newSequence;
|
||||
if (leftBound >= 0)
|
||||
this.setBounds(leftBound, rightBound);
|
||||
this.startIndex = this.leftBound;
|
||||
}
|
||||
|
||||
protected void reset() {
|
||||
reset(null, -1, -1);
|
||||
}
|
||||
|
||||
private void setBounds(int leftBound, int rightBound) {
|
||||
this.leftBound = leftBound;
|
||||
this.rightBound = rightBound;
|
||||
}
|
||||
|
||||
protected void setStartIndex(int startIndex) {
|
||||
this.startIndex = startIndex;
|
||||
previousMatch = previousMatch >= 0 ? previousMatch : startIndex;
|
||||
}
|
||||
|
||||
public int getLeftBound() {
|
||||
return this.leftBound;
|
||||
}
|
||||
|
||||
public int getRightBound() {
|
||||
return this.rightBound;
|
||||
}
|
||||
|
||||
protected void setMode(int mode) {
|
||||
this.mode = mode;
|
||||
}
|
||||
|
||||
protected int mode() {
|
||||
return mode;
|
||||
}
|
||||
|
||||
protected void useAnchoringBounds(boolean value) {
|
||||
this.anchoringBounds = value;
|
||||
}
|
||||
|
||||
protected boolean hasAnchoringBounds() {
|
||||
return this.anchoringBounds;
|
||||
}
|
||||
|
||||
protected void useTransparentBounds(boolean value) {
|
||||
this.transparentBounds = value;
|
||||
}
|
||||
|
||||
protected boolean hasTransparentBounds() {
|
||||
return this.transparentBounds;
|
||||
}
|
||||
|
||||
int getPreviousMatchEnd() {
|
||||
return previousMatch;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents multiline version of the dollar sign.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class MultiLineEOLSet extends AbstractSet {
|
||||
|
||||
private int consCounter;
|
||||
|
||||
public MultiLineEOLSet(int counter) {
|
||||
this.consCounter = counter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int strDif = matchResult.hasAnchoringBounds() ? matchResult.getLeftBound() - strIndex : testString.length() -
|
||||
strIndex;
|
||||
char ch1;
|
||||
char ch2;
|
||||
if (strDif == 0) {
|
||||
matchResult.setConsumed(consCounter, 0);
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
} else if (strDif >= 2) {
|
||||
ch1 = testString.charAt(strIndex);
|
||||
ch2 = testString.charAt(strIndex + 1);
|
||||
} else {
|
||||
ch1 = testString.charAt(strIndex);
|
||||
ch2 = 'a';
|
||||
}
|
||||
|
||||
switch (ch1) {
|
||||
case '\r': {
|
||||
if (ch2 == '\n') {
|
||||
matchResult.setConsumed(consCounter, 0);
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
matchResult.setConsumed(consCounter, 0);
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
case '\n':
|
||||
case '\u0085':
|
||||
case '\u2028':
|
||||
case '\u2029': {
|
||||
matchResult.setConsumed(consCounter, 0);
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
int cons;
|
||||
boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0);
|
||||
matchResult.setConsumed(consCounter, -1);
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<MultiLine $>"; //$NON-NLS-1$
|
||||
}
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Multiline version of the ^ sign.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class MultiLineSOLSet extends AbstractSet {
|
||||
|
||||
private AbstractLineTerminator lt;
|
||||
|
||||
public MultiLineSOLSet(AbstractLineTerminator lt) {
|
||||
this.lt = lt;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (strIndex != matchResult.getRightBound() &&
|
||||
((strIndex == 0 || (matchResult.hasAnchoringBounds() && strIndex == matchResult.getLeftBound())) || lt
|
||||
.isAfterLineTerminator(testString.charAt(strIndex - 1), testString.charAt(strIndex)))) {
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "^";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Negative look ahead node.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class NegativeLookAhead extends AtomicJointSet {
|
||||
|
||||
public NegativeLookAhead(ArrayList<AbstractSet> children, FSet fSet) {
|
||||
super(children, fSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns stringIndex+shift, the next position to match
|
||||
*/
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int size = children.size();
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
AbstractSet e = children.get(i);
|
||||
if (e.matches(stringIndex, testString, matchResult) >= 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "NegLookaheadJointSet";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Negative look behind node.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class NegativeLookBehind extends AtomicJointSet {
|
||||
|
||||
public NegativeLookBehind(ArrayList<AbstractSet> children, FSet fSet) {
|
||||
super(children, fSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns stringIndex+shift, the next position to match
|
||||
*/
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString,
|
||||
MatchResultImpl matchResult) {
|
||||
|
||||
int size = children.size();
|
||||
int shift;
|
||||
|
||||
// fSet will take this index to check if we at the right bound
|
||||
// and return true if the current index equal to this one
|
||||
matchResult.setConsumed(groupIndex, stringIndex);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
AbstractSet e = children.get(i);
|
||||
// find limits could be calculated though e.getCharCount()
|
||||
// fSet will return true only if string index at fSet equal
|
||||
// to stringIndex
|
||||
shift = e.findBack(0, stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "NegBehindJointSet";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Non-capturing group closing node.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class NonCapFSet extends FSet {
|
||||
public NonCapFSet(int groupIndex) {
|
||||
super(groupIndex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int gr = getGroupIndex();
|
||||
matchResult.setConsumed(gr, stringIndex - matchResult.getConsumed(gr));
|
||||
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "NonCapFSet";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl mr) {
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Node representing non-capturing group
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class NonCapJointSet extends JointSet {
|
||||
|
||||
protected NonCapJointSet() {
|
||||
}
|
||||
|
||||
public NonCapJointSet(ArrayList<AbstractSet> children, FSet fSet) {
|
||||
super(children, fSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns stringIndex+shift, the next position to match
|
||||
*/
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString,
|
||||
MatchResultImpl matchResult) {
|
||||
int start = matchResult.getConsumed(groupIndex);
|
||||
matchResult.setConsumed(groupIndex, stringIndex);
|
||||
|
||||
int size = children.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
AbstractSet e = children.get(i);
|
||||
int shift = e.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
return shift;
|
||||
}
|
||||
}
|
||||
matchResult.setConsumed(groupIndex, start);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "NonCapJointSet"; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
int cons = matchResult.getConsumed(groupIndex);
|
||||
return cons != 0;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Possessive quantifier over group, see java.util.regex.GroupQuantifierSet for
|
||||
* more details.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PosAltGroupQuantifierSet extends AltGroupQuantifierSet {
|
||||
|
||||
public PosAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
((JointSet)innerSet).setNext(FSet.posFSet);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int nextIndex = innerSet.matches(stringIndex, testString, matchResult);
|
||||
if (nextIndex > 0) {
|
||||
stringIndex = nextIndex;
|
||||
}
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Possessive composite (i.e. {n,m}) quantifier node over groups.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PosCompositeGroupQuantifierSet extends CompositeGroupQuantifierSet {
|
||||
|
||||
public PosCompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, AbstractSet next, int type,
|
||||
int setCounter) {
|
||||
super(quant, innerSet, next, type, setCounter);
|
||||
innerSet.setNext(FSet.posFSet);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int nextIndex;
|
||||
int counter = 0;
|
||||
int max = quantifier.max();
|
||||
|
||||
while ((nextIndex = innerSet.matches(stringIndex, testString, matchResult)) > stringIndex && counter < max) {
|
||||
counter++;
|
||||
stringIndex = nextIndex;
|
||||
}
|
||||
|
||||
if (nextIndex < 0 && counter < quantifier.min()) {
|
||||
return -1;
|
||||
} else {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Possessive + quantifier node over groups.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PosPlusGroupQuantifierSet extends GroupQuantifierSet {
|
||||
|
||||
public PosPlusGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
((JointSet)innerSet).setNext(FSet.posFSet);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int nextIndex;
|
||||
if ((nextIndex = innerSet.matches(stringIndex, testString, matchResult)) < 0) {
|
||||
return -1;
|
||||
} else if (nextIndex > stringIndex) {
|
||||
stringIndex = nextIndex;
|
||||
while ((nextIndex = innerSet.matches(stringIndex, testString, matchResult)) > stringIndex) {
|
||||
stringIndex = nextIndex;
|
||||
}
|
||||
}
|
||||
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Positive lookahead node.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PositiveLookAhead extends AtomicJointSet {
|
||||
public PositiveLookAhead(ArrayList<AbstractSet> children, FSet fSet) {
|
||||
super(children, fSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns stringIndex+shift, the next position to match
|
||||
*/
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int size = children.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
AbstractSet e = children.get(i);
|
||||
int shift = e.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
// PosLookaheadFset always returns true, position remains the
|
||||
// same
|
||||
// next.match() from;
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "PosLookaheadJointSet";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Positive lookbehind node.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PositiveLookBehind extends AtomicJointSet {
|
||||
|
||||
public PositiveLookBehind(ArrayList<AbstractSet> children, FSet fSet) {
|
||||
super(children, fSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns stringIndex+shift, the next position to match
|
||||
*/
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
int size = children.size();
|
||||
int leftBound = matchResult.hasTransparentBounds() ? 0 : matchResult.getLeftBound();
|
||||
|
||||
int shift = next.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
// fSet will take this index to check if we at the right bound
|
||||
// and return true if the current index equal to this one
|
||||
matchResult.setConsumed(groupIndex, stringIndex);
|
||||
for (int i = 0; i < size; i++) {
|
||||
AbstractSet e = children.get(i);
|
||||
// find limits could be calculated though e.getCharCount()
|
||||
// fSet will return true only if string index at fSet equal
|
||||
// to stringIndex
|
||||
if (e.findBack(leftBound, stringIndex, testString, matchResult) >= 0) {
|
||||
matchResult.setConsumed(groupIndex, -1);
|
||||
return shift;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "PosBehindJointSet";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Possessive ? quantifier node.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PossessiveAltQuantifierSet extends AltQuantifierSet {
|
||||
public PossessiveAltQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int shift = 0;
|
||||
|
||||
if (stringIndex + leaf.charCount() <= matchResult.getRightBound() &&
|
||||
(shift = leaf.accepts(stringIndex, testString)) >= 1) {
|
||||
stringIndex += shift;
|
||||
}
|
||||
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Possessive composite (i.e. {n, m}) quantifier node.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PossessiveCompositeQuantifierSet extends CompositeQuantifierSet {
|
||||
public PossessiveCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(quant, innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int min = quantifier.min();
|
||||
int max = quantifier.max();
|
||||
int i = 0;
|
||||
|
||||
for (; i < min; i++) {
|
||||
if (stringIndex + leaf.charCount() > matchResult.getRightBound()) {
|
||||
matchResult.hitEnd = true;
|
||||
return -1;
|
||||
}
|
||||
int shift = leaf.accepts(stringIndex, testString);
|
||||
if (shift < 1) {
|
||||
return -1;
|
||||
}
|
||||
stringIndex += shift;
|
||||
}
|
||||
|
||||
for (; i < max; i++) {
|
||||
int shift;
|
||||
if (stringIndex + leaf.charCount() > matchResult.getRightBound() ||
|
||||
(shift = leaf.accepts(stringIndex, testString)) < 1) {
|
||||
break;
|
||||
}
|
||||
stringIndex += shift;
|
||||
}
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Possessive quantifier set over groups.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PossessiveGroupQuantifierSet extends GroupQuantifierSet {
|
||||
public PossessiveGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
innerSet.setNext(FSet.posFSet);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int nextIndex;
|
||||
while ((nextIndex = innerSet.matches(stringIndex, testString, matchResult)) > 0) {
|
||||
stringIndex = nextIndex;
|
||||
}
|
||||
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Possessive quantifier set over LeafSet's
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PossessiveQuantifierSet extends LeafQuantifierSet {
|
||||
|
||||
public PossessiveQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int shift = 0;
|
||||
while (stringIndex + leaf.charCount() <= matchResult.getRightBound() &&
|
||||
(shift = leaf.accepts(stringIndex, testString)) >= 1) {
|
||||
stringIndex += shift;
|
||||
}
|
||||
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Node representing previous match (\G).
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class PreviousMatch extends AbstractSet {
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString,
|
||||
MatchResultImpl matchResult) {
|
||||
if (stringIndex == matchResult.getPreviousMatchEnd()) {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "PreviousMatch";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents RE quantifier; contains two fields responsible for min and max
|
||||
* number of repetitions. Negative value for maximum number of repetition
|
||||
* represents infinity(i.e. +,*)
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class Quantifier extends SpecialToken implements Cloneable {
|
||||
|
||||
private int min;
|
||||
|
||||
private int max;
|
||||
|
||||
private int counter = 0;
|
||||
|
||||
public Quantifier(int min) {
|
||||
this.min = this.max = min;
|
||||
}
|
||||
|
||||
public Quantifier(int min, int max) {
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
public void resetCounter() {
|
||||
counter = 0;
|
||||
}
|
||||
|
||||
public int getCounter() {
|
||||
return counter;
|
||||
}
|
||||
|
||||
public void setCounter(int counter) {
|
||||
this.counter = counter;
|
||||
}
|
||||
|
||||
public int min() {
|
||||
return min;
|
||||
}
|
||||
|
||||
public int max() {
|
||||
return max;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "{" + min + "," + ((max == Integer.MAX_VALUE) ? "" : new Integer(max).toString()) + "}";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return SpecialToken.TOK_QUANTIFIER;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object clone() {
|
||||
return new Quantifier(min, max);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Base class for quantifiers.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
abstract class QuantifierSet extends AbstractSet {
|
||||
|
||||
protected AbstractSet innerSet;
|
||||
|
||||
public QuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
|
||||
super(next);
|
||||
this.innerSet = innerSet;
|
||||
setType(type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the innerSet.
|
||||
*/
|
||||
public AbstractSet getInnerSet() {
|
||||
return innerSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets an inner set.
|
||||
*
|
||||
* @param innerSet
|
||||
* The innerSet to set.
|
||||
*/
|
||||
public void setInnerSet(AbstractSet innerSet) {
|
||||
this.innerSet = innerSet;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
return innerSet.first(set) || next.first(set);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl mr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for traversing nodes after the first stage of
|
||||
* compilation.
|
||||
*/
|
||||
@Override
|
||||
public void processSecondPass() {
|
||||
this.isSecondPassVisited = true;
|
||||
|
||||
if (next != null) {
|
||||
|
||||
if (!next.isSecondPassVisited) {
|
||||
|
||||
/*
|
||||
* Add here code to do during the pass
|
||||
*/
|
||||
JointSet set = next.processBackRefReplacement();
|
||||
|
||||
if (set != null) {
|
||||
next.isSecondPassVisited = true;
|
||||
next = set;
|
||||
}
|
||||
|
||||
/*
|
||||
* End code to do during the pass
|
||||
*/
|
||||
next.processSecondPass();
|
||||
}
|
||||
}
|
||||
|
||||
if (innerSet != null) {
|
||||
|
||||
if (!innerSet.isSecondPassVisited) {
|
||||
|
||||
/*
|
||||
* Add here code to do during the pass
|
||||
*/
|
||||
JointSet set = innerSet.processBackRefReplacement();
|
||||
|
||||
if (set != null) {
|
||||
innerSet.isSecondPassVisited = true;
|
||||
innerSet = set;
|
||||
}
|
||||
|
||||
/*
|
||||
* End code to do during the pass
|
||||
*/
|
||||
innerSet.processSecondPass();
|
||||
} else {
|
||||
|
||||
/*
|
||||
* We reach node through innerSet but it is already traversed.
|
||||
* You can see this situation for GroupQuantifierSet.innerset if
|
||||
* we compile smth like "(a)+ when GroupQuantifierSet ==
|
||||
* GroupQuantifierSet.innerset.fSet.next
|
||||
*/
|
||||
|
||||
/*
|
||||
* Add here code to do during the pass
|
||||
*/
|
||||
if (innerSet instanceof SingleSet && ((FSet)((JointSet)innerSet).fSet).isBackReferenced) {
|
||||
innerSet = innerSet.next;
|
||||
}
|
||||
|
||||
/*
|
||||
* End code to do during the pass
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single character from the given char class.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
|
||||
class RangeSet extends LeafSet {
|
||||
|
||||
private AbstractCharClass chars;
|
||||
|
||||
private boolean alt = false;
|
||||
|
||||
public RangeSet(AbstractCharClass cs, AbstractSet next) {
|
||||
super(next);
|
||||
this.chars = cs.getInstance();
|
||||
this.alt = cs.alt;
|
||||
}
|
||||
|
||||
public RangeSet(AbstractCharClass cc) {
|
||||
this.chars = cc.getInstance();
|
||||
this.alt = cc.alt;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
return chars.contains(testString.charAt(strIndex)) ? 1 : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "range:" + (alt ? "^ " : " ") + chars.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
if (set instanceof CharSet) {
|
||||
return AbstractCharClass.intersects(chars, ((CharSet)set).getChar());
|
||||
} else if (set instanceof RangeSet) {
|
||||
return AbstractCharClass.intersects(chars, ((RangeSet)set).chars);
|
||||
} else if (set instanceof SupplRangeSet) {
|
||||
return AbstractCharClass.intersects(chars, ((SupplRangeSet)set).getChars());
|
||||
} else if (set instanceof SupplCharSet) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected AbstractCharClass getChars() {
|
||||
return chars;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Reluctant version of "?" quantifier set over group.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class RelAltGroupQuantifierSet extends AltGroupQuantifierSet {
|
||||
|
||||
public RelAltGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
if (!innerSet.hasConsumed(matchResult))
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
|
||||
int nextIndex = next.matches(stringIndex, testString, matchResult);
|
||||
|
||||
if (nextIndex < 0) {
|
||||
return innerSet.matches(stringIndex, testString, matchResult);
|
||||
} else {
|
||||
return nextIndex;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Reluctant version of composite (i.e. {n,m}) quantifier node over group.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class RelCompositeGroupQuantifierSet extends CompositeGroupQuantifierSet {
|
||||
|
||||
public RelCompositeGroupQuantifierSet(Quantifier quant, AbstractSet innerSet, AbstractSet next, int type,
|
||||
int setCounter) {
|
||||
super(quant, innerSet, next, type, setCounter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int enterCounter = matchResult.getEnterCounter(setCounter);
|
||||
|
||||
if (!innerSet.hasConsumed(matchResult))
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
|
||||
// can't go inner set;
|
||||
if (enterCounter >= quantifier.max()) {
|
||||
matchResult.setEnterCounter(setCounter, 0);
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
int nextIndex;
|
||||
|
||||
if (enterCounter >= quantifier.min()) {
|
||||
nextIndex = next.matches(stringIndex, testString, matchResult);
|
||||
if (nextIndex < 0) {
|
||||
matchResult.setEnterCounter(setCounter, ++enterCounter);
|
||||
nextIndex = innerSet.matches(stringIndex, testString, matchResult);
|
||||
} else {
|
||||
matchResult.setEnterCounter(setCounter, 0);
|
||||
return nextIndex;
|
||||
}
|
||||
} else {
|
||||
matchResult.setEnterCounter(setCounter, ++enterCounter);
|
||||
nextIndex = innerSet.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
return nextIndex;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* This class represents ?? quantifier over leaf sets.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class ReluctantAltQuantifierSet extends AltQuantifierSet {
|
||||
public ReluctantAltQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int shift;
|
||||
|
||||
if ((shift = next.matches(stringIndex, testString, matchResult)) >= 0) {
|
||||
return shift;
|
||||
} else {
|
||||
return innerSet.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Reluctant version of composite(i.e. {n,m}) quantifier set over leaf nodes.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class ReluctantCompositeQuantifierSet extends CompositeQuantifierSet {
|
||||
public ReluctantCompositeQuantifierSet(Quantifier quant, LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(quant, innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int min = quantifier.min();
|
||||
int max = quantifier.max();
|
||||
int i = 0;
|
||||
int shift = 0;
|
||||
|
||||
for (; i < min; i++) {
|
||||
|
||||
if (stringIndex + leaf.charCount() > matchResult.getRightBound()) {
|
||||
matchResult.hitEnd = true;
|
||||
return -1;
|
||||
}
|
||||
|
||||
shift = leaf.accepts(stringIndex, testString);
|
||||
if (shift < 1) {
|
||||
return -1;
|
||||
}
|
||||
stringIndex += shift;
|
||||
}
|
||||
|
||||
do {
|
||||
shift = next.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
return shift;
|
||||
}
|
||||
|
||||
if (stringIndex + leaf.charCount() <= matchResult.getRightBound()) {
|
||||
shift = leaf.accepts(stringIndex, testString);
|
||||
stringIndex += shift;
|
||||
i++;
|
||||
}
|
||||
|
||||
} while (shift >= 1 && i <= max);
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Relactant version of the group quantifier set.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class ReluctantGroupQuantifierSet extends GroupQuantifierSet {
|
||||
public ReluctantGroupQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
if (!innerSet.hasConsumed(matchResult))
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
|
||||
int res = next.matches(stringIndex, testString, matchResult);
|
||||
if (res < 0) {
|
||||
return innerSet.matches(stringIndex, testString, matchResult);
|
||||
} else {
|
||||
return res;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* This class represents [+*]? constructs over LeafSets.
|
||||
*
|
||||
* @see java.util.regex.LeafSet
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class ReluctantQuantifierSet extends LeafQuantifierSet {
|
||||
|
||||
public ReluctantQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int shift = 0;
|
||||
|
||||
do {
|
||||
shift = next.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
return shift;
|
||||
}
|
||||
|
||||
if (stringIndex + leaf.charCount() <= matchResult.getRightBound()) {
|
||||
shift = leaf.accepts(stringIndex, testString);
|
||||
stringIndex += shift;
|
||||
}
|
||||
} while (shift >= 1);
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single character.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
final class SOLSet extends AbstractSet {
|
||||
|
||||
@Override
|
||||
public int matches(int strIndex, CharSequence testString,
|
||||
MatchResultImpl matchResult) {
|
||||
if (strIndex == 0
|
||||
|| (matchResult.hasAnchoringBounds() && strIndex == matchResult
|
||||
.getLeftBound())) {
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<SOL>";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,228 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* This class represents nodes constructed with character sequences. For
|
||||
* example, lets consider regular expression: ".*word.*". During regular
|
||||
* expression compilation phase character sequence w-o-r-d, will be represented
|
||||
* with single node for the entire word.
|
||||
*
|
||||
* During the match phase, Moyer-Moore algorithm will be used for fast
|
||||
* searching.
|
||||
*
|
||||
* Please follow the next link for more details about mentioned algorithm:
|
||||
* http://portal.acm.org/citation.cfm?id=359859
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class SequenceSet extends LeafSet {
|
||||
|
||||
private String string = null;
|
||||
|
||||
private IntHash leftToRight;
|
||||
|
||||
private IntHash rightToLeft;
|
||||
|
||||
SequenceSet(StringBuffer substring) {
|
||||
this.string = substring.toString();
|
||||
charCount = substring.length();
|
||||
|
||||
leftToRight = new IntHash(charCount);
|
||||
rightToLeft = new IntHash(charCount);
|
||||
for (int j = 0; j < charCount - 1; j++) {
|
||||
leftToRight.put(string.charAt(j), charCount - j - 1);
|
||||
rightToLeft
|
||||
.put(string.charAt(charCount - j - 1), charCount - j - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
return startsWith(testString, strIndex) ? charCount : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int strIndex, CharSequence testString,
|
||||
MatchResultImpl matchResult) {
|
||||
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
while (strIndex <= strLength) {
|
||||
strIndex = indexOf(testString, strIndex, strLength);
|
||||
|
||||
if (strIndex < 0)
|
||||
return -1;
|
||||
if (next.matches(strIndex + charCount, testString, matchResult) >= 0)
|
||||
return strIndex;
|
||||
|
||||
strIndex++;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int findBack(int strIndex, int lastIndex, CharSequence testString,
|
||||
MatchResultImpl matchResult) {
|
||||
|
||||
while (lastIndex >= strIndex) {
|
||||
lastIndex = lastIndexOf(testString, strIndex, lastIndex);
|
||||
|
||||
if (lastIndex < 0)
|
||||
return -1;
|
||||
if (next.matches(lastIndex + charCount, testString, matchResult) >= 0)
|
||||
return lastIndex;
|
||||
|
||||
lastIndex--;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "sequence: " + string; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
if (set instanceof CharSet) {
|
||||
return ((CharSet) set).getChar() == string.charAt(0);
|
||||
} else if (set instanceof RangeSet) {
|
||||
return ((RangeSet) set).accepts(0, string.substring(0, 1)) > 0;
|
||||
} else if (set instanceof SupplRangeSet) {
|
||||
return ((SupplRangeSet) set).contains(string.charAt(0))
|
||||
|| ((string.length() > 1) && ((SupplRangeSet) set).contains(Character
|
||||
.toCodePoint(string.charAt(0), string.charAt(1))));
|
||||
} else if ((set instanceof SupplCharSet)) {
|
||||
return (string.length() > 1)
|
||||
? ((SupplCharSet) set).getCodePoint()
|
||||
== Character.toCodePoint(string.charAt(0),
|
||||
string.charAt(1))
|
||||
: false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected int indexOf(CharSequence str, int from, int to) {
|
||||
int last = string.charAt(charCount - 1);
|
||||
int i = from;
|
||||
|
||||
while (i <= to - charCount) {
|
||||
char ch = str.charAt(i + charCount - 1);
|
||||
if (ch == last && startsWith(str, i)) {
|
||||
return i;
|
||||
}
|
||||
|
||||
i += leftToRight.get(ch);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
protected int lastIndexOf(CharSequence str, int to, int from) {
|
||||
int first = string.charAt(0);
|
||||
int size = str.length();
|
||||
int delta;
|
||||
int i = ((delta = size - from - charCount) > 0) ? from : from + delta;
|
||||
|
||||
while (i >= to) {
|
||||
char ch = str.charAt(i);
|
||||
if (ch == first && startsWith(str, i)) {
|
||||
return i;
|
||||
}
|
||||
|
||||
i -= rightToLeft.get(ch);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
protected boolean startsWith(CharSequence str, int from) {
|
||||
for (int i = 0; i < charCount; i++) {
|
||||
if (str.charAt(i + from) != string.charAt(i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static class IntHash {
|
||||
int[] table, values;
|
||||
|
||||
int mask;
|
||||
|
||||
int size; // <-maximum shift
|
||||
|
||||
public IntHash(int size) {
|
||||
while (size >= mask) {
|
||||
mask = (mask << 1) | 1;
|
||||
}
|
||||
mask = (mask << 1) | 1;
|
||||
table = new int[mask + 1];
|
||||
values = new int[mask + 1];
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
public void put(int key, int value) {
|
||||
int i = 0;
|
||||
int hashCode = key & mask;
|
||||
|
||||
for (;;) {
|
||||
if (table[hashCode] == 0 // empty
|
||||
|| table[hashCode] == key) {// rewrite
|
||||
table[hashCode] = key;
|
||||
values[hashCode] = value;
|
||||
return;
|
||||
}
|
||||
i++;
|
||||
i &= mask;
|
||||
|
||||
hashCode += i;
|
||||
hashCode &= mask;
|
||||
}
|
||||
}
|
||||
|
||||
public int get(int key) {
|
||||
|
||||
int hashCode = key & mask;
|
||||
int i = 0;
|
||||
int storedKey;
|
||||
|
||||
for (;;) {
|
||||
storedKey = table[hashCode];
|
||||
|
||||
if (storedKey == 0) { // empty
|
||||
return size;
|
||||
}
|
||||
|
||||
if (storedKey == key) {
|
||||
return values[hashCode];
|
||||
}
|
||||
|
||||
i++;
|
||||
i &= mask;
|
||||
|
||||
hashCode += i;
|
||||
hashCode &= mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Group node over subexpression w/o alternations.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class SingleSet extends JointSet {
|
||||
|
||||
protected AbstractSet kid;
|
||||
|
||||
public SingleSet(AbstractSet child, FSet fSet) {
|
||||
this.kid = child;
|
||||
this.fSet = fSet;
|
||||
this.groupIndex = fSet.getGroupIndex();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int start = matchResult.getStart(groupIndex);
|
||||
matchResult.setStart(groupIndex, stringIndex);
|
||||
int shift = kid.matches(stringIndex, testString, matchResult);
|
||||
if (shift >= 0) {
|
||||
return shift;
|
||||
}
|
||||
matchResult.setStart(groupIndex, start);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int res = kid.find(stringIndex, testString, matchResult);
|
||||
if (res >= 0)
|
||||
matchResult.setStart(groupIndex, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int findBack(int stringIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int res = kid.findBack(stringIndex, lastIndex, testString, matchResult);
|
||||
if (res >= 0)
|
||||
matchResult.setStart(groupIndex, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
return kid.first(set);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for replacement backreferenced sets.
|
||||
*/
|
||||
@Override
|
||||
public JointSet processBackRefReplacement() {
|
||||
BackReferencedSingleSet set = new BackReferencedSingleSet(this);
|
||||
|
||||
/*
|
||||
* We will store a reference to created BackReferencedSingleSet in next
|
||||
* field. This is needed toprocess replacement of sets correctly since
|
||||
* sometimes we cannot renew all references to detachable set in the
|
||||
* current point of traverse. See QuantifierSet and AbstractSet
|
||||
* processSecondPass() methods for more details.
|
||||
*/
|
||||
next = set;
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for traversing nodes after the first stage of
|
||||
* compilation.
|
||||
*/
|
||||
@Override
|
||||
public void processSecondPass() {
|
||||
this.isSecondPassVisited = true;
|
||||
|
||||
if (fSet != null && !fSet.isSecondPassVisited) {
|
||||
|
||||
/*
|
||||
* Add here code to do during the pass
|
||||
*/
|
||||
|
||||
/*
|
||||
* End code to do during the pass
|
||||
*/
|
||||
fSet.processSecondPass();
|
||||
}
|
||||
|
||||
if (kid != null && !kid.isSecondPassVisited) {
|
||||
|
||||
/*
|
||||
* Add here code to do during the pass
|
||||
*/
|
||||
JointSet set = kid.processBackRefReplacement();
|
||||
|
||||
if (set != null) {
|
||||
kid.isSecondPassVisited = true;
|
||||
kid = set;
|
||||
}
|
||||
|
||||
/*
|
||||
* End code to do during the pass
|
||||
*/
|
||||
|
||||
kid.processSecondPass();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
 * Common supertype of the special tokens, i.e. character classes and
 * quantifiers.
 *
 * @author Nikolay A. Kuznetsov
 */
abstract class SpecialToken {

    /** Type tag of a token that represents a character class. */
    public static final int TOK_CHARCLASS = 0x1;

    /** Type tag of a token that represents a quantifier. */
    public static final int TOK_QUANTIFIER = 0x2;

    /**
     * Reports the kind of this token.
     *
     * @return the token type: {@link #TOK_CHARCLASS} for a character class,
     *         {@link #TOK_QUANTIFIER} for a quantifier.
     */
    public abstract int getType();
}
|
|
@ -0,0 +1,196 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
|
||||
*
|
||||
* COPYRIGHT AND PERMISSION NOTICE
|
||||
*
|
||||
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
|
||||
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
|
||||
* hereby granted, free of charge, to any person obtaining a copy of the
|
||||
* Unicode data files and any associated documentation (the "Data Files")
|
||||
* or Unicode software and any associated documentation (the "Software")
|
||||
* to deal in the Data Files or Software without restriction, including without
|
||||
* limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
* and/or sell copies of the Data Files or Software, and to permit persons
|
||||
* to whom the Data Files or Software are furnished to do so, provided that
|
||||
* (a) the above copyright notice(s) and this permission notice appear with
|
||||
* all copies of the Data Files or Software, (b) both the above copyright
|
||||
* notice(s) and this permission notice appear in associated documentation,
|
||||
* and (c) there is clear notice in each modified Data File or in the Software
|
||||
* as well as in the documentation associated with the Data File(s) or Software
|
||||
* that the data or software has been modified.
|
||||
|
||||
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall
|
||||
* not be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in these Data Files or Software without prior written
|
||||
* authorization of the copyright holder.
|
||||
*
|
||||
* 2. Additional terms from the Database:
|
||||
*
|
||||
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* The Unicode Character Database is provided as is by Unicode, Inc.
|
||||
* No claims are made as to fitness for any particular purpose. No warranties
|
||||
* of any kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been purchased
|
||||
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
|
||||
* will be exchange of defective media within 90 days of receipt. This disclaimer
|
||||
* is applicable for all other data files accompanying the Unicode Character Database,
|
||||
* some of which have been compiled by the Unicode Consortium, and some of which
|
||||
* have been supplied by other sources.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Data
|
||||
*
|
||||
* Recipient is granted the right to make copies in any form for internal
|
||||
* distribution and to freely use the information supplied in the creation of
|
||||
* products supporting the UnicodeTM Standard. The files in
|
||||
* the Unicode Character Database can be redistributed to third parties or other
|
||||
* organizations (whether for profit or not) as long as this notice and the disclaimer
|
||||
* notice are retained. Information can be extracted from these files and used
|
||||
* in documentation or programs, as long as there is an accompanying notice
|
||||
* indicating the source.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single supplementary codepoint.
|
||||
*/
|
||||
class SupplCharSet extends LeafSet {
|
||||
|
||||
/*
|
||||
* UTF-16 encoding of this supplementary codepoint
|
||||
*/
|
||||
private char high = 0;
|
||||
|
||||
private char low = 0;
|
||||
|
||||
// int value of this supplementary codepoint
|
||||
private int ch;
|
||||
|
||||
public SupplCharSet(int ch) {
|
||||
charCount = 2;
|
||||
this.ch = ch;
|
||||
char[] chUTF16 = Character.toChars(ch);
|
||||
high = chUTF16[0];
|
||||
|
||||
/*
|
||||
* we suppose that SupplCharSet is build over supplementary codepoints
|
||||
* only
|
||||
*/
|
||||
low = chUTF16[1];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
char high = testString.charAt(strIndex++);
|
||||
char low = testString.charAt(strIndex);
|
||||
return ((this.high == high) && (this.low == low)) ? 2 : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
if (testString instanceof String) {
|
||||
String testStr = (String)testString;
|
||||
int strLength = matchResult.getRightBound();
|
||||
|
||||
while (strIndex < strLength) {
|
||||
strIndex = testStr.indexOf(high, strIndex);
|
||||
if (strIndex < 0)
|
||||
return -1;
|
||||
|
||||
strIndex++;
|
||||
if (strIndex < strLength) {
|
||||
char ch = testStr.charAt(strIndex);
|
||||
|
||||
if ((low == ch) && (next.matches(strIndex + 1, testString, matchResult) >= 0)) {
|
||||
return --strIndex;
|
||||
}
|
||||
strIndex++;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
return super.find(strIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int findBack(int strIndex, int lastIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
|
||||
if (testString instanceof String) {
|
||||
String testStr = (String)testString;
|
||||
|
||||
while (lastIndex >= strIndex) {
|
||||
lastIndex = testStr.lastIndexOf(low, lastIndex);
|
||||
lastIndex--;
|
||||
if (lastIndex < 0 || lastIndex < strIndex) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((high == testStr.charAt(lastIndex)) && next.matches(lastIndex + 2, testString, matchResult) >= 0) {
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
lastIndex--;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
return super.findBack(strIndex, lastIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "" + high + low;
|
||||
}
|
||||
|
||||
protected int getCodePoint() {
|
||||
return ch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
if (set instanceof SupplCharSet) {
|
||||
return ((SupplCharSet)set).getCodePoint() == ch;
|
||||
} else if (set instanceof SupplRangeSet) {
|
||||
return ((SupplRangeSet)set).contains(ch);
|
||||
} else if (set instanceof CharSet) {
|
||||
return false;
|
||||
} else if (set instanceof RangeSet) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
|
||||
*
|
||||
* COPYRIGHT AND PERMISSION NOTICE
|
||||
*
|
||||
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
|
||||
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
|
||||
* hereby granted, free of charge, to any person obtaining a copy of the
|
||||
* Unicode data files and any associated documentation (the "Data Files")
|
||||
* or Unicode software and any associated documentation (the "Software")
|
||||
* to deal in the Data Files or Software without restriction, including without
|
||||
* limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
* and/or sell copies of the Data Files or Software, and to permit persons
|
||||
* to whom the Data Files or Software are furnished to do so, provided that
|
||||
* (a) the above copyright notice(s) and this permission notice appear with
|
||||
* all copies of the Data Files or Software, (b) both the above copyright
|
||||
* notice(s) and this permission notice appear in associated documentation,
|
||||
* and (c) there is clear notice in each modified Data File or in the Software
|
||||
* as well as in the documentation associated with the Data File(s) or Software
|
||||
* that the data or software has been modified.
|
||||
|
||||
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall
|
||||
* not be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in these Data Files or Software without prior written
|
||||
* authorization of the copyright holder.
|
||||
*
|
||||
* 2. Additional terms from the Database:
|
||||
*
|
||||
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* The Unicode Character Database is provided as is by Unicode, Inc.
|
||||
* No claims are made as to fitness for any particular purpose. No warranties
|
||||
* of any kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been purchased
|
||||
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
|
||||
* will be exchange of defective media within 90 days of receipt. This disclaimer
|
||||
* is applicable for all other data files accompanying the Unicode Character Database,
|
||||
* some of which have been compiled by the Unicode Consortium, and some of which
|
||||
* have been supplied by other sources.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Data
|
||||
*
|
||||
* Recipient is granted the right to make copies in any form for internal
|
||||
* distribution and to freely use the information supplied in the creation of
|
||||
* products supporting the UnicodeTM Standard. The files in
|
||||
* the Unicode Character Database can be redistributed to third parties or other
|
||||
* organizations (whether for profit or not) as long as this notice and the disclaimer
|
||||
* notice are retained. Information can be extracted from these files and used
|
||||
* in documentation or programs, as long as there is an accompanying notice
|
||||
* indicating the source.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single character from the given char class. This
|
||||
* character can be supplementary (2 chars needed to represent) or from basic
|
||||
* multilingual pane (1 needed char to represent it).
|
||||
*/
|
||||
class SupplRangeSet extends JointSet {
|
||||
|
||||
protected AbstractCharClass chars;
|
||||
|
||||
protected boolean alt = false;
|
||||
|
||||
public SupplRangeSet(AbstractCharClass cs, AbstractSet next) {
|
||||
this.chars = cs.getInstance();
|
||||
this.alt = cs.alt;
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
public SupplRangeSet(AbstractCharClass cc) {
|
||||
this.chars = cc.getInstance();
|
||||
this.alt = cc.alt;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int strLength = matchResult.getRightBound();
|
||||
int offset = -1;
|
||||
|
||||
if (stringIndex < strLength) {
|
||||
char high = testString.charAt(stringIndex++);
|
||||
|
||||
if (contains(high) && (offset = next.matches(stringIndex, testString, matchResult)) > 0) {
|
||||
return offset;
|
||||
}
|
||||
|
||||
if (stringIndex < strLength) {
|
||||
char low = testString.charAt(stringIndex++);
|
||||
|
||||
if (Character.isSurrogatePair(high, low) && contains(Character.toCodePoint(high, low))) {
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "range:" + (alt ? "^ " : " ") + chars.toString();
|
||||
}
|
||||
|
||||
public boolean contains(int ch) {
|
||||
return chars.contains(ch);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean first(AbstractSet set) {
|
||||
if (set instanceof SupplCharSet) {
|
||||
return AbstractCharClass.intersects(chars, ((SupplCharSet)set).getCodePoint());
|
||||
} else if (set instanceof CharSet) {
|
||||
return AbstractCharClass.intersects(chars, ((CharSet)set).getChar());
|
||||
} else if (set instanceof SupplRangeSet) {
|
||||
return AbstractCharClass.intersects(chars, ((SupplRangeSet)set).chars);
|
||||
} else if (set instanceof RangeSet) {
|
||||
return AbstractCharClass.intersects(chars, ((RangeSet)set).getChars());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected AbstractCharClass getChars() {
|
||||
return chars;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractSet getNext() {
|
||||
return next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNext(AbstractSet next) {
|
||||
this.next = next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl mr) {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
 * Holds the results of a successful match of a {@link TPattern} against a
 * given string. The result is divided into groups, with one group for each
 * pair of parentheses in the regular expression and an additional group for
 * the whole regular expression. The start, end, and contents of each group
 * can be queried.
 *
 * @see TMatcher
 * @see TMatcher#toMatchResult()
 *
 * @author Nikolay A. Kuznetsov
 */
public interface TMatchResult {

    /**
     * Returns the index of the first character of the text that matched
     * the whole regular expression.
     *
     * @return the character index.
     */
    int start();

    /**
     * Returns the index of the first character of the text that matched a
     * given group.
     *
     * @param group
     *            the group index, with 0 representing the whole pattern.
     *
     * @return the character index.
     */
    int start(int group);

    /**
     * Returns the index of the first character following the text that
     * matched the whole regular expression.
     *
     * @return the character index.
     */
    int end();

    /**
     * Returns the index of the first character following the text that
     * matched a given group.
     *
     * @param group
     *            the group index, with 0 representing the whole pattern.
     *
     * @return the character index.
     */
    int end(int group);

    /**
     * Returns the text that matched the whole regular expression.
     *
     * @return the text.
     */
    String group();

    /**
     * Returns the text that matched a given group of the regular expression.
     *
     * @param group
     *            the group index, with 0 representing the whole pattern.
     *
     * @return the text that matched the group.
     */
    String group(int group);

    /**
     * Returns the number of groups in the result, which is always equal to
     * the number of groups in the original regular expression.
     *
     * @return the number of groups.
     */
    int groupCount();
}
|
|
@ -0,0 +1,699 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Provides a means of matching regular expressions against a given input,
|
||||
* finding occurrences of regular expressions in a given input, or replacing
|
||||
* parts of a given input. A {@code Matcher} instance has an associated
|
||||
* {@link TPattern} instance and an input text. A typical use case is to
|
||||
* iteratively find all occurrences of the {@code Pattern}, until the end of the
|
||||
* input is reached, as the following example illustrates:
|
||||
*
|
||||
* <p/>
|
||||
*
|
||||
* <pre>
|
||||
* Pattern p = Pattern.compile("[A-Za-z]+");
|
||||
*
|
||||
* Matcher m = p.matcher("Hello, Android!");
|
||||
* while (m.find()) {
|
||||
* System.out.println(m.group()); // prints "Hello" and "Android"
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* <p/>
|
||||
*
|
||||
* The {@code Matcher} has a state that results from the previous operations.
|
||||
* For example, it knows whether the most recent attempt to find the
|
||||
* {@code Pattern} was successful and at which position the next attempt would
|
||||
* resume the search. Depending on the application's needs, it may become
|
||||
* necessary to explicitly {@link #reset()} this state from time to time.
|
||||
*/
|
||||
public final class TMatcher implements TMatchResult {
|
||||
|
||||
static int MODE_FIND = 1 << 0;
|
||||
|
||||
static int MODE_MATCH = 1 << 1;
|
||||
|
||||
private TPattern pat = null;
|
||||
|
||||
private AbstractSet start = null;
|
||||
|
||||
private CharSequence string = null;
|
||||
|
||||
private MatchResultImpl matchResult = null;
|
||||
|
||||
// bounds
|
||||
private int leftBound = -1;
|
||||
|
||||
private int rightBound = -1;
|
||||
|
||||
// replacements
|
||||
private int appendPos = 0;
|
||||
|
||||
private String replacement = null;
|
||||
|
||||
private String processedRepl = null;
|
||||
|
||||
private ArrayList<Object> replacementParts = null;
|
||||
|
||||
/**
|
||||
* Appends a literal part of the input plus a replacement for the current
|
||||
* match to a given {@link StringBuffer}. The literal part is exactly the
|
||||
* part of the input between the previous match and the current match. The
|
||||
* method can be used in conjunction with {@link #find()} and
|
||||
* {@link #appendTail(StringBuffer)} to walk through the input and replace
|
||||
* all occurrences of the {@code Pattern} with something else.
|
||||
*
|
||||
* @param buffer
|
||||
* the {@code StringBuffer} to append to.
|
||||
* @param replacement
|
||||
* the replacement text.
|
||||
* @return the {@code Matcher} itself.
|
||||
* @throws IllegalStateException
|
||||
* if no successful match has been made.
|
||||
*/
|
||||
public TMatcher appendReplacement(StringBuffer buffer, String replacement) {
|
||||
processedRepl = processReplacement(replacement);
|
||||
buffer.append(string.subSequence(appendPos, start()));
|
||||
buffer.append(processedRepl);
|
||||
appendPos = end();
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses replacement string and creates pattern
|
||||
*/
|
||||
private String processReplacement(String replacement) {
|
||||
if (this.replacement != null && this.replacement.equals(replacement)) {
|
||||
if (replacementParts == null) {
|
||||
return processedRepl;
|
||||
} else {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < replacementParts.size(); i++) {
|
||||
sb.append(replacementParts.get(i));
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
} else {
|
||||
this.replacement = replacement;
|
||||
char[] repl = replacement.toCharArray();
|
||||
StringBuilder res = new StringBuilder();
|
||||
replacementParts = null;
|
||||
|
||||
int index = 0;
|
||||
int replacementPos = 0;
|
||||
boolean nextBackSlashed = false;
|
||||
|
||||
while (index < repl.length) {
|
||||
|
||||
if (repl[index] == '\\' && !nextBackSlashed) {
|
||||
nextBackSlashed = true;
|
||||
index++;
|
||||
}
|
||||
|
||||
if (nextBackSlashed) {
|
||||
res.append(repl[index]);
|
||||
nextBackSlashed = false;
|
||||
} else {
|
||||
if (repl[index] == '$') {
|
||||
if (replacementParts == null) {
|
||||
replacementParts = new ArrayList<>();
|
||||
}
|
||||
try {
|
||||
final int gr = Integer.parseInt(new String(repl, ++index, 1));
|
||||
|
||||
if (replacementPos != res.length()) {
|
||||
replacementParts.add(res.subSequence(replacementPos, res.length()));
|
||||
replacementPos = res.length();
|
||||
}
|
||||
|
||||
replacementParts.add(new Object() {
|
||||
private final int grN = gr;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return group(grN);
|
||||
}
|
||||
});
|
||||
String group = group(gr);
|
||||
replacementPos += group.length();
|
||||
res.append(group);
|
||||
|
||||
} catch (IndexOutOfBoundsException iob) {
|
||||
throw iob;
|
||||
} catch (Exception e) {
|
||||
throw new IllegalArgumentException("");
|
||||
}
|
||||
} else {
|
||||
res.append(repl[index]);
|
||||
}
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
if (replacementParts != null && replacementPos != res.length()) {
|
||||
replacementParts.add(res.subSequence(replacementPos, res.length()));
|
||||
}
|
||||
return res.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a new input and resets the {@code Matcher}. This results in the
|
||||
* region being set to the whole input. Results of a previous find get lost.
|
||||
* The next attempt to find an occurrence of the {@link TPattern} in the
|
||||
* string will start at the beginning of the input.
|
||||
*
|
||||
* @param input
|
||||
* the new input sequence.
|
||||
*
|
||||
* @return the {@code Matcher} itself.
|
||||
*/
|
||||
public TMatcher reset(CharSequence input) {
|
||||
if (input == null) {
|
||||
throw new NullPointerException("");
|
||||
}
|
||||
this.string = input;
|
||||
return reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the {@code Matcher}. This results in the region being set to the
|
||||
* whole input. Results of a previous find get lost. The next attempt to
|
||||
* find an occurrence of the {@link TPattern} in the string will start at
|
||||
* the beginning of the input.
|
||||
*
|
||||
* @return the {@code Matcher} itself.
|
||||
*/
|
||||
public TMatcher reset() {
|
||||
this.leftBound = 0;
|
||||
this.rightBound = string.length();
|
||||
matchResult.reset(string, leftBound, rightBound);
|
||||
appendPos = 0;
|
||||
replacement = null;
|
||||
matchResult.previousMatch = -1;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this matcher and sets a region. Only characters inside the region
|
||||
* are considered for a match.
|
||||
*
|
||||
* @param start
|
||||
* the first character of the region.
|
||||
* @param end
|
||||
* the first character after the end of the region.
|
||||
* @return the {@code Matcher} itself.
|
||||
*/
|
||||
public TMatcher region(int start, int end) {
|
||||
|
||||
if (start > end || start < 0 || end < 0 || start > string.length() || end > string.length()) {
|
||||
throw new IndexOutOfBoundsException(start + ", " + end);
|
||||
}
|
||||
|
||||
this.leftBound = start;
|
||||
this.rightBound = end;
|
||||
matchResult.reset(null, start, end);
|
||||
appendPos = 0;
|
||||
replacement = null;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends the (unmatched) remainder of the input to the given
|
||||
* {@link StringBuffer}. The method can be used in conjunction with
|
||||
* {@link #find()} and {@link #appendReplacement(StringBuffer, String)} to
|
||||
* walk through the input and replace all matches of the {@code Pattern}
|
||||
* with something else.
|
||||
*
|
||||
* @param buffer
|
||||
* the {@code StringBuffer} to append to.
|
||||
* @return the {@code StringBuffer}.
|
||||
* @throws IllegalStateException
|
||||
* if no successful match has been made.
|
||||
*/
|
||||
public StringBuffer appendTail(StringBuffer buffer) {
|
||||
return buffer.append(string.subSequence(appendPos, string.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces the first occurrence of this matcher's pattern in the input with
|
||||
* a given string.
|
||||
*
|
||||
* @param replacement
|
||||
* the replacement text.
|
||||
* @return the modified input string.
|
||||
*/
|
||||
public String replaceFirst(String replacement) {
|
||||
reset();
|
||||
if (find()) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
appendReplacement(sb, replacement);
|
||||
return appendTail(sb).toString();
|
||||
}
|
||||
|
||||
return string.toString();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces all occurrences of this matcher's pattern in the input with a
|
||||
* given string.
|
||||
*
|
||||
* @param replacement
|
||||
* the replacement text.
|
||||
* @return the modified input string.
|
||||
*/
|
||||
public String replaceAll(String replacement) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
reset();
|
||||
while (find()) {
|
||||
appendReplacement(sb, replacement);
|
||||
}
|
||||
|
||||
return appendTail(sb).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link TPattern} instance used inside this matcher.
|
||||
*
|
||||
* @return the {@code Pattern} instance.
|
||||
*/
|
||||
public TPattern pattern() {
|
||||
return pat;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the text that matched a given group of the regular expression.
|
||||
*
|
||||
* @param group
|
||||
* the group, ranging from 0 to groupCount() - 1, with 0
|
||||
* representing the whole pattern.
|
||||
* @return the text that matched the group.
|
||||
* @throws IllegalStateException
|
||||
* if no successful match has been made.
|
||||
*/
|
||||
@Override
|
||||
public String group(int group) {
|
||||
if (group < 0 || group > matchResult.groupCount()) {
|
||||
throw new IndexOutOfBoundsException("Index " + group + " if out of range [0; " +
|
||||
matchResult.groupCount() + ")");
|
||||
}
|
||||
return matchResult.group(group);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the text that matched the whole regular expression.
|
||||
*
|
||||
* @return the text.
|
||||
* @throws IllegalStateException
|
||||
* if no successful match has been made.
|
||||
*/
|
||||
@Override
|
||||
public String group() {
|
||||
return group(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next occurrence of the {@link TPattern} in the input. The
|
||||
* method starts the search from the given character in the input.
|
||||
*
|
||||
* @param start
|
||||
* The index in the input at which the find operation is to
|
||||
* begin. If this is less than the start of the region, it is
|
||||
* automatically adjusted to that value. If it is beyond the end
|
||||
* of the region, the method will fail.
|
||||
* @return true if (and only if) a match has been found.
|
||||
*/
|
||||
public boolean find(int start) {
|
||||
int stringLength = string.length();
|
||||
if (start < 0 || start > stringLength) {
|
||||
throw new IndexOutOfBoundsException(String.valueOf(start));
|
||||
}
|
||||
|
||||
start = findAt(start);
|
||||
if (start >= 0 && matchResult.isValid()) {
|
||||
matchResult.finalizeMatch();
|
||||
return true;
|
||||
}
|
||||
matchResult.startIndex = -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
private int findAt(int startIndex) {
|
||||
matchResult.reset();
|
||||
matchResult.setMode(TMatcher.MODE_FIND);
|
||||
matchResult.setStartIndex(startIndex);
|
||||
int foundIndex = start.find(startIndex, string, matchResult);
|
||||
if (foundIndex == -1) {
|
||||
matchResult.hitEnd = true;
|
||||
}
|
||||
return foundIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next occurrence of the {@link TPattern} in the input. If a
|
||||
* previous match was successful, the method continues the search from the
|
||||
* first character following that match in the input. Otherwise it searches
|
||||
* either from the region start (if one has been set), or from position 0.
|
||||
*
|
||||
* @return true if (and only if) a match has been found.
|
||||
*/
|
||||
public boolean find() {
|
||||
int length = string.length();
|
||||
if (!hasTransparentBounds())
|
||||
length = rightBound;
|
||||
if (matchResult.startIndex >= 0 && matchResult.mode() == TMatcher.MODE_FIND) {
|
||||
matchResult.startIndex = matchResult.end();
|
||||
if (matchResult.end() == matchResult.start()) {
|
||||
matchResult.startIndex++;
|
||||
}
|
||||
|
||||
return matchResult.startIndex <= length ? find(matchResult.startIndex) : false;
|
||||
} else {
|
||||
return find(leftBound);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the first character of the text that matched a given
|
||||
* group.
|
||||
*
|
||||
* @param group
|
||||
* the group, ranging from 0 to groupCount() - 1, with 0
|
||||
* representing the whole pattern.
|
||||
* @return the character index.
|
||||
* @throws IllegalStateException
|
||||
* if no successful match has been made.
|
||||
*/
|
||||
@Override
|
||||
public int start(int group) {
|
||||
return matchResult.start(group);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the first character following the text that matched
|
||||
* a given group.
|
||||
*
|
||||
* @param group
|
||||
* the group, ranging from 0 to groupCount() - 1, with 0
|
||||
* representing the whole pattern.
|
||||
* @return the character index.
|
||||
* @throws IllegalStateException
|
||||
* if no successful match has been made.
|
||||
*/
|
||||
@Override
|
||||
public int end(int group) {
|
||||
return matchResult.end(group);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to match the {@link TPattern} against the entire region (or the
|
||||
* entire input, if no region has been set).
|
||||
*
|
||||
* @return true if (and only if) the {@code Pattern} matches the entire
|
||||
* region.
|
||||
*/
|
||||
public boolean matches() {
|
||||
return lookingAt(leftBound, TMatcher.MODE_MATCH);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a replacement string for the given one that has all backslashes
|
||||
* and dollar signs escaped.
|
||||
*
|
||||
* @param s
|
||||
* the input string.
|
||||
* @return the input string, with all backslashes and dollar signs having
|
||||
* been escaped.
|
||||
*/
|
||||
public static String quoteReplacement(String s) {
|
||||
// first check whether we have smth to quote
|
||||
if (s.indexOf('\\') < 0 && s.indexOf('$') < 0)
|
||||
return s;
|
||||
StringBuilder res = new StringBuilder(s.length() * 2);
|
||||
char ch;
|
||||
int len = s.length();
|
||||
|
||||
for (int i = 0; i < len; i++) {
|
||||
|
||||
switch (ch = s.charAt(i)) {
|
||||
case '$':
|
||||
res.append('\\');
|
||||
res.append('$');
|
||||
break;
|
||||
case '\\':
|
||||
res.append('\\');
|
||||
res.append('\\');
|
||||
break;
|
||||
default:
|
||||
res.append(ch);
|
||||
}
|
||||
}
|
||||
|
||||
return res.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs match starting from <code>set</code> specified against input
|
||||
* sequence starting at <code>index</code> specified; Result of the match
|
||||
* will be stored into matchResult instance;
|
||||
*/
|
||||
private boolean runMatch(AbstractSet set, int index, MatchResultImpl matchResult) {
|
||||
|
||||
if (set.matches(index, string, matchResult) >= 0) {
|
||||
matchResult.finalizeMatch();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to match the {@link TPattern}, starting from the beginning of the
|
||||
* region (or the beginning of the input, if no region has been set).
|
||||
* Doesn't require the {@code Pattern} to match against the whole region.
|
||||
*
|
||||
* @return true if (and only if) the {@code Pattern} matches.
|
||||
*/
|
||||
public boolean lookingAt() {
|
||||
return lookingAt(leftBound, TMatcher.MODE_FIND);
|
||||
}
|
||||
|
||||
private boolean lookingAt(int startIndex, int mode) {
|
||||
matchResult.reset();
|
||||
matchResult.setMode(mode);
|
||||
matchResult.setStartIndex(startIndex);
|
||||
return runMatch(start, startIndex, matchResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the first character of the text that matched the
|
||||
* whole regular expression.
|
||||
*
|
||||
* @return the character index.
|
||||
* @throws IllegalStateException
|
||||
* if no successful match has been made.
|
||||
*/
|
||||
@Override
|
||||
public int start() {
|
||||
return start(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of groups in the results, which is always equal to the
|
||||
* number of groups in the original regular expression.
|
||||
*
|
||||
* @return the number of groups.
|
||||
*/
|
||||
@Override
|
||||
public int groupCount() {
|
||||
return matchResult.groupCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the first character following the text that matched
|
||||
* the whole regular expression.
|
||||
*
|
||||
* @return the character index.
|
||||
* @throws IllegalStateException
|
||||
* if no successful match has been made.
|
||||
*/
|
||||
@Override
|
||||
public int end() {
|
||||
return end(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the current match into a separate {@link TMatchResult} instance
|
||||
* that is independent from this matcher. The new object is unaffected when
|
||||
* the state of this matcher changes.
|
||||
*
|
||||
* @return the new {@code MatchResult}.
|
||||
* @throws IllegalStateException
|
||||
* if no successful match has been made.
|
||||
*/
|
||||
public TMatchResult toMatchResult() {
|
||||
return this.matchResult.cloneImpl();
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether this matcher has anchoring bounds enabled or not. When
|
||||
* anchoring bounds are enabled, the start and end of the input match the
|
||||
* '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
|
||||
* by default.
|
||||
*
|
||||
* @param value
|
||||
* the new value for anchoring bounds.
|
||||
* @return the {@code Matcher} itself.
|
||||
*/
|
||||
public TMatcher useAnchoringBounds(boolean value) {
|
||||
matchResult.useAnchoringBounds(value);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether this matcher has anchoring bounds enabled. When
|
||||
* anchoring bounds are enabled, the start and end of the input match the
|
||||
* '^' and '$' meta-characters, otherwise not. Anchoring bounds are enabled
|
||||
* by default.
|
||||
*
|
||||
* @return true if (and only if) the {@code Matcher} uses anchoring bounds.
|
||||
*/
|
||||
public boolean hasAnchoringBounds() {
|
||||
return matchResult.hasAnchoringBounds();
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether this matcher has transparent bounds enabled or not.
|
||||
* When transparent bounds are enabled, the parts of the input outside the
|
||||
* region are subject to lookahead and lookbehind, otherwise they are not.
|
||||
* Transparent bounds are disabled by default.
|
||||
*
|
||||
* @param value
|
||||
* the new value for transparent bounds.
|
||||
* @return the {@code Matcher} itself.
|
||||
*/
|
||||
public TMatcher useTransparentBounds(boolean value) {
|
||||
matchResult.useTransparentBounds(value);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether this matcher has transparent bounds enabled. When
|
||||
* transparent bounds are enabled, the parts of the input outside the region
|
||||
* are subject to lookahead and lookbehind, otherwise they are not.
|
||||
* Transparent bounds are disabled by default.
|
||||
*
|
||||
* @return true if (and only if) the {@code Matcher} uses anchoring bounds.
|
||||
*/
|
||||
public boolean hasTransparentBounds() {
|
||||
return matchResult.hasTransparentBounds();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns this matcher's region start, that is, the first character that is
|
||||
* considered for a match.
|
||||
*
|
||||
* @return the start of the region.
|
||||
*/
|
||||
public int regionStart() {
|
||||
return matchResult.getLeftBound();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns this matcher's region end, that is, the first character that is
|
||||
* not considered for a match.
|
||||
*
|
||||
* @return the end of the region.
|
||||
*/
|
||||
public int regionEnd() {
|
||||
return matchResult.getRightBound();
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether more input might change a successful match into an
|
||||
* unsuccessful one.
|
||||
*
|
||||
* @return true if (and only if) more input might change a successful match
|
||||
* into an unsuccessful one.
|
||||
*/
|
||||
public boolean requireEnd() {
|
||||
return matchResult.requireEnd;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether the last match hit the end of the input.
|
||||
*
|
||||
* @return true if (and only if) the last match hit the end of the input.
|
||||
*/
|
||||
public boolean hitEnd() {
|
||||
return matchResult.hitEnd;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a new pattern for the {@code Matcher}. Results of a previous find
|
||||
* get lost. The next attempt to find an occurrence of the {@link TPattern}
|
||||
* in the string will start at the beginning of the input.
|
||||
*
|
||||
* @param pattern
|
||||
* the new {@code Pattern}.
|
||||
*
|
||||
* @return the {@code Matcher} itself.
|
||||
*/
|
||||
public TMatcher usePattern(TPattern pattern) {
|
||||
if (pattern == null) {
|
||||
throw new IllegalArgumentException("");
|
||||
}
|
||||
int startIndex = matchResult.getPreviousMatchEnd();
|
||||
int mode = matchResult.mode();
|
||||
this.pat = pattern;
|
||||
this.start = pattern.start;
|
||||
matchResult = new MatchResultImpl(this.string, leftBound, rightBound, pattern.groupCount(),
|
||||
pattern.compCount(), pattern.consCount());
|
||||
matchResult.setStartIndex(startIndex);
|
||||
matchResult.setMode(mode);
|
||||
return this;
|
||||
}
|
||||
|
||||
// Creates a matcher for the given pattern over the given input. The region
// initially spans the whole input (0 .. cs.length()).
TMatcher(TPattern pat, CharSequence cs) {
    this.pat = pat;
    this.start = pat.start;
    this.string = cs;
    this.leftBound = 0;
    this.rightBound = cs.length();
    this.matchResult = new MatchResultImpl(this.string, this.leftBound, this.rightBound,
            pat.groupCount(), pat.compCount(), pat.consCount());
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
String lastMatch = "";
|
||||
try {
|
||||
lastMatch = Integer.toString(start());
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
return "Regex[pattern=" + pat + " region=" + matchResult.getLeftBound() + "," + matchResult.getRightBound() +
|
||||
" lastmatch=" + lastMatch + "]";
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
 * Encapsulates a syntax error that occurred during the compilation of a
 * {@link TPattern}. Might include a detailed description, the original regular
 * expression, and the index at which the error occurred.
 *
 * @see TPattern#compile(String)
 * @see TPattern#compile(java.lang.String,int)
 *
 * @author Nikolay A. Kuznetsov
 */
public class TPatternSyntaxException extends IllegalArgumentException {

    private static final long serialVersionUID = -3864639126226059218L;

    /**
     * Holds the description of the syntax error, or null if the description is
     * not known.
     */
    private final String desc;

    /**
     * Holds the syntactically incorrect regular expression, or null if the
     * regular expression is not known.
     */
    private final String pattern;

    /**
     * Holds the index around which the error occurred, or -1, in case it is
     * unknown.
     */
    private final int index;

    /**
     * Creates a new PatternSyntaxException for a given message, pattern, and
     * error index.
     *
     * @param description
     *            the description of the syntax error, or {@code null} if the
     *            description is not known.
     * @param pattern
     *            the syntactically incorrect regular expression, or
     *            {@code null} if the regular expression is not known.
     * @param index
     *            the character index around which the error occurred, or -1 if
     *            the index is not known.
     */
    public TPatternSyntaxException(String description, String pattern, int index) {
        this.desc = description;
        this.pattern = pattern;
        this.index = index;
    }

    /**
     * Returns the syntactically incorrect regular expression.
     *
     * @return the regular expression.
     */
    public String getPattern() {
        return pattern;
    }

    /**
     * Returns a detailed error message for the exception. When a non-empty
     * pattern is known the message is multi-line: the description followed
     * by {@code " near index N"} (if the index is known), the pattern on the
     * next line, and, if the index is known, a third line with a caret
     * placed under the offending position. Without a pattern only the
     * description is returned.
     *
     * @return the error message.
     */
    @Override
    public String getMessage() {
        final StringBuilder message = new StringBuilder();
        message.append(desc);
        if (pattern == null || pattern.length() == 0) {
            return message.toString();
        }

        final boolean indexKnown = index >= 0;
        if (indexKnown) {
            message.append(" near index ").append(index);
        }
        message.append('\n').append(pattern);
        if (indexKnown) {
            // Pad with spaces so that the caret lines up with the character
            // at the error index on the line above.
            char[] filler = new char[index];
            Arrays.fill(filler, ' ');
            message.append('\n').append(new String(filler)).append('^');
        }
        return message.toString();
    }

    /**
     * Returns the description of the syntax error, or {@code null} if the
     * description is not known.
     *
     * @return the description.
     */
    public String getDescription() {
        return desc;
    }

    /**
     * Returns the character index around which the error occurred, or -1 if the
     * index is not known.
     *
     * @return the index.
     */
    public int getIndex() {
        return index;
    }
}
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Unicode case insensitive back reference (i.e. \1-9) node.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class UCIBackReferenceSet extends CIBackReferenceSet {
|
||||
|
||||
int groupIndex;
|
||||
|
||||
public UCIBackReferenceSet(int groupIndex, int consCounter) {
|
||||
super(groupIndex, consCounter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
String group = getString(matchResult);
|
||||
|
||||
if (group == null || (stringIndex + group.length()) > matchResult.getRightBound())
|
||||
return -1;
|
||||
|
||||
for (int i = 0; i < group.length(); i++) {
|
||||
if (Character.toLowerCase(Character.toUpperCase(group.charAt(i))) != Character.toLowerCase(Character
|
||||
.toUpperCase(testString.charAt(stringIndex + i)))) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
matchResult.setConsumed(consCounter, group.length());
|
||||
return next.matches(stringIndex + group.length(), testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "UCI back reference: " + this.groupIndex; //$NON-NLS-1$
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single character in unicode case
|
||||
* insensitive manner.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class UCICharSet extends LeafSet {
|
||||
|
||||
private char ch;
|
||||
|
||||
public UCICharSet(char ch) {
|
||||
this.ch = Character.toLowerCase(Character.toUpperCase(ch));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
return (this.ch == Character.toLowerCase(Character
|
||||
.toUpperCase(testString.charAt(strIndex)))) ? 1 : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "UCI " + ch;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents Unicode case insensitive
|
||||
* canonical decomposition of
|
||||
* Unicode character. Is used when
|
||||
* CANON_EQ flag of Pattern class
|
||||
* is specified.
|
||||
*/
|
||||
class UCIDecomposedCharSet extends DecomposedCharSet{
|
||||
|
||||
/*
|
||||
* Just only a stub
|
||||
*/
|
||||
public UCIDecomposedCharSet(int [] decomp, int decomposedCharLength) {
|
||||
super(decomp, decomposedCharLength);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single character from the given char class. Note,
|
||||
* this class contains normalized characters fo unicode case, asci case is
|
||||
* supported through adding both symbols to the range.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class UCIRangeSet extends LeafSet {
|
||||
|
||||
private AbstractCharClass chars;
|
||||
|
||||
private boolean alt = false;
|
||||
|
||||
public UCIRangeSet(AbstractCharClass cs, AbstractSet next) {
|
||||
super(next);
|
||||
this.chars = cs.getInstance();
|
||||
this.alt = cs.alt;
|
||||
}
|
||||
|
||||
public UCIRangeSet(AbstractCharClass cc) {
|
||||
this.chars = cc.getInstance();
|
||||
this.alt = cc.alt;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
return (chars.contains(Character.toLowerCase(Character
|
||||
.toUpperCase(testString.charAt(strIndex))))) ? 1 : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "UCI range:" + (alt ? "^ " : " ") + chars.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Node accepting substrings in unicode case insensitive manner.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class UCISequenceSet extends LeafSet {
|
||||
|
||||
private String string = null;
|
||||
|
||||
UCISequenceSet(StringBuffer substring) {
|
||||
StringBuilder res = new StringBuilder();
|
||||
for (int i = 0; i < substring.length(); i++) {
|
||||
res.append(Character.toLowerCase(Character.toUpperCase(substring.charAt(i))));
|
||||
}
|
||||
this.string = res.toString();
|
||||
this.charCount = res.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
for (int i = 0; i < string.length(); i++) {
|
||||
if (string.charAt(i) != Character.toLowerCase(Character.toUpperCase(testString.charAt(strIndex + i)))) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return string.length();
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "UCI sequence: " + string; //$NON-NLS-1$
|
||||
}
|
||||
}
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
|
||||
*
|
||||
* COPYRIGHT AND PERMISSION NOTICE
|
||||
*
|
||||
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
|
||||
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
|
||||
* hereby granted, free of charge, to any person obtaining a copy of the
|
||||
* Unicode data files and any associated documentation (the "Data Files")
|
||||
* or Unicode software and any associated documentation (the "Software")
|
||||
* to deal in the Data Files or Software without restriction, including without
|
||||
* limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
* and/or sell copies of the Data Files or Software, and to permit persons
|
||||
* to whom the Data Files or Software are furnished to do so, provided that
|
||||
* (a) the above copyright notice(s) and this permission notice appear with
|
||||
* all copies of the Data Files or Software, (b) both the above copyright
|
||||
* notice(s) and this permission notice appear in associated documentation,
|
||||
* and (c) there is clear notice in each modified Data File or in the Software
|
||||
* as well as in the documentation associated with the Data File(s) or Software
|
||||
* that the data or software has been modified.
|
||||
|
||||
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall
|
||||
* not be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in these Data Files or Software without prior written
|
||||
* authorization of the copyright holder.
|
||||
*
|
||||
* 2. Additional terms from the Database:
|
||||
*
|
||||
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* The Unicode Character Database is provided as is by Unicode, Inc.
|
||||
* No claims are made as to fitness for any particular purpose. No warranties
|
||||
* of any kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been purchased
|
||||
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
|
||||
* will be exchange of defective media within 90 days of receipt. This disclaimer
|
||||
* is applicable for all other data files accompanying the Unicode Character Database,
|
||||
* some of which have been compiled by the Unicode Consortium, and some of which
|
||||
* have been supplied by other sources.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Data
|
||||
*
|
||||
* Recipient is granted the right to make copies in any form for internal
|
||||
* distribution and to freely use the information supplied in the creation of
|
||||
* products supporting the UnicodeTM Standard. The files in
|
||||
* the Unicode Character Database can be redistributed to third parties or other
|
||||
* organizations (whether for profit or not) as long as this notice and the disclaimer
|
||||
* notice are retained. Information can be extracted from these files and used
|
||||
* in documentation or programs, as long as there is an accompanying notice
|
||||
* indicating the source.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single supplementary codepoint in Unicode case
|
||||
* insensitive manner.
|
||||
*/
|
||||
class UCISupplCharSet extends LeafSet {
|
||||
|
||||
// int value of this supplementary codepoint
|
||||
private int ch;
|
||||
|
||||
public UCISupplCharSet(int ch) {
|
||||
charCount = 2;
|
||||
this.ch = Character.toLowerCase(Character.toUpperCase(ch));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int accepts(int strIndex, CharSequence testString) {
|
||||
char high = testString.charAt(strIndex++);
|
||||
char low = testString.charAt(strIndex);
|
||||
return (this.ch == Character.toLowerCase(Character.toUpperCase(Character.toCodePoint(high, low)))) ? 2 : -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "UCI " + new String(Character.toChars(ch));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Portions, Copyright © 1991-2005 Unicode, Inc. The following applies to Unicode.
|
||||
*
|
||||
* COPYRIGHT AND PERMISSION NOTICE
|
||||
*
|
||||
* Copyright © 1991-2005 Unicode, Inc. All rights reserved. Distributed under
|
||||
* the Terms of Use in http://www.unicode.org/copyright.html. Permission is
|
||||
* hereby granted, free of charge, to any person obtaining a copy of the
|
||||
* Unicode data files and any associated documentation (the "Data Files")
|
||||
* or Unicode software and any associated documentation (the "Software")
|
||||
* to deal in the Data Files or Software without restriction, including without
|
||||
* limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||
* and/or sell copies of the Data Files or Software, and to permit persons
|
||||
* to whom the Data Files or Software are furnished to do so, provided that
|
||||
* (a) the above copyright notice(s) and this permission notice appear with
|
||||
* all copies of the Data Files or Software, (b) both the above copyright
|
||||
* notice(s) and this permission notice appear in associated documentation,
|
||||
* and (c) there is clear notice in each modified Data File or in the Software
|
||||
* as well as in the documentation associated with the Data File(s) or Software
|
||||
* that the data or software has been modified.
|
||||
|
||||
* THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
|
||||
* INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
|
||||
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
* PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall
|
||||
* not be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in these Data Files or Software without prior written
|
||||
* authorization of the copyright holder.
|
||||
*
|
||||
* 2. Additional terms from the Database:
|
||||
*
|
||||
* Copyright © 1995-1999 Unicode, Inc. All Rights reserved.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* The Unicode Character Database is provided as is by Unicode, Inc.
|
||||
* No claims are made as to fitness for any particular purpose. No warranties
|
||||
* of any kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been purchased
|
||||
* on magnetic or optical media from Unicode, Inc., the sole remedy for any claim
|
||||
* will be exchange of defective media within 90 days of receipt. This disclaimer
|
||||
* is applicable for all other data files accompanying the Unicode Character Database,
|
||||
* some of which have been compiled by the Unicode Consortium, and some of which
|
||||
* have been supplied by other sources.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Data
|
||||
*
|
||||
* Recipient is granted the right to make copies in any form for internal
|
||||
* distribution and to freely use the information supplied in the creation of
|
||||
* products supporting the UnicodeTM Standard. The files in
|
||||
* the Unicode Character Database can be redistributed to third parties or other
|
||||
* organizations (whether for profit or not) as long as this notice and the disclaimer
|
||||
* notice are retained. Information can be extracted from these files and used
|
||||
* in documentation or programs, as long as there is an accompanying notice
|
||||
* indicating the source.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents node accepting single character from the given char class
|
||||
* in Unicode case insensitive manner.
|
||||
* This character can be supplementary (2 chars to represent) or from
|
||||
* basic multilingual pane (1 char to represent).
|
||||
*/
|
||||
class UCISupplRangeSet extends SupplRangeSet{
|
||||
|
||||
public UCISupplRangeSet(AbstractCharClass cs, AbstractSet next) {
|
||||
super(cs, next);
|
||||
}
|
||||
|
||||
public UCISupplRangeSet(AbstractCharClass cc) {
|
||||
super(cc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return chars.contains(Character.toLowerCase(Character.toUpperCase(ch)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "UCI range:" + (alt ? "^ " : " ") + chars.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Unix line terminator, accepting only \n.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
final class UEOLSet extends AbstractSet {
|
||||
|
||||
private int consCounter;
|
||||
|
||||
public UEOLSet(int counter) {
|
||||
this.consCounter = counter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int rightBound = matchResult.hasAnchoringBounds() ? matchResult.getRightBound() : testString.length();
|
||||
|
||||
if (strIndex >= rightBound) {
|
||||
matchResult.setConsumed(consCounter, 0);
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
}
|
||||
// check final line terminator;
|
||||
|
||||
if ((rightBound - strIndex) == 1 && testString.charAt(strIndex) == '\n') {
|
||||
matchResult.setConsumed(consCounter, 1);
|
||||
return next.matches(strIndex + 1, testString, matchResult);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
int cons;
|
||||
boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0);
|
||||
matchResult.setConsumed(consCounter, -1);
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<EOL>"; //$NON-NLS-1$
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Unix style multiline end-of-line node.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class UMultiLineEOLSet extends AbstractSet {
|
||||
|
||||
private int consCounter;
|
||||
|
||||
public UMultiLineEOLSet(int counter) {
|
||||
this.consCounter = counter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int strIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int strDif = matchResult.hasAnchoringBounds() ? matchResult.getRightBound() - strIndex : testString.length() -
|
||||
strIndex;
|
||||
if (strDif <= 0) {
|
||||
matchResult.setConsumed(consCounter, 0);
|
||||
return next.matches(strIndex, testString, matchResult);
|
||||
} else if (testString.charAt(strIndex) == '\n') {
|
||||
matchResult.setConsumed(consCounter, 1);
|
||||
return next.matches(strIndex + 1, testString, matchResult);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
int cons;
|
||||
boolean res = ((cons = matchResult.getConsumed(consCounter)) < 0 || cons > 0);
|
||||
matchResult.setConsumed(consCounter, -1);
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "<Unix MultiLine $>";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Unicode category (i.e. Ll, Lu).
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class UnicodeCategory extends AbstractCharClass {
|
||||
|
||||
protected int category;
|
||||
|
||||
public UnicodeCategory(int category) {
|
||||
this.category = category;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return alt ^ (category == Character.getType((char) ch));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Unicode category scope (i.e IsL, IsM, ...)
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class UnicodeCategoryScope extends UnicodeCategory {
|
||||
|
||||
public UnicodeCategoryScope(int category) {
|
||||
super(category);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(int ch) {
|
||||
return alt ^ ((category >> Character.getType((char) ch)) & 1) != 0;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Greedy quantifier node for the case where there is no intersection with next
|
||||
* node and normal quantifiers could be treated as greedy and possessive.
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class UnifiedQuantifierSet extends LeafQuantifierSet {
|
||||
|
||||
public UnifiedQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
|
||||
super(innerSet, next, type);
|
||||
}
|
||||
|
||||
public UnifiedQuantifierSet(LeafQuantifierSet quant) {
|
||||
super((LeafSet)quant.getInnerSet(), quant.getNext(), quant.getType());
|
||||
innerSet.setNext(this);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
while (stringIndex + leaf.charCount() <= matchResult.getRightBound() &&
|
||||
leaf.accepts(stringIndex, testString) > 0)
|
||||
stringIndex += leaf.charCount();
|
||||
|
||||
return next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
int startSearch = next.find(stringIndex, testString, matchResult);
|
||||
if (startSearch < 0)
|
||||
return -1;
|
||||
int newSearch = startSearch - leaf.charCount();
|
||||
while (newSearch >= stringIndex && leaf.accepts(newSearch, testString) > 0) {
|
||||
startSearch = newSearch;
|
||||
newSearch -= leaf.charCount();
|
||||
}
|
||||
|
||||
return startSearch;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
/**
|
||||
* Represents word boundary, checks current character and previous one if
|
||||
* different types returns true;
|
||||
*
|
||||
* @author Nikolay A. Kuznetsov
|
||||
*/
|
||||
class WordBoundary extends AbstractSet {
|
||||
|
||||
boolean positive;
|
||||
|
||||
public WordBoundary(boolean positive) {
|
||||
this.positive = positive;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matches(int stringIndex, CharSequence testString, MatchResultImpl matchResult) {
|
||||
boolean left;
|
||||
boolean right;
|
||||
|
||||
char ch1 = stringIndex >= matchResult.getRightBound() ? ' ' : testString.charAt(stringIndex);
|
||||
char ch2 = stringIndex == 0 ? ' ' : testString.charAt(stringIndex - 1);
|
||||
|
||||
int leftBound = matchResult.hasTransparentBounds() ? 0 : matchResult.getLeftBound();
|
||||
left = (ch1 == ' ') || isSpace(ch1, stringIndex, leftBound, testString);
|
||||
right = (ch2 == ' ') || isSpace(ch2, stringIndex - 1, leftBound, testString);
|
||||
return ((left ^ right) ^ positive) ? -1 : next.matches(stringIndex, testString, matchResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns false, because word boundary does not consumes any characters and
|
||||
* do not move string index.
|
||||
*/
|
||||
@Override
|
||||
public boolean hasConsumed(MatchResultImpl matchResult) {
|
||||
// only checks boundary, do not consumes characters
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getName() {
|
||||
return "WordBoundary"; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
private boolean isSpace(char ch, int index, int leftBound, CharSequence testString) {
|
||||
if (Character.isLetterOrDigit(ch) || ch == '_')
|
||||
return false;
|
||||
if (Character.getType(ch) == Character.NON_SPACING_MARK) {
|
||||
for (; --index >= leftBound;) {
|
||||
ch = testString.charAt(index);
|
||||
if (Character.isLetterOrDigit(ch))
|
||||
return false;
|
||||
if (Character.getType(ch) != Character.NON_SPACING_MARK)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,234 @@
|
|||
/* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Tests Matcher methods
|
||||
*/
|
||||
@SuppressWarnings("nls")
|
||||
public class Matcher2Test extends TestCase {
|
||||
public void test_toString() {
|
||||
Pattern p = Pattern.compile("foo");
|
||||
Matcher m = p.matcher("bar");
|
||||
assertNotNull(m.toString());
|
||||
}
|
||||
|
||||
public void testErrorConditions() throws PatternSyntaxException {
|
||||
// Test match cursors in absence of a match
|
||||
Pattern p = Pattern.compile("foo");
|
||||
Matcher m = p.matcher("bar");
|
||||
assertFalse(m.matches());
|
||||
|
||||
try {
|
||||
m.start();
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.end();
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.group();
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.start(1);
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.end(1);
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.group(1);
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
// regression test for HARMONY-2418
|
||||
try {
|
||||
m.usePattern(null);
|
||||
fail("IllegalArgumentException expected");
|
||||
} catch (IllegalArgumentException e) {
|
||||
// PASSED
|
||||
}
|
||||
}
|
||||
|
||||
public void testErrorConditions2() throws PatternSyntaxException {
|
||||
// Test match cursors in absence of a match
|
||||
Pattern p = Pattern.compile("(foo[0-9])(bar[a-z])");
|
||||
Matcher m = p.matcher("foo1barzfoo2baryfoozbar5");
|
||||
|
||||
assertTrue(m.find());
|
||||
assertEquals(0, m.start());
|
||||
assertEquals(8, m.end());
|
||||
assertEquals(0, m.start(1));
|
||||
assertEquals(4, m.end(1));
|
||||
assertEquals(4, m.start(2));
|
||||
assertEquals(8, m.end(2));
|
||||
|
||||
try {
|
||||
m.start(3);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.end(3);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.group(3);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.start(-1);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.end(-1);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.group(-1);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
assertTrue(m.find());
|
||||
assertEquals(8, m.start());
|
||||
assertEquals(16, m.end());
|
||||
assertEquals(8, m.start(1));
|
||||
assertEquals(12, m.end(1));
|
||||
assertEquals(12, m.start(2));
|
||||
assertEquals(16, m.end(2));
|
||||
|
||||
try {
|
||||
m.start(3);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.end(3);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.group(3);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.start(-1);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.end(-1);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.group(-1);
|
||||
fail("IndexOutOfBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
assertFalse(m.find());
|
||||
|
||||
try {
|
||||
m.start(3);
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.end(3);
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.group(3);
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.start(-1);
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.end(-1);
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.group(-1);
|
||||
fail("IllegalStateException expected");
|
||||
} catch (IllegalStateException e) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Regression test for HARMONY-997
|
||||
*/
|
||||
public void testReplacementBackSlash() {
|
||||
String str = "replace me";
|
||||
String replacedString = "me";
|
||||
String substitutionString = "\\";
|
||||
Pattern pat = Pattern.compile(replacedString);
|
||||
Matcher mat = pat.matcher(str);
|
||||
try {
|
||||
mat.replaceAll(substitutionString);
|
||||
fail("IndexOutOfBoundsException should be thrown");
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,772 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
@SuppressWarnings("nls")
|
||||
public class MatcherTest {
|
||||
String[] testPatterns = {
|
||||
"(a|b)*abb",
|
||||
"(1*2*3*4*)*567",
|
||||
"(a|b|c|d)*aab",
|
||||
"(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*",
|
||||
"(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*",
|
||||
"(a|b)*(a|b)*A(a|b)*lice.*",
|
||||
"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|"
|
||||
+ "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do" };
|
||||
|
||||
String[] groupPatterns = { "(a|b)*aabb", "((a)|b)*aabb", "((a|b)*)a(abb)",
|
||||
"(((a)|(b))*)aabb", "(((a)|(b))*)aa(b)b", "(((a)|(b))*)a(a(b)b)" };
|
||||
|
||||
@Test
|
||||
public void testRegionsIntInt() {
|
||||
Pattern p = Pattern.compile("x*");
|
||||
Matcher m = p.matcher("axxxxxa");
|
||||
assertFalse(m.matches());
|
||||
|
||||
m.region(1, 6);
|
||||
assertEquals(1, m.regionStart());
|
||||
assertEquals(6, m.regionEnd());
|
||||
assertTrue(m.matches());
|
||||
|
||||
try {
|
||||
m.region(1, 0);
|
||||
fail("expected an IOOBE");
|
||||
} catch(IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.region(-1, 2);
|
||||
fail("expected an IOOBE");
|
||||
} catch(IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.region(10, 11);
|
||||
fail("expected an IOOBE");
|
||||
} catch(IndexOutOfBoundsException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
m.region(1, 10);
|
||||
fail("expected an IOOBE");
|
||||
} catch(IndexOutOfBoundsException e) {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAppendReplacement() {
|
||||
Pattern pat = Pattern.compile("XX");
|
||||
Matcher m = pat.matcher("Today is XX-XX-XX ...");
|
||||
StringBuffer sb = new StringBuffer();
|
||||
|
||||
for (int i = 0; m.find(); i++) {
|
||||
m.appendReplacement(sb, new Integer(i * 10 + i).toString());
|
||||
}
|
||||
m.appendTail(sb);
|
||||
assertEquals("Today is 0-11-22 ...", sb.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAppendReplacementRef() {
|
||||
Pattern p = Pattern.compile("xx (rur|\\$)");
|
||||
Matcher m = p.matcher("xx $ equals to xx rur.");
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 1; m.find(); i *= 30) {
|
||||
String rep = new Integer(i).toString() + " $1";
|
||||
m.appendReplacement(sb, rep);
|
||||
}
|
||||
m.appendTail(sb);
|
||||
assertEquals("1 $ equals to 30 rur.", sb.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplaceAll() {
|
||||
String input = "aabfooaabfooabfoob";
|
||||
String pattern = "a*b";
|
||||
Pattern pat = Pattern.compile(pattern);
|
||||
Matcher mat = pat.matcher(input);
|
||||
|
||||
assertEquals("-foo-foo-foo-", mat.replaceAll("-"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testResetCharSequence() {
|
||||
Pattern p = Pattern.compile("abcd");
|
||||
Matcher m = p.matcher("abcd");
|
||||
assertTrue(m.matches());
|
||||
m.reset("efgh");
|
||||
assertFalse(m.matches());
|
||||
|
||||
try {
|
||||
m.reset(null);
|
||||
fail("expected a NPE");
|
||||
} catch (NullPointerException e) {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAppendSlashes() {
|
||||
Pattern p = Pattern.compile("\\\\");
|
||||
Matcher m = p.matcher("one\\cat\\two\\cats\\in\\the\\yard");
|
||||
StringBuffer sb = new StringBuffer();
|
||||
while (m.find()) {
|
||||
m.appendReplacement(sb, "\\\\");
|
||||
}
|
||||
m.appendTail(sb);
|
||||
assertEquals("one\\cat\\two\\cats\\in\\the\\yard", sb.toString());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplaceFirst() {
|
||||
String input = "zzzdogzzzdogzzz";
|
||||
String pattern = "dog";
|
||||
Pattern pat = Pattern.compile(pattern);
|
||||
Matcher mat = pat.matcher(input);
|
||||
|
||||
assertEquals("zzzcatzzzdogzzz", mat.replaceFirst("cat"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPattern() {
|
||||
for (String element : testPatterns) {
|
||||
Pattern test = Pattern.compile(element);
|
||||
assertEquals(test, test.matcher("aaa").pattern());
|
||||
}
|
||||
|
||||
for (String element : testPatterns) {
|
||||
assertEquals(element, Pattern.compile(element).matcher("aaa")
|
||||
.pattern().toString());
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Class under test for Matcher reset()
|
||||
*/
|
||||
public void testReset() {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupint() {
|
||||
String positiveTestString = "ababababbaaabb";
|
||||
|
||||
// test IndexOutOfBoundsException
|
||||
// //
|
||||
for (int i = 0; i < groupPatterns.length; i++) {
|
||||
Pattern test = Pattern.compile(groupPatterns[i]);
|
||||
Matcher mat = test.matcher(positiveTestString);
|
||||
mat.matches();
|
||||
try {
|
||||
// groupPattern <index + 1> equals to number of groups
|
||||
// of the specified pattern
|
||||
// //
|
||||
mat.group(i + 2);
|
||||
fail("IndexOutBoundsException expected");
|
||||
mat.group(i + 100);
|
||||
fail("IndexOutBoundsException expected");
|
||||
mat.group(-1);
|
||||
fail("IndexOutBoundsException expected");
|
||||
mat.group(-100);
|
||||
fail("IndexOutBoundsException expected");
|
||||
} catch (IndexOutOfBoundsException iobe) {
|
||||
}
|
||||
}
|
||||
|
||||
String[][] groupResults = { { "a" }, { "a", "a" },
|
||||
{ "ababababba", "a", "abb" }, { "ababababba", "a", "a", "b" },
|
||||
{ "ababababba", "a", "a", "b", "b" },
|
||||
{ "ababababba", "a", "a", "b", "abb", "b" }, };
|
||||
|
||||
for (int i = 0; i < groupPatterns.length; i++) {
|
||||
Pattern test = Pattern.compile(groupPatterns[i]);
|
||||
Matcher mat = test.matcher(positiveTestString);
|
||||
mat.matches();
|
||||
for (int j = 0; j < groupResults[i].length; j++) {
|
||||
assertEquals("i: " + i + " j: " + j, groupResults[i][j], mat
|
||||
.group(j + 1));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroup() {
|
||||
String positiveTestString = "ababababbaaabb";
|
||||
String negativeTestString = "gjhfgdsjfhgcbv";
|
||||
for (String element : groupPatterns) {
|
||||
Pattern test = Pattern.compile(element);
|
||||
Matcher mat = test.matcher(positiveTestString);
|
||||
mat.matches();
|
||||
// test result
|
||||
assertEquals(positiveTestString, mat.group());
|
||||
|
||||
// test equal to group(0) result
|
||||
assertEquals(mat.group(0), mat.group());
|
||||
}
|
||||
|
||||
for (String element : groupPatterns) {
|
||||
Pattern test = Pattern.compile(element);
|
||||
Matcher mat = test.matcher(negativeTestString);
|
||||
mat.matches();
|
||||
try {
|
||||
mat.group();
|
||||
fail("IllegalStateException expected for <false> matches result");
|
||||
} catch (IllegalStateException ise) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupPossessive() {
|
||||
Pattern pat = Pattern.compile("((a)|(b))++c");
|
||||
Matcher mat = pat.matcher("aac");
|
||||
|
||||
mat.matches();
|
||||
assertEquals("a", mat.group(1));
|
||||
}
|
||||
|
||||
/*
|
||||
* Class under test for boolean find(int)
|
||||
*/
|
||||
public void testFindint() {
|
||||
}
|
||||
|
||||
/*
|
||||
* Class under test for int start(int)
|
||||
*/
|
||||
public void testStartint() {
|
||||
}
|
||||
|
||||
/*
|
||||
* Class under test for int end(int)
|
||||
*/
|
||||
public void testEndint() {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatchesMisc() {
|
||||
String[][] posSeq = {
|
||||
{ "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
|
||||
{ "213567", "12324567", "1234567", "213213567",
|
||||
"21312312312567", "444444567" },
|
||||
{ "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
|
||||
{ "213234567", "3458", "0987654", "7689546432", "0398576",
|
||||
"98432", "5" },
|
||||
{
|
||||
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
||||
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
|
||||
{ "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
|
||||
"abbbAbbbliceaaa", "Alice" },
|
||||
{ "a123", "bnxnvgds156", "for", "while", "if", "struct" }
|
||||
|
||||
};
|
||||
|
||||
for (int i = 0; i < testPatterns.length; i++) {
|
||||
Pattern pat = Pattern.compile(testPatterns[i]);
|
||||
for (int j = 0; j < posSeq[i].length; j++) {
|
||||
Matcher mat = pat.matcher(posSeq[i][j]);
|
||||
assertTrue("Incorrect match: " + testPatterns[i] + " vs "
|
||||
+ posSeq[i][j], mat.matches());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatchesQuantifiers() {
|
||||
String[] testPatternsSingles = { "a{5}", "a{2,4}", "a{3,}" };
|
||||
String[] testPatternsMultiple = { "((a)|(b)){1,2}abb",
|
||||
"((a)|(b)){2,4}", "((a)|(b)){3,}" };
|
||||
|
||||
String[][] stringSingles = { { "aaaaa", "aaa" },
|
||||
{ "aa", "a", "aaa", "aaaaaa", "aaaa", "aaaaa" },
|
||||
{ "aaa", "a", "aaaa", "aa" }, };
|
||||
|
||||
String[][] stringMultiples = { { "ababb", "aba" },
|
||||
{ "ab", "b", "bab", "ababa", "abba", "abababbb" },
|
||||
{ "aba", "b", "abaa", "ba" }, };
|
||||
|
||||
for (int i = 0; i < testPatternsSingles.length; i++) {
|
||||
Pattern pat = Pattern.compile(testPatternsSingles[i]);
|
||||
for (int j = 0; j < stringSingles.length / 2; j++) {
|
||||
assertTrue("Match expected, but failed: " + pat.pattern()
|
||||
+ " : " + stringSingles[i][j], pat.matcher(
|
||||
stringSingles[i][j * 2]).matches());
|
||||
assertFalse("Match failure expected, but match succeed: "
|
||||
+ pat.pattern() + " : " + stringSingles[i][j * 2 + 1],
|
||||
pat.matcher(stringSingles[i][j * 2 + 1]).matches());
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < testPatternsMultiple.length; i++) {
|
||||
Pattern pat = Pattern.compile(testPatternsMultiple[i]);
|
||||
for (int j = 0; j < stringMultiples.length / 2; j++) {
|
||||
assertTrue("Match expected, but failed: " + pat.pattern()
|
||||
+ " : " + stringMultiples[i][j], pat.matcher(
|
||||
stringMultiples[i][j * 2]).matches());
|
||||
assertFalse(
|
||||
"Match failure expected, but match succeed: "
|
||||
+ pat.pattern() + " : "
|
||||
+ stringMultiples[i][j * 2 + 1], pat.matcher(
|
||||
stringMultiples[i][j * 2 + 1]).matches());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuantVsGroup() {
|
||||
String patternString = "(d{1,3})((a|c)*)(d{1,3})((a|c)*)(d{1,3})";
|
||||
String testString = "dacaacaacaaddaaacaacaaddd";
|
||||
|
||||
Pattern pat = Pattern.compile(patternString);
|
||||
Matcher mat = pat.matcher(testString);
|
||||
|
||||
mat.matches();
|
||||
assertEquals("dacaacaacaaddaaacaacaaddd", mat.group());
|
||||
assertEquals("d", mat.group(1));
|
||||
assertEquals("acaacaacaa", mat.group(2));
|
||||
assertEquals("dd", mat.group(4));
|
||||
assertEquals("aaacaacaa", mat.group(5));
|
||||
assertEquals("ddd", mat.group(7));
|
||||
}
|
||||
|
||||
public void testLookingAt() {
|
||||
}
|
||||
|
||||
/*
|
||||
* Class under test for boolean find()
|
||||
*/
|
||||
public void testFind() {
|
||||
String testPattern = "(abb)";
|
||||
String testString = "cccabbabbabbabbabb";
|
||||
Pattern pat = Pattern.compile(testPattern);
|
||||
Matcher mat = pat.matcher(testString);
|
||||
int start = 3;
|
||||
int end = 6;
|
||||
while (mat.find()) {
|
||||
assertEquals(start, mat.start(1));
|
||||
assertEquals(end, mat.end(1));
|
||||
|
||||
start = end;
|
||||
end += 3;
|
||||
}
|
||||
|
||||
testPattern = "(\\d{1,3})";
|
||||
testString = "aaaa123456789045";
|
||||
|
||||
Pattern pat2 = Pattern.compile(testPattern);
|
||||
Matcher mat2 = pat2.matcher(testString);
|
||||
start = 4;
|
||||
int length = 3;
|
||||
while (mat2.find()) {
|
||||
assertEquals(testString.substring(start, start + length), mat2
|
||||
.group(1));
|
||||
start += length;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSEOLsymbols() {
|
||||
Pattern pat = Pattern.compile("^a\\(bb\\[$");
|
||||
Matcher mat = pat.matcher("a(bb[");
|
||||
|
||||
assertTrue(mat.matches());
|
||||
}
|
||||
|
||||
/*
|
||||
* Class under test for int start()
|
||||
*/
|
||||
public void testStart() {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupCount() {
|
||||
for (int i = 0; i < groupPatterns.length; i++) {
|
||||
Pattern test = Pattern.compile(groupPatterns[i]);
|
||||
Matcher mat = test.matcher("ababababbaaabb");
|
||||
mat.matches();
|
||||
assertEquals(i + 1, mat.groupCount());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRelactantQuantifiers() {
|
||||
Pattern pat = Pattern.compile("(ab*)*b");
|
||||
Matcher mat = pat.matcher("abbbb");
|
||||
|
||||
if (mat.matches()) {
|
||||
assertEquals("abbb", mat.group(1));
|
||||
} else {
|
||||
fail("Match expected: (ab*)*b vs abbbb");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEnhancedFind() {
|
||||
String input = "foob";
|
||||
String pattern = "a*b";
|
||||
Pattern pat = Pattern.compile(pattern);
|
||||
Matcher mat = pat.matcher(input);
|
||||
|
||||
mat.find();
|
||||
assertEquals("b", mat.group());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPosCompositeGroup() {
|
||||
String[] posExamples = { "aabbcc", "aacc", "bbaabbcc" };
|
||||
String[] negExamples = { "aabb", "bb", "bbaabb" };
|
||||
Pattern posPat = Pattern.compile("(aa|bb){1,3}+cc");
|
||||
Pattern negPat = Pattern.compile("(aa|bb){1,3}+bb");
|
||||
|
||||
Matcher mat;
|
||||
for (String element : posExamples) {
|
||||
mat = posPat.matcher(element);
|
||||
assertTrue(mat.matches());
|
||||
}
|
||||
|
||||
for (String element : negExamples) {
|
||||
mat = negPat.matcher(element);
|
||||
assertFalse(mat.matches());
|
||||
}
|
||||
|
||||
assertTrue(Pattern.matches("(aa|bb){1,3}+bb", "aabbaabb"));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPosAltGroup() {
|
||||
String[] posExamples = { "aacc", "bbcc", "cc" };
|
||||
String[] negExamples = { "bb", "aa" };
|
||||
Pattern posPat = Pattern.compile("(aa|bb)?+cc");
|
||||
Pattern negPat = Pattern.compile("(aa|bb)?+bb");
|
||||
|
||||
Matcher mat;
|
||||
for (String element : posExamples) {
|
||||
mat = posPat.matcher(element);
|
||||
assertTrue(posPat.toString() + " vs: " + element, mat.matches());
|
||||
}
|
||||
|
||||
for (String element : negExamples) {
|
||||
mat = negPat.matcher(element);
|
||||
assertFalse(mat.matches());
|
||||
}
|
||||
|
||||
assertTrue(Pattern.matches("(aa|bb)?+bb", "aabb"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRelCompGroup() {
|
||||
|
||||
Matcher mat;
|
||||
Pattern pat;
|
||||
String res = "";
|
||||
for (int i = 0; i < 4; i++) {
|
||||
pat = Pattern.compile("((aa|bb){" + i + ",3}?).*cc");
|
||||
mat = pat.matcher("aaaaaacc");
|
||||
assertTrue(pat.toString() + " vs: " + "aaaaaacc", mat.matches());
|
||||
assertEquals(res, mat.group(1));
|
||||
res += "aa";
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRelAltGroup() {
|
||||
|
||||
Matcher mat;
|
||||
Pattern pat;
|
||||
|
||||
pat = Pattern.compile("((aa|bb)??).*cc");
|
||||
mat = pat.matcher("aacc");
|
||||
assertTrue(pat.toString() + " vs: " + "aacc", mat.matches());
|
||||
assertEquals("", mat.group(1));
|
||||
|
||||
pat = Pattern.compile("((aa|bb)??)cc");
|
||||
mat = pat.matcher("aacc");
|
||||
assertTrue(pat.toString() + " vs: " + "aacc", mat.matches());
|
||||
assertEquals("aa", mat.group(1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIgnoreCase() {
|
||||
Pattern pat = Pattern.compile("(aa|bb)*", Pattern.CASE_INSENSITIVE);
|
||||
Matcher mat = pat.matcher("aAbb");
|
||||
|
||||
assertTrue(mat.matches());
|
||||
|
||||
pat = Pattern.compile("(a|b|c|d|e)*", Pattern.CASE_INSENSITIVE);
|
||||
mat = pat.matcher("aAebbAEaEdebbedEccEdebbedEaedaebEbdCCdbBDcdcdADa");
|
||||
assertTrue(mat.matches());
|
||||
|
||||
pat = Pattern.compile("[a-e]*", Pattern.CASE_INSENSITIVE);
|
||||
mat = pat.matcher("aAebbAEaEdebbedEccEdebbedEaedaebEbdCCdbBDcdcdADa");
|
||||
assertTrue(mat.matches());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuoteReplacement() {
|
||||
assertEquals("\\\\aaCC\\$1", Matcher.quoteReplacement("\\aaCC$1"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOverFlow() {
|
||||
Pattern tp = Pattern.compile("(a*)*");
|
||||
Matcher tm = tp.matcher("aaa");
|
||||
assertTrue(tm.matches());
|
||||
assertEquals("", tm.group(1));
|
||||
|
||||
assertTrue(Pattern.matches("(1+)\\1+", "11"));
|
||||
assertTrue(Pattern.matches("(1+)(2*)\\2+", "11"));
|
||||
|
||||
Pattern pat = Pattern.compile("(1+)\\1*");
|
||||
Matcher mat = pat.matcher("11");
|
||||
|
||||
assertTrue(mat.matches());
|
||||
assertEquals("11", mat.group(1));
|
||||
|
||||
pat = Pattern.compile("((1+)|(2+))(\\2+)");
|
||||
mat = pat.matcher("11");
|
||||
|
||||
assertTrue(mat.matches());
|
||||
assertEquals("1", mat.group(2));
|
||||
assertEquals("1", mat.group(1));
|
||||
assertEquals("1", mat.group(4));
|
||||
assertNull(mat.group(3));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnicode() {
|
||||
|
||||
assertTrue(Pattern.matches("\\x61a", "aa"));
|
||||
assertTrue(Pattern.matches("\\u0061a", "aa"));
|
||||
assertTrue(Pattern.matches("\\0141a", "aa"));
|
||||
assertTrue(Pattern.matches("\\0777", "?7"));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnicodeCategory() {
|
||||
assertTrue(Pattern.matches("\\p{Ll}", "k")); // Unicode lower case
|
||||
assertTrue(Pattern.matches("\\P{Ll}", "K")); // Unicode non-lower
|
||||
// case
|
||||
assertTrue(Pattern.matches("\\p{Lu}", "K")); // Unicode upper case
|
||||
assertTrue(Pattern.matches("\\P{Lu}", "k")); // Unicode non-upper
|
||||
// case
|
||||
// combinations
|
||||
assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Lu}]]", "k"));
|
||||
assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Ll}]]", "K"));
|
||||
assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Lu}]]", "K"));
|
||||
assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Ll}]]", "k"));
|
||||
|
||||
// category/character combinations
|
||||
assertFalse(Pattern.matches("[\\p{L}&&[^a-z]]", "k"));
|
||||
assertTrue(Pattern.matches("[\\p{L}&&[^a-z]]", "K"));
|
||||
|
||||
assertTrue(Pattern.matches("[\\p{Lu}a-z]", "k"));
|
||||
assertTrue(Pattern.matches("[a-z\\p{Lu}]", "k"));
|
||||
|
||||
assertFalse(Pattern.matches("[\\p{Lu}a-d]", "k"));
|
||||
assertTrue(Pattern.matches("[a-d\\p{Lu}]", "K"));
|
||||
|
||||
// assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Lu}&&[^K]]]", "K"));
|
||||
assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Lu}&&[^G]]]", "K"));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSplitEmpty() {
|
||||
|
||||
Pattern pat = Pattern.compile("");
|
||||
String[] s = pat.split("", -1);
|
||||
|
||||
assertEquals(1, s.length);
|
||||
assertEquals("", s[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindDollar() {
|
||||
Matcher mat = Pattern.compile("a$").matcher("a\n");
|
||||
assertTrue(mat.find());
|
||||
assertEquals("a", mat.group());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatchesRegionChanged() {
|
||||
// Regression for HARMONY-610
|
||||
String input = " word ";
|
||||
Pattern pattern = Pattern.compile("\\w+");
|
||||
Matcher matcher = pattern.matcher(input);
|
||||
matcher.region(1, 5);
|
||||
assertTrue(matcher.matches());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllCodePoints() {
|
||||
// Regression for HARMONY-3145
|
||||
int[] codePoint = new int[1];
|
||||
Pattern p = Pattern.compile("(\\p{all})+");
|
||||
boolean res = true;
|
||||
int cnt = 0;
|
||||
String s;
|
||||
for (int i = 0; i < 0x110000; i++) {
|
||||
codePoint[0] = i;
|
||||
s = new String(codePoint, 0, 1);
|
||||
if (!s.matches(p.toString())) {
|
||||
cnt++;
|
||||
res = false;
|
||||
}
|
||||
}
|
||||
assertTrue(res);
|
||||
assertEquals(0, cnt);
|
||||
|
||||
p = Pattern.compile("(\\P{all})+");
|
||||
res = true;
|
||||
cnt = 0;
|
||||
|
||||
for (int i = 0; i < 0x110000; i++) {
|
||||
codePoint[0] = i;
|
||||
s = new String(codePoint, 0, 1);
|
||||
if (!s.matches(p.toString())) {
|
||||
cnt++;
|
||||
res = false;
|
||||
}
|
||||
}
|
||||
|
||||
assertFalse(res);
|
||||
assertEquals(0x110000, cnt);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindRegionChanged() {
|
||||
// Regression for HARMONY-625
|
||||
Pattern pattern = Pattern.compile("(?s).*");
|
||||
Matcher matcher = pattern.matcher("abcde");
|
||||
matcher.find();
|
||||
assertEquals("abcde", matcher.group());
|
||||
|
||||
matcher = pattern.matcher("abcde");
|
||||
matcher.region(0, 2);
|
||||
matcher.find();
|
||||
assertEquals("ab", matcher.group());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFindRegionChanged2() {
|
||||
// Regression for HARMONY-713
|
||||
Pattern pattern = Pattern.compile("c");
|
||||
|
||||
String inputStr = "aabb.c";
|
||||
Matcher matcher = pattern.matcher(inputStr);
|
||||
matcher.region(0, 3);
|
||||
|
||||
assertFalse(matcher.find());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPatternMatcher() throws Exception {
|
||||
Pattern pattern = Pattern.compile("(?:\\d+)(?:pt)");
|
||||
assertTrue(pattern.matcher("14pt").matches());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test3360() {
|
||||
String str = "!\"#%&'(),-./";
|
||||
Pattern p = Pattern.compile("\\s");
|
||||
Matcher m = p.matcher(str);
|
||||
|
||||
assertFalse(m.find());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGeneralPunctuationCategory() {
|
||||
String[] s = { ",", "!", "\"", "#", "%", "&", "'", "(", ")", "-", ".",
|
||||
"/" };
|
||||
String regexp = "\\p{P}";
|
||||
|
||||
for (int i = 0; i < s.length; i++) {
|
||||
Pattern pattern = Pattern.compile(regexp);
|
||||
Matcher matcher = pattern.matcher(s[i]);
|
||||
assertTrue(matcher.find());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHitEndAfterFind() {
|
||||
hitEndTest(true, "#01.0", "r((ege)|(geg))x", "regexx", false);
|
||||
hitEndTest(true, "#01.1", "r((ege)|(geg))x", "regex", false);
|
||||
hitEndTest(true, "#01.2", "r((ege)|(geg))x", "rege", true);
|
||||
hitEndTest(true, "#01.2", "r((ege)|(geg))x", "xregexx", false);
|
||||
|
||||
hitEndTest(true, "#02.0", "regex", "rexreger", true);
|
||||
hitEndTest(true, "#02.1", "regex", "raxregexr", false);
|
||||
|
||||
String floatRegex = getHexFloatRegex();
|
||||
hitEndTest(true, "#03.0", floatRegex, Double.toHexString(-1.234d), true);
|
||||
hitEndTest(true, "#03.1", floatRegex, "1 ABC"
|
||||
+ Double.toHexString(Double.NaN) + "buhuhu", false);
|
||||
hitEndTest(true, "#03.2", floatRegex, Double.toHexString(-0.0) + "--",
|
||||
false);
|
||||
hitEndTest(true, "#03.3", floatRegex, "--"
|
||||
+ Double.toHexString(Double.MIN_VALUE) + "--", false);
|
||||
|
||||
hitEndTest(true, "#04.0", "(\\d+) fish (\\d+) fish (\\w+) fish (\\d+)",
|
||||
"1 fish 2 fish red fish 5", true);
|
||||
hitEndTest(true, "#04.1", "(\\d+) fish (\\d+) fish (\\w+) fish (\\d+)",
|
||||
"----1 fish 2 fish red fish 5----", false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToString() {
|
||||
String result = Pattern.compile("(\\d{1,3})").matcher(
|
||||
"aaaa123456789045").toString();
|
||||
assertTrue("The result doesn't contain pattern info", result
|
||||
.contains("(\\d{1,3})"));
|
||||
}
|
||||
|
||||
private void hitEndTest(boolean callFind, String testNo, String regex,
|
||||
String input, boolean hit) {
|
||||
Pattern pattern = Pattern.compile(regex);
|
||||
Matcher matcher = pattern.matcher(input);
|
||||
if (callFind) {
|
||||
matcher.find();
|
||||
} else {
|
||||
matcher.matches();
|
||||
}
|
||||
boolean h = matcher.hitEnd();
|
||||
|
||||
assertTrue(testNo, h == hit);
|
||||
}
|
||||
|
||||
private String getHexFloatRegex() {
|
||||
String hexDecimal = "(-|\\+)?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP](-|\\+)?[0-9]+)?";
|
||||
String notANumber = "((-|\\+)?Infinity)|([nN]a[nN])";
|
||||
return new StringBuilder("((").append(hexDecimal).append(")|(").append(
|
||||
notANumber).append("))").toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Tests Pattern compilation modes and modes triggered in pattern strings
|
||||
*/
|
||||
@SuppressWarnings("nls")
|
||||
public class ModeTest extends TestCase {
|
||||
public void testCase() throws PatternSyntaxException {
|
||||
Pattern p;
|
||||
Matcher m;
|
||||
|
||||
p = Pattern.compile("([a-z]+)[0-9]+");
|
||||
m = p.matcher("cAT123#dog345");
|
||||
assertTrue(m.find());
|
||||
assertEquals("dog", m.group(1));
|
||||
assertFalse(m.find());
|
||||
|
||||
p = Pattern.compile("([a-z]+)[0-9]+", Pattern.CASE_INSENSITIVE);
|
||||
m = p.matcher("cAt123#doG345");
|
||||
assertTrue(m.find());
|
||||
assertEquals("cAt", m.group(1));
|
||||
assertTrue(m.find());
|
||||
assertEquals("doG", m.group(1));
|
||||
assertFalse(m.find());
|
||||
|
||||
p = Pattern.compile("(?i)([a-z]+)[0-9]+");
|
||||
m = p.matcher("cAt123#doG345");
|
||||
assertTrue(m.find());
|
||||
assertEquals("cAt", m.group(1));
|
||||
assertTrue(m.find());
|
||||
assertEquals("doG", m.group(1));
|
||||
assertFalse(m.find());
|
||||
}
|
||||
|
||||
public void testMultiline() throws PatternSyntaxException {
|
||||
Pattern p;
|
||||
Matcher m;
|
||||
|
||||
p = Pattern.compile("^foo");
|
||||
m = p.matcher("foobar");
|
||||
assertTrue(m.find());
|
||||
assertTrue(m.start() == 0 && m.end() == 3);
|
||||
assertFalse(m.find());
|
||||
|
||||
m = p.matcher("barfoo");
|
||||
assertFalse(m.find());
|
||||
|
||||
p = Pattern.compile("foo$");
|
||||
m = p.matcher("foobar");
|
||||
assertFalse(m.find());
|
||||
|
||||
m = p.matcher("barfoo");
|
||||
assertTrue(m.find());
|
||||
assertTrue(m.start() == 3 && m.end() == 6);
|
||||
assertFalse(m.find());
|
||||
|
||||
p = Pattern.compile("^foo([0-9]*)", Pattern.MULTILINE);
|
||||
m = p.matcher("foo1bar\nfoo2foo3\nbarfoo4");
|
||||
assertTrue(m.find());
|
||||
assertEquals("1", m.group(1));
|
||||
assertTrue(m.find());
|
||||
assertEquals("2", m.group(1));
|
||||
assertFalse(m.find());
|
||||
|
||||
p = Pattern.compile("foo([0-9]*)$", Pattern.MULTILINE);
|
||||
m = p.matcher("foo1bar\nfoo2foo3\nbarfoo4");
|
||||
assertTrue(m.find());
|
||||
assertEquals("3", m.group(1));
|
||||
assertTrue(m.find());
|
||||
assertEquals("4", m.group(1));
|
||||
assertFalse(m.find());
|
||||
|
||||
p = Pattern.compile("(?m)^foo([0-9]*)");
|
||||
m = p.matcher("foo1bar\nfoo2foo3\nbarfoo4");
|
||||
assertTrue(m.find());
|
||||
assertEquals("1", m.group(1));
|
||||
assertTrue(m.find());
|
||||
assertEquals("2", m.group(1));
|
||||
assertFalse(m.find());
|
||||
|
||||
p = Pattern.compile("(?m)foo([0-9]*)$");
|
||||
m = p.matcher("foo1bar\nfoo2foo3\nbarfoo4");
|
||||
assertTrue(m.find());
|
||||
assertEquals("3", m.group(1));
|
||||
assertTrue(m.find());
|
||||
assertEquals("4", m.group(1));
|
||||
assertFalse(m.find());
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,67 @@
|
|||
/* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Test boundary and error conditions in java.util.regex.Pattern
|
||||
*/
|
||||
@SuppressWarnings("nls")
|
||||
public class PatternErrorTest extends TestCase {
|
||||
public void testCompileErrors() throws Exception {
|
||||
// null regex string - should get NullPointerException
|
||||
try {
|
||||
Pattern.compile(null);
|
||||
fail("NullPointerException expected");
|
||||
} catch (NullPointerException e) {
|
||||
}
|
||||
|
||||
// empty regex string - no exception should be thrown
|
||||
Pattern.compile("");
|
||||
|
||||
// note: invalid regex syntax checked in PatternSyntaxExceptionTest
|
||||
|
||||
// flags = 0 should raise no exception
|
||||
int flags = 0;
|
||||
Pattern.compile("foo", flags);
|
||||
|
||||
// check that all valid flags accepted without exception
|
||||
flags |= Pattern.UNIX_LINES;
|
||||
flags |= Pattern.CASE_INSENSITIVE;
|
||||
flags |= Pattern.MULTILINE;
|
||||
flags |= Pattern.CANON_EQ;
|
||||
flags |= Pattern.COMMENTS;
|
||||
flags |= Pattern.DOTALL;
|
||||
flags |= Pattern.UNICODE_CASE;
|
||||
Pattern.compile("foo", flags);
|
||||
|
||||
// add invalid flags - should get IllegalArgumentException
|
||||
// regression test for HARMONY-4248
|
||||
flags |= 0xFFFFFFFF;
|
||||
// TODO investigate, why this fails and uncomment
|
||||
/*
|
||||
try {
|
||||
Pattern.compile("foo", flags);
|
||||
fail("Expected IllegalArgumentException to be thrown");
|
||||
} catch (IllegalArgumentException e) {
|
||||
// This is the expected exception
|
||||
}*/
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.teavm.classlib.java.util.regex;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
|
||||
/**
|
||||
* Tests the error index and the pattern reported by PatternSyntaxException
* for malformed regular expressions
|
||||
*/
|
||||
@SuppressWarnings("nls")
|
||||
public class PatternSyntaxExceptionTest extends TestCase {
|
||||
public void testCase() {
|
||||
String regex = "(";
|
||||
try {
|
||||
Pattern.compile(regex);
|
||||
fail("PatternSyntaxException expected");
|
||||
} catch (PatternSyntaxException e) {
|
||||
// TOFIX: Commented out assertEquals tests...
|
||||
// TOFIX: should we match exception strings?
|
||||
// assertEquals("Unclosed group", e.getDescription());
|
||||
assertEquals(1, e.getIndex());
|
||||
// assertEquals("Unclosed group near index 1\n(\n ^",
|
||||
// e.getMessage());
|
||||
assertEquals(regex, e.getPattern());
|
||||
}
|
||||
}
|
||||
|
||||
public void testCase2() {
|
||||
String regex = "[4-";
|
||||
try {
|
||||
Pattern.compile(regex);
|
||||
fail("PatternSyntaxException expected");
|
||||
} catch (PatternSyntaxException e) {
|
||||
// TOFIX: Commented out assertEquals tests...
|
||||
// TOFIX: should we match exception strings?
|
||||
// assertEquals("Illegal character range", e.getDescription());
|
||||
assertEquals(3, e.getIndex());
|
||||
// assertEquals("Illegal character range near index 3\n[4-\n ^",
|
||||
// e.getMessage());
|
||||
assertEquals(regex, e.getPattern());
|
||||
}
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user