001/*
002 *  gnu/regexp/RE.java
003 *  Copyright (C) 1998-2001 Wes Biggs
004 *
005 *  This library is free software; you can redistribute it and/or modify
006 *  it under the terms of the GNU Lesser General Public License as published
007 *  by the Free Software Foundation; either version 2.1 of the License, or
008 *  (at your option) any later version.
009 *
010 *  This library is distributed in the hope that it will be useful,
011 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
012 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013 *  GNU Lesser General Public License for more details.
014 *
015 *  You should have received a copy of the GNU Lesser General Public License
016 *  along with this program; if not, write to the Free Software
017 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
018 */
019
020package gnu.regexp;
021import java.io.InputStream;
022import java.io.Reader;
023import java.io.Serializable;
024import java.util.Locale;
025import java.util.PropertyResourceBundle;
026import java.util.ResourceBundle;
027import java.util.Vector;
028
029class IntPair implements Serializable {
030  public int first, second;
031}
032
033class CharUnit implements Serializable {
034  public char ch;
035  public boolean bk;
036}
037
038/**
039 * RE provides the user interface for compiling and matching regular
040 * expressions.
041 * <P>
042 * A regular expression object (class RE) is compiled by constructing it
043 * from a String, StringBuffer or character array, with optional 
044 * compilation flags (below)
045 * and an optional syntax specification (see RESyntax; if not specified,
046 * <code>RESyntax.RE_SYNTAX_PERL5</code> is used).
047 * <P>
048 * Various methods attempt to match input text against a compiled
049 * regular expression.  These methods are:
050 * <LI><code>isMatch</code>: returns true if the input text in its entirety
051 * matches the regular expression pattern.
052 * <LI><code>getMatch</code>: returns the first match found in the input text,
053 * or null if no match is found.
054 * <LI><code>getAllMatches</code>: returns an array of all non-overlapping 
055 * matches found in the input text.  If no matches are found, the array is
056 * zero-length.
057 * <LI><code>substitute</code>: substitute the first occurence of the pattern
058 * in the input text with a replacement string (which may include
059 * metacharacters $0-$9, see REMatch.substituteInto).
060 * <LI><code>substituteAll</code>: same as above, but repeat for each match
061 * before returning.
062 * <LI><code>getMatchEnumeration</code>: returns an REMatchEnumeration object
063 * that allows iteration over the matches (see REMatchEnumeration for some
064 * reasons why you may want to do this instead of using <code>getAllMatches</code>.
065 * <P>
066 *
067 * These methods all have similar argument lists.  The input can be a
068 * String, a character array, a StringBuffer, a Reader or an
069 * InputStream of some sort.  Note that when using a Reader or
070 * InputStream, the stream read position cannot be guaranteed after
071 * attempting a match (this is not a bug, but a consequence of the way
072 * regular expressions work).  Using an REMatchEnumeration can
073 * eliminate most positioning problems.
074 *
075 * <P>
076 *
077 * The optional index argument specifies the offset from the beginning
078 * of the text at which the search should start (see the descriptions
079 * of some of the execution flags for how this can affect positional
080 * pattern operators).  For a Reader or InputStream, this means an
081 * offset from the current read position, so subsequent calls with the
082 * same index argument on a Reader or an InputStream will not
083 * necessarily access the same position on the stream, whereas
084 * repeated searches at a given index in a fixed string will return
085 * consistent results.
086 *
087 * <P>
088 * You can optionally affect the execution environment by using a
089 * combination of execution flags (constants listed below).
090 * 
091 * <P>
092 * All operations on a regular expression are performed in a
093 * thread-safe manner.
094 *
095 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
096 * @version 1.1.4-dev, to be released
097 */
098
099public class RE extends REToken {
100  // This String will be returned by getVersion()
101  private static final String VERSION = "1.1.4-dev";
102
103  // The localized strings are kept in a separate file
104  private static ResourceBundle messages = PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault());
105
106  // These are, respectively, the first and last tokens in our linked list
107  // If there is only one token, firstToken == lastToken
108  private REToken firstToken, lastToken;
109
110  // This is the number of subexpressions in this regular expression,
111  // with a minimum value of zero.  Returned by getNumSubs()
112  private int numSubs;
113
114    /** Minimum length, in characters, of any possible match. */
115    private int minimumLength;
116
117  /**
118   * Compilation flag. Do  not  differentiate  case.   Subsequent
119   * searches  using  this  RE will be case insensitive.
120   */
121  public static final int REG_ICASE = 2;
122
123  /**
124   * Compilation flag. The match-any-character operator (dot)
125   * will match a newline character.  When set this overrides the syntax
126   * bit RE_DOT_NEWLINE (see RESyntax for details).  This is equivalent to
127   * the "/s" operator in Perl.
128   */
129  public static final int REG_DOT_NEWLINE = 4;
130
131  /**
132   * Compilation flag. Use multiline mode.  In this mode, the ^ and $
133   * anchors will match based on newlines within the input. This is
134   * equivalent to the "/m" operator in Perl.
135   */
136  public static final int REG_MULTILINE = 8;
137
138  /**
139   * Execution flag.
140   * The match-beginning operator (^) will not match at the beginning
141   * of the input string. Useful for matching on a substring when you
142   * know the context of the input is such that position zero of the
143   * input to the match test is not actually position zero of the text.
144   * <P>
145   * This example demonstrates the results of various ways of matching on
146   * a substring.
147   * <P>
148   * <CODE>
149   * String s = "food bar fool";<BR>
150   * RE exp = new RE("^foo.");<BR>
151   * REMatch m0 = exp.getMatch(s);<BR>
152   * REMatch m1 = exp.getMatch(s.substring(8));<BR>
153   * REMatch m2 = exp.getMatch(s.substring(8),0,RE.REG_NOTBOL); <BR>
154   * REMatch m3 = exp.getMatch(s,8);                            <BR>
155   * REMatch m4 = exp.getMatch(s,8,RE.REG_ANCHORINDEX);         <BR>
156   * <P>
157   * // Results:<BR>
158   * //  m0 = "food"<BR>
159   * //  m1 = "fool"<BR>
160   * //  m2 = null<BR>
161   * //  m3 = null<BR>
162   * //  m4 = "fool"<BR>
163   * </CODE>
164   */
165  public static final int REG_NOTBOL = 16;
166
167  /**
168   * Execution flag.
169   * The match-end operator ($) does not match at the end
170   * of the input string. Useful for matching on substrings.
171   */
172  public static final int REG_NOTEOL = 32;
173
174  /**
175   * Execution flag.
176   * When a match method is invoked that starts matching at a non-zero
177   * index into the input, treat the input as if it begins at the index
178   * given.  The effect of this flag is that the engine does not "see"
179   * any text in the input before the given index.  This is useful so
180   * that the match-beginning operator (^) matches not at position 0
181   * in the input string, but at the position the search started at
182   * (based on the index input given to the getMatch function).  See
183   * the example under REG_NOTBOL.  It also affects the use of the \&lt;
184   * and \b operators.
185   */
186  public static final int REG_ANCHORINDEX = 64;
187
188  /**
189   * Execution flag.
190   * The substitute and substituteAll methods will not attempt to
191   * interpolate occurrences of $1-$9 in the replacement text with
192   * the corresponding subexpressions.  For example, you may want to
193   * replace all matches of "one dollar" with "$1".
194   */
195  public static final int REG_NO_INTERPOLATE = 128;
196
197  /** Returns a string representing the version of the gnu.regexp package. */
198  public static final String version() {
199    return VERSION;
200  }
201
202  // Retrieves a message from the ResourceBundle
203  static final String getLocalizedMessage(String key) {
204    return messages.getString(key);
205  }
206
207  /**
208   * Constructs a regular expression pattern buffer without any compilation
209   * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5).
210   *
211   * @param pattern A regular expression pattern, in the form of a String,
212   *   StringBuffer or char[].  Other input types will be converted to
213   *   strings using the toString() method.
214   * @exception REException The input pattern could not be parsed.
215   * @exception NullPointerException The pattern was null.
216   */
217  public RE(Object pattern) throws REException {
218    this(pattern,0,RESyntax.RE_SYNTAX_PERL5,0,0);
219  }
220
221  /**
222   * Constructs a regular expression pattern buffer using the specified
223   * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5).
224   *
225   * @param pattern A regular expression pattern, in the form of a String,
226   *   StringBuffer, or char[].  Other input types will be converted to
227   *   strings using the toString() method.
228   * @param cflags The logical OR of any combination of the compilation flags listed above.
229   * @exception REException The input pattern could not be parsed.
230   * @exception NullPointerException The pattern was null.
231   */
232  public RE(Object pattern, int cflags) throws REException {
233    this(pattern,cflags,RESyntax.RE_SYNTAX_PERL5,0,0);
234  }
235
236  /**
237   * Constructs a regular expression pattern buffer using the specified
238   * compilation flags and regular expression syntax.
239   *
240   * @param pattern A regular expression pattern, in the form of a String,
241   *   StringBuffer, or char[].  Other input types will be converted to
242   *   strings using the toString() method.
243   * @param cflags The logical OR of any combination of the compilation flags listed above.
244   * @param syntax The type of regular expression syntax to use.
245   * @exception REException The input pattern could not be parsed.
246   * @exception NullPointerException The pattern was null.
247   */
248  public RE(Object pattern, int cflags, RESyntax syntax) throws REException {
249    this(pattern,cflags,syntax,0,0);
250  }
251
252  // internal constructor used for alternation
253  private RE(REToken first, REToken last,int subs, int subIndex, int minLength) {
254    super(subIndex);
255    firstToken = first;
256    lastToken = last;
257    numSubs = subs;
258    minimumLength = minLength;
259    addToken(new RETokenEndSub(subIndex));
260  }
261
262  private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
263    super(myIndex); // Subexpression index of this token.
264    initialize(patternObj, cflags, syntax, myIndex, nextSub);
265  }
266
267    // For use by subclasses
268    protected RE() { super(0); }
269
270    // The meat of construction
271  protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
272      char[] pattern;
273    if (patternObj instanceof String) {
274      pattern = ((String) patternObj).toCharArray();
275    } else if (patternObj instanceof char[]) {
276      pattern = (char[]) patternObj;
277    } else if (patternObj instanceof StringBuffer) {
278      pattern = new char [((StringBuffer) patternObj).length()];
279      ((StringBuffer) patternObj).getChars(0,pattern.length,pattern,0);
280    } else {
281        pattern = patternObj.toString().toCharArray();
282    }
283
284    int pLength = pattern.length;
285
286    numSubs = 0; // Number of subexpressions in this token.
287    Vector branches = null;
288
289    // linked list of tokens (sort of -- some closed loops can exist)
290    firstToken = lastToken = null;
291
292    // Precalculate these so we don't pay for the math every time we
293    // need to access them.
294    boolean insens = ((cflags & REG_ICASE) > 0);
295
296    // Parse pattern into tokens.  Does anyone know if it's more efficient
297    // to use char[] than a String.charAt()?  I'm assuming so.
298
299    // index tracks the position in the char array
300    int index = 0;
301
302    // this will be the current parse character (pattern[index])
303    CharUnit unit = new CharUnit();
304
305    // This is used for {x,y} calculations
306    IntPair minMax = new IntPair();
307
308    // Buffer a token so we can create a TokenRepeated, etc.
309    REToken currentToken = null;
310    char ch;
311
312    while (index < pLength) {
313      // read the next character unit (including backslash escapes)
314      index = getCharUnit(pattern,index,unit);
315
316      // ALTERNATION OPERATOR
317      //  \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
318      //  not available if RE_LIMITED_OPS is set
319
320      // TODO: the '\n' literal here should be a test against REToken.newline,
321      // which unfortunately may be more than a single character.
322      if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
323             || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) )
324           && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
325        // make everything up to here be a branch. create vector if nec.
326        addToken(currentToken);
327        RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);
328        minimumLength = 0;
329        if (branches == null) {
330            branches = new Vector();
331        }
332        branches.addElement(theBranch);
333        firstToken = lastToken = currentToken = null;
334      }
335      
336      // INTERVAL OPERATOR:
337      //  {x} | {x,} | {x,y}  (RE_INTERVALS && RE_NO_BK_BRACES)
338      //  \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES)
339      //
340      // OPEN QUESTION: 
341      //  what is proper interpretation of '{' at start of string?
342
343      else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
344        int newIndex = getMinMax(pattern,index,minMax,syntax);
345        if (newIndex > index) {
346          if (minMax.first > minMax.second)
347            throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
348          if (currentToken == null)
349            throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
350          if (currentToken instanceof RETokenRepeated) 
351            throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
352          if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
353            throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
354          if ((currentToken.getMinimumLength() == 0) && (minMax.second == Integer.MAX_VALUE))
355            throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,newIndex);
356          index = newIndex;
357          currentToken = setRepeated(currentToken,minMax.first,minMax.second,index); 
358        }
359        else {
360          addToken(currentToken);
361          currentToken = new RETokenChar(subIndex,unit.ch,insens);
362        } 
363      }
364      
365      // LIST OPERATOR:
366      //  [...] | [^...]
367
368      else if ((unit.ch == '[') && !unit.bk) {
369        Vector options = new Vector();
370        boolean negative = false;
371        char lastChar = 0;
372        if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
373        
374        // Check for initial caret, negation
375        if ((ch = pattern[index]) == '^') {
376          negative = true;
377          if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
378          ch = pattern[index];
379        }
380
381        // Check for leading right bracket literal
382        if (ch == ']') {
383          lastChar = ch;
384          if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
385        }
386
387        while ((ch = pattern[index++]) != ']') {
388          if ((ch == '-') && (lastChar != 0)) {
389            if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
390            if ((ch = pattern[index]) == ']') {
391              options.addElement(new RETokenChar(subIndex,lastChar,insens));
392              lastChar = '-';
393            } else {
394              options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
395              lastChar = 0;
396              index++;
397            }
398          } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
399            if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
400            int posixID = -1;
401            boolean negate = false;
402            char asciiEsc = 0;
403            if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
404              switch (pattern[index]) {
405              case 'D':
406                negate = true;
407              case 'd':
408                posixID = RETokenPOSIX.DIGIT;
409                break;
410              case 'S':
411                negate = true;
412              case 's':
413                posixID = RETokenPOSIX.SPACE;
414                break;
415              case 'W':
416                negate = true;
417              case 'w':
418                posixID = RETokenPOSIX.ALNUM;
419                break;
420              }
421            }
422            else if ("nrt".indexOf(pattern[index]) != -1) {
423              switch (pattern[index]) {
424                case 'n':
425                  asciiEsc = '\n';
426                  break;
427                case 't':
428                  asciiEsc = '\t';
429                  break;
430                case 'r':
431                  asciiEsc = '\r';
432                  break;
433              }
434            }
435            if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
436            
437            if (posixID != -1) {
438              options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
439            } else if (asciiEsc != 0) {
440              lastChar = asciiEsc;
441            } else {
442              lastChar = pattern[index];
443            }
444            ++index;
445          } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
446            StringBuffer posixSet = new StringBuffer();
447            index = getPosixSet(pattern,index+1,posixSet);
448            int posixId = RETokenPOSIX.intValue(posixSet.toString());
449            if (posixId != -1)
450              options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
451          } else {
452            if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
453            lastChar = ch;
454          }
455          if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
456        } // while in list
457        // Out of list, index is one past ']'
458            
459        if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
460            
461        // Create a new RETokenOneOf
462        addToken(currentToken);
463        options.trimToSize();
464        currentToken = new RETokenOneOf(subIndex,options,negative);
465      }
466
467      // SUBEXPRESSIONS
468      //  (...) | \(...\) depending on RE_NO_BK_PARENS
469
470      else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
471        boolean pure = false;
472        boolean comment = false;
473        boolean lookAhead = false;
474        boolean negativelh = false;
475        if ((index+1 < pLength) && (pattern[index] == '?')) {
476          switch (pattern[index+1]) {
477          case '!':
478            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
479              pure = true;
480              negativelh = true;
481              lookAhead = true;
482              index += 2;
483            }
484            break;
485          case '=':
486            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
487              pure = true;
488              lookAhead = true;
489              index += 2;
490            }
491            break;
492          case ':':
493            if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
494              pure = true;
495              index += 2;
496            }
497            break;
498          case '#':
499            if (syntax.get(RESyntax.RE_COMMENTS)) {
500              comment = true;
501            }
502            break;
503          default:
504            throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
505          }
506        }
507
508        if (index >= pLength) {
509            throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
510        }
511
512        // find end of subexpression
513        int endIndex = index;
514        int nextIndex = index;
515        int nested = 0;
516
517        while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
518                && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
519          if ((endIndex = nextIndex) >= pLength)
520            throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
521          else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
522            nested++;
523          else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
524            nested--;
525
526        // endIndex is now position at a ')','\)' 
527        // nextIndex is end of string or position after ')' or '\)'
528
529        if (comment) index = nextIndex;
530        else { // not a comment
531          // create RE subexpression as token.
532          addToken(currentToken);
533          if (!pure) {
534            numSubs++;
535          }
536
537          int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs;
538          currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
539          numSubs += ((RE) currentToken).getNumSubs();
540
541          if (lookAhead) {
542              currentToken = new RETokenLookAhead(currentToken,negativelh);
543          }
544
545          index = nextIndex;
546        } // not a comment
547      } // subexpression
548    
549      // UNMATCHED RIGHT PAREN
550      // ) or \) throw exception if
551      // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
552      else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
553        throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
554      }
555
556      // START OF LINE OPERATOR
557      //  ^
558
559      else if ((unit.ch == '^') && !unit.bk) {
560        addToken(currentToken);
561        currentToken = null;
562        addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
563      }
564
565      // END OF LINE OPERATOR
566      //  $
567
568      else if ((unit.ch == '$') && !unit.bk) {
569        addToken(currentToken);
570        currentToken = null;
571        addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
572      }
573
574      // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
575      //  .
576
577      else if ((unit.ch == '.') && !unit.bk) {
578        addToken(currentToken);
579        currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
580      }
581
582      // ZERO-OR-MORE REPEAT OPERATOR
583      //  *
584
585      else if ((unit.ch == '*') && !unit.bk) {
586        if (currentToken == null)
587          throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
588        if (currentToken instanceof RETokenRepeated)
589          throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
590        if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
591          throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
592        if (currentToken.getMinimumLength() == 0)
593          throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
594        currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
595      }
596
597      // ONE-OR-MORE REPEAT OPERATOR
598      //  + | \+ depending on RE_BK_PLUS_QM
599      //  not available if RE_LIMITED_OPS is set
600
601      else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
602        if (currentToken == null)
603          throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
604        if (currentToken instanceof RETokenRepeated)
605          throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
606        if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
607          throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
608        if (currentToken.getMinimumLength() == 0)
609          throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
610        currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
611      }
612
613      // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR
614      //  ? | \? depending on RE_BK_PLUS_QM
615      //  not available if RE_LIMITED_OPS is set
616      //  stingy matching if RE_STINGY_OPS is set and it follows a quantifier
617
618      else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
619        if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
620
621        // Check for stingy matching on RETokenRepeated
622        if (currentToken instanceof RETokenRepeated) {
623          if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy())
624            ((RETokenRepeated)currentToken).makeStingy();
625          else
626            throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
627        }
628        else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
629          throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
630        else
631          currentToken = setRepeated(currentToken,0,1,index);
632      }
633        
634      // BACKREFERENCE OPERATOR
635      //  \1 \2 ... \9
636      // not available if RE_NO_BK_REFS is set
637
638      else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
639        addToken(currentToken);
640        currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens);
641      }
642
643      // START OF STRING OPERATOR
644      //  \A if RE_STRING_ANCHORS is set
645      
646      else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
647        addToken(currentToken);
648        currentToken = new RETokenStart(subIndex,null);
649      }
650
651      // WORD BREAK OPERATOR
652      //  \b if ????
653
654      else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
655          addToken(currentToken);
656          currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
657      } 
658
659      // WORD BEGIN OPERATOR 
660      //  \< if ????
661      else if (unit.bk && (unit.ch == '<')) {
662          addToken(currentToken);
663          currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
664      } 
665
666      // WORD END OPERATOR 
667      //  \> if ????
668      else if (unit.bk && (unit.ch == '>')) {
669          addToken(currentToken);
670          currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
671      } 
672
673      // NON-WORD BREAK OPERATOR
674      // \B if ????
675
676      else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
677          addToken(currentToken);
678          currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
679      } 
680
681      
682      // DIGIT OPERATOR
683      //  \d if RE_CHAR_CLASS_ESCAPES is set
684      
685      else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
686        addToken(currentToken);
687        currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
688      }
689
690      // NON-DIGIT OPERATOR
691      //  \D
692
693        else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
694          addToken(currentToken);
695          currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
696        }
697
698        // NEWLINE ESCAPE
699        //  \n
700
701        else if (unit.bk && (unit.ch == 'n')) {
702          addToken(currentToken);
703          currentToken = new RETokenChar(subIndex,'\n',false);
704        }
705
706        // RETURN ESCAPE
707        //  \r
708
709        else if (unit.bk && (unit.ch == 'r')) {
710          addToken(currentToken);
711          currentToken = new RETokenChar(subIndex,'\r',false);
712        }
713
714        // WHITESPACE OPERATOR
715        //  \s if RE_CHAR_CLASS_ESCAPES is set
716
717        else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
718          addToken(currentToken);
719          currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
720        }
721
722        // NON-WHITESPACE OPERATOR
723        //  \S
724
725        else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
726          addToken(currentToken);
727          currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
728        }
729
730        // TAB ESCAPE
731        //  \t
732
733        else if (unit.bk && (unit.ch == 't')) {
734          addToken(currentToken);
735          currentToken = new RETokenChar(subIndex,'\t',false);
736        }
737
738        // ALPHANUMERIC OPERATOR
739        //  \w
740
741        else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
742          addToken(currentToken);
743          currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
744        }
745
746        // NON-ALPHANUMERIC OPERATOR
747        //  \W
748
749        else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
750          addToken(currentToken);
751          currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
752        }
753
754        // END OF STRING OPERATOR
755        //  \Z
756
757        else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
758          addToken(currentToken);
759          currentToken = new RETokenEnd(subIndex,null);
760        }
761
762        // NON-SPECIAL CHARACTER (or escape to make literal)
763        //  c | \* for example
764
765        else {  // not a special character
766          addToken(currentToken);
767          currentToken = new RETokenChar(subIndex,unit.ch,insens);
768        } 
769      } // end while
770
771    // Add final buffered token and an EndSub marker
772    addToken(currentToken);
773      
774    if (branches != null) {
775        branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength));
776        branches.trimToSize(); // compact the Vector
777        minimumLength = 0;
778        firstToken = lastToken = null;
779        addToken(new RETokenOneOf(subIndex,branches,false));
780    } 
781    else addToken(new RETokenEndSub(subIndex));
782
783  }
784
785  private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
786    unit.ch = input[index++];
787    if (unit.bk = (unit.ch == '\\'))
788      if (index < input.length)
789        unit.ch = input[index++];
790      else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
791    return index;
792  }
793
794  /**
795   * Checks if the regular expression matches the input in its entirety.
796   *
797   * @param input The input text.
798   */
799  public boolean isMatch(Object input) {
800    return isMatch(input,0,0);
801  }
802  
803  /**
804   * Checks if the input string, starting from index, is an exact match of
805   * this regular expression.
806   *
807   * @param input The input text.
808   * @param index The offset index at which the search should be begin.
809   */
810  public boolean isMatch(Object input,int index) {
811    return isMatch(input,index,0);
812  }
813  
814
815  /**
816   * Checks if the input, starting from index and using the specified
817   * execution flags, is an exact match of this regular expression.
818   *
819   * @param input The input text.
820   * @param index The offset index at which the search should be begin.
821   * @param eflags The logical OR of any execution flags above.
822   */
823  public boolean isMatch(Object input,int index,int eflags) {
824    return isMatchImpl(makeCharIndexed(input,index),index,eflags);
825  }
826
827  private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
828    if (firstToken == null)  // Trivial case
829      return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
830    REMatch m = new REMatch(numSubs, index, eflags);
831    if (firstToken.match(input, m)) {
832        while (m != null) {
833            if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
834                return true;
835            }
836            m = m.next;
837        }
838    }
839    return false;
840  }
841    
842  /**
843   * Returns the maximum number of subexpressions in this regular expression.
844   * If the expression contains branches, the value returned will be the
845   * maximum subexpressions in any of the branches.
846   */
847  public int getNumSubs() {
848    return numSubs;
849  }
850
851  // Overrides REToken.setUncle
852  void setUncle(REToken uncle) {
853      if (lastToken != null) {
854          lastToken.setUncle(uncle);
855      } else super.setUncle(uncle); // to deal with empty subexpressions
856  }
857
858  // Overrides REToken.chain
859
860  boolean chain(REToken next) {
861    super.chain(next);
862    setUncle(next);
863    return true;
864  }
865
866  /**
867   * Returns the minimum number of characters that could possibly
868   * constitute a match of this regular expression.
869   */
870  public int getMinimumLength() {
871      return minimumLength;
872  }
873
874  /**
875   * Returns an array of all matches found in the input.
876   *
877   * If the regular expression allows the empty string to match, it will
878   * substitute matches at all positions except the end of the input.
879   *
880   * @param input The input text.
881   * @return a non-null (but possibly zero-length) array of matches
882   */
883  public REMatch[] getAllMatches(Object input) {
884    return getAllMatches(input,0,0);
885  }
886
887  /**
888   * Returns an array of all matches found in the input,
889   * beginning at the specified index position.
890   *
891   * If the regular expression allows the empty string to match, it will
892   * substitute matches at all positions except the end of the input.
893   *
894   * @param input The input text.
895   * @param index The offset index at which the search should be begin.
896   * @return a non-null (but possibly zero-length) array of matches
897   */
898  public REMatch[] getAllMatches(Object input, int index) {
899    return getAllMatches(input,index,0);
900  }
901
902  /**
903   * Returns an array of all matches found in the input string,
904   * beginning at the specified index position and using the specified
905   * execution flags.
906   *
907   * If the regular expression allows the empty string to match, it will
908   * substitute matches at all positions except the end of the input.
909   *
910   * @param input The input text.
911   * @param index The offset index at which the search should be begin.
912   * @param eflags The logical OR of any execution flags above.
913   * @return a non-null (but possibly zero-length) array of matches
914   */
915  public REMatch[] getAllMatches(Object input, int index, int eflags) {
916    return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
917  }
918
919  // this has been changed since 1.03 to be non-overlapping matches
920  private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
921    Vector all = new Vector();
922    REMatch m = null;
923    while ((m = getMatchImpl(input,index,eflags,null)) != null) {
924      all.addElement(m);
925      index = m.getEndIndex();
926      if (m.end[0] == 0) {   // handle pathological case of zero-length match
927        index++;
928        input.move(1);
929      } else {
930        input.move(m.end[0]);
931      }
932      if (!input.isValid()) break;
933    }
934    REMatch[] mset = new REMatch[all.size()];
935    all.copyInto(mset);
936    return mset;
937  }
938  
939    /* Implements abstract method REToken.match() */
940    boolean match(CharIndexed input, REMatch mymatch) { 
941        if (firstToken == null) return next(input, mymatch);
942
943        // Note the start of this subexpression
944        mymatch.start[subIndex] = mymatch.index;
945
946        return firstToken.match(input, mymatch);
947    }
948  
949  /**
950   * Returns the first match found in the input.  If no match is found,
951   * null is returned.
952   *
953   * @param input The input text.
954   * @return An REMatch instance referencing the match, or null if none.
955   */
956  public REMatch getMatch(Object input) {
957    return getMatch(input,0,0);
958  }
959  
960  /**
961   * Returns the first match found in the input, beginning
962   * the search at the specified index.  If no match is found,
963   * returns null.
964   *
965   * @param input The input text.
966   * @param index The offset within the text to begin looking for a match.
967   * @return An REMatch instance referencing the match, or null if none.
968   */
969  public REMatch getMatch(Object input, int index) {
970    return getMatch(input,index,0);
971  }
972  
973  /**
974   * Returns the first match found in the input, beginning
975   * the search at the specified index, and using the specified
976   * execution flags.  If no match is found, returns null.
977   *
978   * @param input The input text.
979   * @param index The offset index at which the search should be begin.
980   * @param eflags The logical OR of any execution flags above.
981   * @return An REMatch instance referencing the match, or null if none.
982   */
983  public REMatch getMatch(Object input, int index, int eflags) {
984    return getMatch(input,index,eflags,null);
985  }
986
987  /**
988   * Returns the first match found in the input, beginning the search
989   * at the specified index, and using the specified execution flags.
990   * If no match is found, returns null.  If a StringBuffer is
991   * provided and is non-null, the contents of the input text from the
992   * index to the beginning of the match (or to the end of the input,
993   * if there is no match) are appended to the StringBuffer.
994   *
995   * @param input The input text.
996   * @param index The offset index at which the search should be begin.
997   * @param eflags The logical OR of any execution flags above.
998   * @param buffer The StringBuffer to save pre-match text in.
999   * @return An REMatch instance referencing the match, or null if none.  */
1000  public REMatch getMatch(Object input, int index, int eflags, StringBuffer buffer) {
1001    return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
1002  }
1003
1004  REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
1005      // Create a new REMatch to hold results
1006      REMatch mymatch = new REMatch(numSubs, anchor, eflags);
1007      do {
1008          // Optimization: check if anchor + minimumLength > length
1009          if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
1010              if (match(input, mymatch)) {
1011                  // Find longest match of them all to observe leftmost longest
1012                  REMatch longest = mymatch;
1013                  while ((mymatch = mymatch.next) != null) {
1014                      if (mymatch.index > longest.index) {
1015                          longest = mymatch;
1016                      }
1017                  }
1018                  
1019                  longest.end[0] = longest.index;
1020                  longest.finish(input);
1021                  return longest;
1022              }
1023          }
1024          mymatch.clear(++anchor);
1025          // Append character to buffer if needed
1026          if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
1027              buffer.append(input.charAt(0));
1028          }
1029      } while (input.move(1));
1030      
1031      return null;
1032  }
1033
1034  /**
1035   * Returns an REMatchEnumeration that can be used to iterate over the
1036   * matches found in the input text.
1037   *
1038   * @param input The input text.
1039   * @return A non-null REMatchEnumeration instance.
1040   */
1041  public REMatchEnumeration getMatchEnumeration(Object input) {
1042    return getMatchEnumeration(input,0,0);
1043  }
1044
1045
1046  /**
1047   * Returns an REMatchEnumeration that can be used to iterate over the
1048   * matches found in the input text.
1049   *
1050   * @param input The input text.
1051   * @param index The offset index at which the search should be begin.
1052   * @return A non-null REMatchEnumeration instance, with its input cursor
1053   *  set to the index position specified.
1054   */
1055  public REMatchEnumeration getMatchEnumeration(Object input, int index) {
1056    return getMatchEnumeration(input,index,0);
1057  }
1058
1059  /**
1060   * Returns an REMatchEnumeration that can be used to iterate over the
1061   * matches found in the input text.
1062   *
1063   * @param input The input text.
1064   * @param index The offset index at which the search should be begin.
1065   * @param eflags The logical OR of any execution flags above.
1066   * @return A non-null REMatchEnumeration instance, with its input cursor
1067   *  set to the index position specified.
1068   */
1069  public REMatchEnumeration getMatchEnumeration(Object input, int index, int eflags) {
1070    return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
1071  }
1072
1073
1074  /**
1075   * Substitutes the replacement text for the first match found in the input.
1076   *
1077   * @param input The input text.
1078   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1079   * @return A String interpolating the substituted text.
1080   * @see REMatch#substituteInto
1081   */
1082  public String substitute(Object input,String replace) {
1083    return substitute(input,replace,0,0);
1084  }
1085
1086  /**
1087   * Substitutes the replacement text for the first match found in the input
1088   * beginning at the specified index position.  Specifying an index
1089   * effectively causes the regular expression engine to throw away the
1090   * specified number of characters. 
1091   *
1092   * @param input The input text.
1093   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1094   * @param index The offset index at which the search should be begin.
1095   * @return A String containing the substring of the input, starting
1096   *   at the index position, and interpolating the substituted text.
1097   * @see REMatch#substituteInto
1098   */
1099  public String substitute(Object input,String replace,int index) {
1100    return substitute(input,replace,index,0);
1101  }
1102
1103  /**
1104   * Substitutes the replacement text for the first match found in the input
1105   * string, beginning at the specified index position and using the
1106   * specified execution flags.
1107   *
1108   * @param input The input text.
1109   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1110   * @param index The offset index at which the search should be begin.
1111   * @param eflags The logical OR of any execution flags above.
1112   * @return A String containing the substring of the input, starting
1113   *   at the index position, and interpolating the substituted text.
1114   * @see REMatch#substituteInto
1115   */
1116  public String substitute(Object input,String replace,int index,int eflags) {
1117    return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
1118  }
1119
1120  private String substituteImpl(CharIndexed input,String replace,int index,int eflags) {
1121    StringBuffer buffer = new StringBuffer();
1122    REMatch m = getMatchImpl(input,index,eflags,buffer);
1123    if (m==null) return buffer.toString();
1124    buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1125                   replace : m.substituteInto(replace) );
1126    if (input.move(m.end[0])) {
1127      do {
1128        buffer.append(input.charAt(0));
1129      } while (input.move(1));
1130    }
1131    return buffer.toString();
1132  }
1133  
1134  /**
1135   * Substitutes the replacement text for each non-overlapping match found 
1136   * in the input text.
1137   *
1138   * @param input The input text.
1139   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1140   * @return A String interpolating the substituted text.
1141   * @see REMatch#substituteInto
1142   */
1143  public String substituteAll(Object input,String replace) {
1144    return substituteAll(input,replace,0,0);
1145  }
1146
1147  /**
1148   * Substitutes the replacement text for each non-overlapping match found 
1149   * in the input text, starting at the specified index.
1150   *
1151   * If the regular expression allows the empty string to match, it will
1152   * substitute matches at all positions except the end of the input.
1153   *
1154   * @param input The input text.
1155   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1156   * @param index The offset index at which the search should be begin.
1157   * @return A String containing the substring of the input, starting
1158   *   at the index position, and interpolating the substituted text.
1159   * @see REMatch#substituteInto
1160   */
1161  public String substituteAll(Object input,String replace,int index) {
1162    return substituteAll(input,replace,index,0);
1163  }
1164 
1165  /**
1166   * Substitutes the replacement text for each non-overlapping match found 
1167   * in the input text, starting at the specified index and using the
1168   * specified execution flags.
1169   *
1170   * @param input The input text.
1171   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1172   * @param index The offset index at which the search should be begin.
1173   * @param eflags The logical OR of any execution flags above.
1174   * @return A String containing the substring of the input, starting
1175   *   at the index position, and interpolating the substituted text.
1176   * @see REMatch#substituteInto
1177   */
1178  public String substituteAll(Object input,String replace,int index,int eflags) {
1179    return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
1180  }
1181
1182  private String substituteAllImpl(CharIndexed input,String replace,int index,int eflags) {
1183    StringBuffer buffer = new StringBuffer();
1184    REMatch m;
1185    while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
1186        buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1187                       replace : m.substituteInto(replace) );
1188      index = m.getEndIndex();
1189      if (m.end[0] == 0) {
1190        char ch = input.charAt(0);
1191        if (ch != CharIndexed.OUT_OF_BOUNDS) 
1192            buffer.append(ch);
1193        input.move(1);
1194      } else {
1195          input.move(m.end[0]);
1196      }
1197
1198      if (!input.isValid()) break;
1199    }
1200    return buffer.toString();
1201  }
1202  
1203  /* Helper function for constructor */
1204  private void addToken(REToken next) {
1205    if (next == null) return;
1206    minimumLength += next.getMinimumLength();
1207    if (firstToken == null) {
1208        lastToken = firstToken = next;
1209    } else {
1210      // if chain returns false, it "rejected" the token due to
1211      // an optimization, and next was combined with lastToken
1212      if (lastToken.chain(next)) {
1213          lastToken = next;
1214      }
1215    }
1216  }
1217
1218  private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
1219    if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
1220    return new RETokenRepeated(current.subIndex,current,min,max);
1221  }
1222
1223  private static int getPosixSet(char[] pattern,int index,StringBuffer buf) {
1224    // Precondition: pattern[index-1] == ':'
1225    // we will return pos of closing ']'.
1226    int i;
1227    for (i=index; i<(pattern.length-1); i++) {
1228      if ((pattern[i] == ':') && (pattern[i+1] == ']'))
1229        return i+2;
1230      buf.append(pattern[i]);
1231    }
1232    return index; // didn't match up
1233  }
1234
1235  private int getMinMax(char[] input,int index,IntPair minMax,RESyntax syntax) throws REException {
1236    // Precondition: input[index-1] == '{', minMax != null
1237
1238    boolean mustMatch = !syntax.get(RESyntax.RE_NO_BK_BRACES);
1239    int startIndex = index;
1240    if (index == input.length) {
1241      if (mustMatch)
1242        throw new REException(getLocalizedMessage("unmatched.brace"),REException.REG_EBRACE,index);
1243      else
1244        return startIndex;
1245    }
1246    
1247    int min,max=0;
1248    CharUnit unit = new CharUnit();
1249    StringBuffer buf = new StringBuffer();
1250    
1251    // Read string of digits
1252    do {
1253      index = getCharUnit(input,index,unit);
1254      if (Character.isDigit(unit.ch))
1255        buf.append(unit.ch);
1256    } while ((index != input.length) && Character.isDigit(unit.ch));
1257
1258    // Check for {} tomfoolery
1259    if (buf.length() == 0) {
1260      if (mustMatch)
1261        throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1262      else
1263        return startIndex;
1264    }
1265
1266    min = Integer.parseInt(buf.toString());
1267        
1268    if ((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk))
1269      max = min;
1270    else if (index == input.length)
1271      if (mustMatch)
1272        throw new REException(getLocalizedMessage("interval.no.end"),REException.REG_EBRACE,index);
1273      else
1274        return startIndex;
1275    else if ((unit.ch == ',') && !unit.bk) {
1276      buf = new StringBuffer();
1277      // Read string of digits
1278      while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch))
1279        buf.append(unit.ch);
1280
1281      if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
1282        if (mustMatch)
1283          throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1284        else
1285          return startIndex;
1286
1287      // This is the case of {x,}
1288      if (buf.length() == 0) max = Integer.MAX_VALUE;
1289      else max = Integer.parseInt(buf.toString());
1290    } else
1291      if (mustMatch)
1292        throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1293      else
1294        return startIndex;
1295
1296    // We know min and max now, and they are valid.
1297
1298    minMax.first = min;
1299    minMax.second = max;
1300
1301    // return the index following the '}'
1302    return index;
1303  }
1304
1305   /**
1306    * Return a human readable form of the compiled regular expression,
1307    * useful for debugging.
1308    */
1309   public String toString() {
1310     StringBuffer sb = new StringBuffer();
1311     dump(sb);
1312     return sb.toString();
1313   }
1314
1315  void dump(StringBuffer os) {
1316    os.append('(');
1317    if (subIndex == 0)
1318      os.append("?:");
1319    if (firstToken != null)
1320      firstToken.dumpAll(os);
1321    os.append(')');
1322  }
1323
1324  // Cast input appropriately or throw exception
1325  private static CharIndexed makeCharIndexed(Object input, int index) {
1326      // We could let a String fall through to final input, but since
1327      // it's the most likely input type, we check it first.
1328    if (input instanceof String)
1329      return new CharIndexedString((String) input,index);
1330    else if (input instanceof char[])
1331      return new CharIndexedCharArray((char[]) input,index);
1332    else if (input instanceof StringBuffer)
1333      return new CharIndexedStringBuffer((StringBuffer) input,index);
1334    else if (input instanceof InputStream)
1335      return new CharIndexedInputStream((InputStream) input,index);
1336    else if (input instanceof Reader)
1337        return new CharIndexedReader((Reader) input, index);
1338    else if (input instanceof CharIndexed)
1339        return (CharIndexed) input; // do we lose index info?
1340    else 
1341        return new CharIndexedString(input.toString(), index);
1342  }
1343}