001/*
002 *  gnu/regexp/RETokenRepeated.java
003 *  Copyright (C) 1998-2001 Wes Biggs
004 *
005 *  This library is free software; you can redistribute it and/or modify
006 *  it under the terms of the GNU Lesser General Public License as published
007 *  by the Free Software Foundation; either version 2.1 of the License, or
008 *  (at your option) any later version.
009 *
010 *  This library is distributed in the hope that it will be useful,
011 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
012 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013 *  GNU Lesser General Public License for more details.
014 *
015 *  You should have received a copy of the GNU Lesser General Public License
016 *  along with this program; if not, write to the Free Software
017 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
018 */
019
020package gnu.regexp;
021import java.util.Vector;
022
023final class RETokenRepeated extends REToken {
024    private REToken token;
025    private int min,max;
026    private boolean stingy;
027    
028    RETokenRepeated(int subIndex, REToken token, int min, int max) {
029        super(subIndex);
030        this.token = token;
031        this.min = min;
032        this.max = max;
033    }
034
035    /** Sets the minimal matching mode to true. */
036    void makeStingy() {
037        stingy = true;
038    }
039    
040    /** Queries if this token has minimal matching enabled. */
041    boolean isStingy() {
042        return stingy;
043    }
044    
045    /**
046     * The minimum length of a repeated token is the minimum length
047     * of the token multiplied by the minimum number of times it must
048     * match.
049     */
050    int getMinimumLength() {
051        return (min * token.getMinimumLength());
052    }
053
054    // We do need to save every possible point, but the number of clone()
055    // invocations here is really a killer for performance on non-stingy
056    // repeat operators.  I'm open to suggestions...
057
058    // Hypothetical question: can you have a RE that matches 1 times,
059    // 3 times, 5 times, but not 2 times or 4 times?  Does having
060    // the subexpression back-reference operator allow that?
061
062    boolean match(CharIndexed input, REMatch mymatch) {
063        // number of times we've matched so far
064        int numRepeats = 0; 
065        
066        // Possible positions for the next repeat to match at
067        REMatch newMatch = mymatch;
068        REMatch last = null;
069        REMatch current;
070
071        // Add the '0-repeats' index
072        // positions.elementAt(z) == position [] in input after <<z>> matches
073        Vector positions = new Vector();
074        positions.addElement(newMatch);
075        
076        // Declare variables used in loop
077        REMatch doables;
078        REMatch doablesLast;
079        REMatch recurrent;
080
081        do {
082            // Check for stingy match for each possibility.
083            if (stingy && (numRepeats >= min)) {
084                REMatch result = matchRest(input, newMatch);
085                if (result != null) {
086                    mymatch.assignFrom(result);
087                    return true;
088                }
089            }
090
091            doables = null;
092            doablesLast = null;
093
094            // try next repeat at all possible positions
095            for (current = newMatch; current != null; current = current.next) {
096                recurrent = (REMatch) current.clone();
097                if (token.match(input, recurrent)) {
098                    // add all items in current to doables array
099                    if (doables == null) {
100                        doables = recurrent;
101                        doablesLast = recurrent;
102                    } else {
103                        // Order these from longest to shortest
104                        // Start by assuming longest (more repeats)
105                        doablesLast.next = recurrent;
106                    }
107                    // Find new doablesLast
108                    while (doablesLast.next != null) {
109                        doablesLast = doablesLast.next;
110                    }
111                }
112            }
113            // if none of the possibilities worked out, break out of do/while
114            if (doables == null) break;
115            
116            // reassign where the next repeat can match
117            newMatch = doables;
118            
119            // increment how many repeats we've successfully found
120            ++numRepeats;
121            
122            positions.addElement(newMatch);
123        } while (numRepeats < max);
124        
125        // If there aren't enough repeats, then fail
126        if (numRepeats < min) return false;
127        
128        // We're greedy, but ease off until a true match is found 
129        int posIndex = positions.size();
130        
131        // At this point we've either got too many or just the right amount.
132        // See if this numRepeats works with the rest of the regexp.
133        REMatch allResults = null;
134        REMatch allResultsLast = null;
135
136        REMatch results = null;
137        while (--posIndex >= min) {
138            newMatch = (REMatch) positions.elementAt(posIndex);
139            results = matchRest(input, newMatch);
140            if (results != null) {
141                if (allResults == null) {
142                    allResults = results;
143                    allResultsLast = results;
144                } else {
145                    // Order these from longest to shortest
146                    // Start by assuming longest (more repeats)
147                    allResultsLast.next = results;
148                }
149                // Find new doablesLast
150                while (allResultsLast.next != null) {
151                    allResultsLast = allResultsLast.next;
152                }
153            }
154            // else did not match rest of the tokens, try again on smaller sample
155        }
156        if (allResults != null) {
157            mymatch.assignFrom(allResults); // does this get all?
158            return true;
159        }
160        // If we fall out, no matches.
161        return false;
162    }
163
164    private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
165        REMatch current, single;
166        REMatch doneIndex = null;
167        REMatch doneIndexLast = null;
168        // Test all possible matches for this number of repeats
169        for (current = newMatch; current != null; current = current.next) {
170            // clone() separates a single match from the chain
171            single = (REMatch) current.clone();
172            if (next(input, single)) {
173                // chain results to doneIndex
174                if (doneIndex == null) {
175                    doneIndex = single;
176                    doneIndexLast = single;
177                } else {
178                    doneIndexLast.next = single;
179                }
180                // Find new doneIndexLast
181                while (doneIndexLast.next != null) {
182                    doneIndexLast = doneIndexLast.next;
183                }
184            }
185        }
186        return doneIndex;
187    }
188
189    void dump(StringBuffer os) {
190        os.append("(?:");
191        token.dumpAll(os);
192        os.append(')');
193        if ((max == Integer.MAX_VALUE) && (min <= 1))
194            os.append( (min == 0) ? '*' : '+' );
195        else if ((min == 0) && (max == 1))
196            os.append('?');
197        else {
198            os.append('{').append(min);
199            if (max > min) {
200                os.append(',');
201                if (max != Integer.MAX_VALUE) os.append(max);
202            }
203            os.append('}');
204        }
205        if (stingy) os.append('?');
206    }
207}