001/* 002 * gnu/regexp/RETokenRepeated.java 003 * Copyright (C) 1998-2001 Wes Biggs 004 * 005 * This library is free software; you can redistribute it and/or modify 006 * it under the terms of the GNU Lesser General Public License as published 007 * by the Free Software Foundation; either version 2.1 of the License, or 008 * (at your option) any later version. 009 * 010 * This library is distributed in the hope that it will be useful, 011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 013 * GNU Lesser General Public License for more details. 014 * 015 * You should have received a copy of the GNU Lesser General Public License 016 * along with this program; if not, write to the Free Software 017 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 018 */ 019 020package gnu.regexp; 021import java.util.Vector; 022 023final class RETokenRepeated extends REToken { 024 private REToken token; 025 private int min,max; 026 private boolean stingy; 027 028 RETokenRepeated(int subIndex, REToken token, int min, int max) { 029 super(subIndex); 030 this.token = token; 031 this.min = min; 032 this.max = max; 033 } 034 035 /** Sets the minimal matching mode to true. */ 036 void makeStingy() { 037 stingy = true; 038 } 039 040 /** Queries if this token has minimal matching enabled. */ 041 boolean isStingy() { 042 return stingy; 043 } 044 045 /** 046 * The minimum length of a repeated token is the minimum length 047 * of the token multiplied by the minimum number of times it must 048 * match. 049 */ 050 int getMinimumLength() { 051 return (min * token.getMinimumLength()); 052 } 053 054 // We do need to save every possible point, but the number of clone() 055 // invocations here is really a killer for performance on non-stingy 056 // repeat operators. I'm open to suggestions... 057 058 // Hypothetical question: can you have a RE that matches 1 times, 059 // 3 times, 5 times, but not 2 times or 4 times? Does having 060 // the subexpression back-reference operator allow that? 061 062 boolean match(CharIndexed input, REMatch mymatch) { 063 // number of times we've matched so far 064 int numRepeats = 0; 065 066 // Possible positions for the next repeat to match at 067 REMatch newMatch = mymatch; 068 REMatch last = null; 069 REMatch current; 070 071 // Add the '0-repeats' index 072 // positions.elementAt(z) == position [] in input after <<z>> matches 073 Vector positions = new Vector(); 074 positions.addElement(newMatch); 075 076 // Declare variables used in loop 077 REMatch doables; 078 REMatch doablesLast; 079 REMatch recurrent; 080 081 do { 082 // Check for stingy match for each possibility. 083 if (stingy && (numRepeats >= min)) { 084 REMatch result = matchRest(input, newMatch); 085 if (result != null) { 086 mymatch.assignFrom(result); 087 return true; 088 } 089 } 090 091 doables = null; 092 doablesLast = null; 093 094 // try next repeat at all possible positions 095 for (current = newMatch; current != null; current = current.next) { 096 recurrent = (REMatch) current.clone(); 097 if (token.match(input, recurrent)) { 098 // add all items in current to doables array 099 if (doables == null) { 100 doables = recurrent; 101 doablesLast = recurrent; 102 } else { 103 // Order these from longest to shortest 104 // Start by assuming longest (more repeats) 105 doablesLast.next = recurrent; 106 } 107 // Find new doablesLast 108 while (doablesLast.next != null) { 109 doablesLast = doablesLast.next; 110 } 111 } 112 } 113 // if none of the possibilities worked out, break out of do/while 114 if (doables == null) break; 115 116 // reassign where the next repeat can match 117 newMatch = doables; 118 119 // increment how many repeats we've successfully found 120 ++numRepeats; 121 122 positions.addElement(newMatch); 123 } while (numRepeats < max); 124 125 // If there aren't enough repeats, then fail 126 if (numRepeats < min) return false; 127 128 // We're greedy, but ease off until a true match is found 129 int posIndex = positions.size(); 130 131 // At this point we've either got too many or just the right amount. 132 // See if this numRepeats works with the rest of the regexp. 133 REMatch allResults = null; 134 REMatch allResultsLast = null; 135 136 REMatch results = null; 137 while (--posIndex >= min) { 138 newMatch = (REMatch) positions.elementAt(posIndex); 139 results = matchRest(input, newMatch); 140 if (results != null) { 141 if (allResults == null) { 142 allResults = results; 143 allResultsLast = results; 144 } else { 145 // Order these from longest to shortest 146 // Start by assuming longest (more repeats) 147 allResultsLast.next = results; 148 } 149 // Find new doablesLast 150 while (allResultsLast.next != null) { 151 allResultsLast = allResultsLast.next; 152 } 153 } 154 // else did not match rest of the tokens, try again on smaller sample 155 } 156 if (allResults != null) { 157 mymatch.assignFrom(allResults); // does this get all? 158 return true; 159 } 160 // If we fall out, no matches. 161 return false; 162 } 163 164 private REMatch matchRest(CharIndexed input, final REMatch newMatch) { 165 REMatch current, single; 166 REMatch doneIndex = null; 167 REMatch doneIndexLast = null; 168 // Test all possible matches for this number of repeats 169 for (current = newMatch; current != null; current = current.next) { 170 // clone() separates a single match from the chain 171 single = (REMatch) current.clone(); 172 if (next(input, single)) { 173 // chain results to doneIndex 174 if (doneIndex == null) { 175 doneIndex = single; 176 doneIndexLast = single; 177 } else { 178 doneIndexLast.next = single; 179 } 180 // Find new doneIndexLast 181 while (doneIndexLast.next != null) { 182 doneIndexLast = doneIndexLast.next; 183 } 184 } 185 } 186 return doneIndex; 187 } 188 189 void dump(StringBuffer os) { 190 os.append("(?:"); 191 token.dumpAll(os); 192 os.append(')'); 193 if ((max == Integer.MAX_VALUE) && (min <= 1)) 194 os.append( (min == 0) ? '*' : '+' ); 195 else if ((min == 0) && (max == 1)) 196 os.append('?'); 197 else { 198 os.append('{').append(min); 199 if (max > min) { 200 os.append(','); 201 if (max != Integer.MAX_VALUE) os.append(max); 202 } 203 os.append('}'); 204 } 205 if (stingy) os.append('?'); 206 } 207}