/*
 *  gnu/regexp/util/Grep.java
 *  Copyright (C) 1998 Wes Biggs
 *  Copyright (C) 2001 Lee Sau Dan for the use of Reader for handling file I/O
 *  Copyright (C) 2001 Ulf Dittmer for support of grepping into ZIP files
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published
 *  by the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package gnu.regexp.util;

import gnu.getopt.Getopt;
import gnu.getopt.LongOpt;
import gnu.regexp.RE;
import gnu.regexp.REException;
import gnu.regexp.REMatch;
import gnu.regexp.RESyntax;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Enumeration;
import java.util.Vector;
import java.util.zip.*;


/**
 * Grep is a pure-Java clone of the GNU grep utility.  As such, it is much
 * slower and not as full-featured, but it has the advantage of being
 * available on any system with a Java virtual machine.
 *
 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 *         <A HREF="http://www.csis.hku.hk/~sdlee/">Lee Sau Dan</A>
 *         <A HREF="http://www.capital.net/~dittmer/">Ulf Dittmer</A>
 * @version 1.03
 * @use gnu.getopt
 */
public class Grep {
  // Indices into the boolean option-flag array built by grep().
  private static final int BYTE_OFFSET = 0;
  private static final int COUNT = 1;
  private static final int LINE_NUMBER = 2;
  private static final int QUIET = 3;
  private static final int SILENT = 4;
  private static final int NO_FILENAME = 5;
  private static final int REVERT_MATCH = 6;
  private static final int FILES_WITH_MATCHES = 7;
  private static final int LINE_REGEXP = 8;
  private static final int FILES_WITHOUT_MATCH = 9;
  private static final int EXPAND_ZIP_FILES = 10;

  /**
   * Size of the option-flag array.  Must be one more than the highest
   * index above; it used to be a literal 10, which made the -z option
   * (index 10) fail with an ArrayIndexOutOfBoundsException.
   */
  private static final int OPTION_COUNT = EXPAND_ZIP_FILES + 1;

  private static final String PROGNAME = "gnu.regexp.util.Grep";
  private static final String PROGVERSION = "1.03";

  /** Not instantiable: all functionality is exposed through static methods. */
  private Grep() { }

  /**
   * Invokes the grep() function below with the command line arguments
   * and using the RESyntax.RE_SYNTAX_GREP syntax, which attempts to
   * emulate the traditional UNIX grep syntax.  The value returned by
   * grep() becomes the process exit status.
   *
   * @param argv command line arguments: options, pattern, then files
   */
  public static void main(String[] argv) {
    System.exit(grep(argv, RESyntax.RE_SYNTAX_GREP, System.out));
  }

  /**
   * Runs Grep with the specified arguments.  For a list of
   * supported options, specify "--help".
   *
   * This is the meat of the grep routine, but unlike main(), you can
   * specify your own syntax and PrintStream to use for output.
   * Diagnostics always go to System.err.
   *
   * @param argv   options, followed by the pattern, followed by zero or
   *               more file names ("-" means standard input; with no file
   *               names standard input is read)
   * @param syntax regular expression syntax used unless overridden by
   *               the -E or -G options
   * @param out    stream that receives the selected lines / counts / names
   * @return 0 if at least one input selected a line (also for --help and
   *         --version), 1 if nothing was selected, 2 on a usage error,
   *         a bad pattern, or a ZIP file that could not be read
   */
  public static int grep(String[] argv, RESyntax syntax, PrintStream out) {
    // use gnu.getopt to read arguments
    int cflags = 0;

    boolean[] options = new boolean[OPTION_COUNT];

    String encoding = null;

    LongOpt[] longOptions = {
      new LongOpt("byte-offset",         LongOpt.NO_ARGUMENT, null, 'b'),
      new LongOpt("count",               LongOpt.NO_ARGUMENT, null, 'c'),
      new LongOpt("no-filename",         LongOpt.NO_ARGUMENT, null, 'h'),
      new LongOpt("ignore-case",         LongOpt.NO_ARGUMENT, null, 'i'),
      new LongOpt("files-with-matches",  LongOpt.NO_ARGUMENT, null, 'l'),
      new LongOpt("help",                LongOpt.NO_ARGUMENT, null, '!'),
      new LongOpt("line-number",         LongOpt.NO_ARGUMENT, null, 'n'),
      new LongOpt("quiet",               LongOpt.NO_ARGUMENT, null, 'q'),
      new LongOpt("silent",              LongOpt.NO_ARGUMENT, null, 'q'),
      new LongOpt("no-messages",         LongOpt.NO_ARGUMENT, null, 's'),
      new LongOpt("revert-match",        LongOpt.NO_ARGUMENT, null, 'v'),
      new LongOpt("line-regexp",         LongOpt.NO_ARGUMENT, null, 'x'),
      new LongOpt("extended-regexp",     LongOpt.NO_ARGUMENT, null, 'E'),
      new LongOpt("fixed-strings",       LongOpt.NO_ARGUMENT, null, 'F'), // TODO
      new LongOpt("basic-regexp",        LongOpt.NO_ARGUMENT, null, 'G'),
      new LongOpt("files-without-match", LongOpt.NO_ARGUMENT, null, 'L'),
      new LongOpt("version",             LongOpt.NO_ARGUMENT, null, 'V'),
      new LongOpt("zip",                 LongOpt.NO_ARGUMENT, null, 'z'),
      new LongOpt("encoding",            LongOpt.REQUIRED_ARGUMENT, null, 'N')
    };

    Getopt g = new Getopt(PROGNAME, argv, "bchilnqsvxyEFGLVzN:", longOptions);
    int c;
    while ((c = g.getopt()) != -1) {
      switch (c) {
      case 'b':
        options[BYTE_OFFSET] = true;
        break;
      case 'c':
        options[COUNT] = true;
        break;
      case 'h':
        options[NO_FILENAME] = true;
        break;
      case 'i':
      case 'y':
        cflags |= RE.REG_ICASE;
        break;
      case 'l':
        options[FILES_WITH_MATCHES] = true;
        break;
      case 'n':
        options[LINE_NUMBER] = true;
        break;
      case 'q':
        options[QUIET] = true;
        break;
      case 's':
        options[SILENT] = true;
        break;
      case 'v':
        options[REVERT_MATCH] = true;
        break;
      case 'x':
        options[LINE_REGEXP] = true;
        break;
      case 'E':  // TODO: check compatibility with grep
        syntax = RESyntax.RE_SYNTAX_EGREP;
        break;
      case 'F':  // TODO: fixed strings
        break;
      case 'G':
        syntax = RESyntax.RE_SYNTAX_GREP;
        break;
      case 'L':
        options[FILES_WITHOUT_MATCH] = true;
        break;
      case 'V':
        System.err.println(PROGNAME+' '+PROGVERSION);
        return 0;
      case 'z':
        options[EXPAND_ZIP_FILES] = true;
        break;
      case 'N':
        encoding = g.getOptarg();
        try { // try out this encoding now.  If not found, fall back to default
          "".getBytes(encoding);
        } catch (UnsupportedEncodingException uee) {
          System.err.println(PROGNAME+": (Warning)"
                             + " Unsupported Encoding: " + encoding
                             + "; reverting to default");
          encoding = null;
        }
        break;
      case '!': // help
        printHelp(out);
        return 0;
      }
    }

    // The first non-option argument is the pattern; it is mandatory.
    int patternIndex = g.getOptind();
    if (patternIndex >= argv.length) {
      System.err.println("Usage: java " + PROGNAME + " [OPTION]... PATTERN [FILE]...");
      System.err.println("Try `java " + PROGNAME + " --help' for more information.");
      return 2;
    }

    RE pattern;
    try {
      pattern = new RE(argv[patternIndex], cflags, syntax);
    } catch (REException e) {
      System.err.println("Error in expression: "+e);
      return 2;
    }

    int firstFile = patternIndex + 1;
    boolean notFound = true;

    if (firstFile >= argv.length) {
      // No file arguments at all: read standard input, never print a name.
      if (processStream(pattern, System.in, encoding, options, null, null, out))
        notFound = false;
      return notFound ? 1 : 0;
    }

    // File names are only printed when more than one input was given
    // and -h was not requested.
    boolean noFilename = (argv.length == firstFile + 1) || options[NO_FILENAME];

    for (int i = firstFile; i < argv.length; i++) {
      if (argv[i].equals("-")) {
        String stdinName = noFilename ? null : "(standard input)";
        if (processStream(pattern, System.in, encoding, options, stdinName, null, out))
          notFound = false;
        continue;
      }

      String filename = noFilename ? null : argv[i];
      File file = new File(argv[i]);

      if (!file.exists()) {
        // canRead() is also false for a missing file, so test existence
        // first; otherwise a typo is misreported as "Permission denied".
        if (!options[SILENT])
          System.err.println(PROGNAME + ": " + argv[i] + ": No such file or directory");
      } else if (file.isDirectory()) {
        if (!options[SILENT])
          System.err.println(PROGNAME + ": " + argv[i] + ": Is a directory");
      } else if (!file.canRead()) {
        if (!options[SILENT])
          System.err.println(PROGNAME + ": " + argv[i] + ": Permission denied");
      } else if (options[EXPAND_ZIP_FILES] && argv[i].endsWith(".zip")) {
        // iterate over all files within this ZIP file
        ZipFile zf = null;
        try {
          zf = new ZipFile(file);
          Enumeration list = zf.entries();
          while (list.hasMoreElements()) {
            ZipEntry ze = (ZipEntry) list.nextElement();
            if (!ze.isDirectory()) {
              if (processStream(pattern, zf.getInputStream(ze), encoding,
                                options, filename, ze.getName(), out))
                notFound = false;
            }
          }
        } catch (Exception ex) {
          System.err.println(PROGNAME + ": " + argv[i] + ": Problem reading ZIP file");
          return 2;
        } finally {
          // Release the archive's file handle on every path.
          if (zf != null) {
            try {
              zf.close();
            } catch (IOException ignored) {
              // nothing useful can be done if closing fails
            }
          }
        }
      } else {
        try {
          if (processStream(pattern, new FileInputStream(argv[i]),
                            encoding, options, filename, null, out))
            notFound = false;
        } catch (FileNotFoundException e) {
          // Still possible if the file vanishes between the checks above
          // and the open.
          if (!options[SILENT])
            System.err.println(PROGNAME+": "+e);
        }
      }
    }
    return notFound ? 1 : 0;
  }

  /**
   * Copies the bundled usage text (the class resource "GrepUsage.txt",
   * read as UTF8) to the given stream.  A missing or unreadable resource
   * is reported on System.err instead of failing with a
   * NullPointerException or being silently ignored.
   *
   * @param out stream that receives the usage text
   */
  private static void printHelp(PrintStream out) {
    InputStream usage = (Grep.class).getResourceAsStream("GrepUsage.txt");
    if (usage == null) {
      System.err.println(PROGNAME + ": usage text GrepUsage.txt not found");
      return;
    }
    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(usage, "UTF8"));
      String line;
      while ((line = br.readLine()) != null)
        out.println(line);
    } catch (IOException ie) {
      System.err.println(PROGNAME + ": " + ie);
    } finally {
      try {
        usage.close();
      } catch (IOException ignored) {
        // nothing useful can be done if closing fails
      }
    }
  }

  /**
   * Wraps a byte stream in a BufferedReader, using the given character
   * encoding or the platform default when encoding is null, and greps it.
   * The stream is closed by the time this method returns.
   *
   * @return true if at least one line of the stream was selected
   */
  private static boolean processStream(RE pattern, InputStream is,
                                       String encoding, boolean[] options,
                                       String filename, String zipName,
                                       PrintStream out) {
    try {
      final InputStreamReader isr = encoding == null?
        new InputStreamReader(is) : new InputStreamReader(is,encoding);
      final BufferedReader r = new BufferedReader(isr);
      return processReader(pattern, r, options, filename, zipName, out);
    } catch (UnsupportedEncodingException uee) {
      /* since grep() should have checked that the 'encoding' parameter
         is valid, it should be impossible that this exception would
         happen.  If so, it is a logic error.
      */
      throw new Error(PROGNAME + ": programming logic error");
    }
  }

  /**
   * Builds the name shown in output: the plain file name, or
   * "entry in archive" when the input is an entry of a ZIP file.
   */
  private static String fileNameString (String fileName, String zipName) {
    if (zipName == null)
      return fileName;
    else
      return zipName + " in " + fileName;
  }

  /**
   * Greps a single input, line by line, honouring the option flags.
   * A line is "selected" when it matches the pattern (as a whole line
   * under -x), or when it does not match under -v.
   *
   * The reader is always closed before returning, including on the
   * early-return paths taken by -q, -l and -L.
   *
   * @param pattern  compiled regular expression
   * @param br       input to read; closed by this method
   * @param options  option flags indexed by the constants of this class
   * @param filename name to prefix output with, or null for no prefix
   * @param zipName  ZIP entry name, or null if the input is not a ZIP entry
   * @param out      stream that receives the output
   * @return true if at least one line was selected (false as soon as a
   *         line is selected under -L)
   */
  private static boolean processReader(RE pattern,
                                       BufferedReader br,
                                       boolean[] options, String filename,
                                       String zipName, PrintStream out) {

    int newlineLen = System.getProperty("line.separator").length();
    int count = 0;      // number of selected lines
    long atByte = 0;    // approximate offset of the current line start
    int atLine = 1;
    String line;
    REMatch match;

    try {
      while ((line = br.readLine()) != null) {
        match = pattern.getMatch(line);
        boolean matched = options[LINE_REGEXP] ? pattern.isMatch(line)
                                               : (match != null);
        if (matched ^ options[REVERT_MATCH]) {
          count++;
          if (!options[COUNT]) {
            if (options[QUIET]) {
              return true;
            }
            if (options[FILES_WITH_MATCHES]) {
              if (filename != null)
                out.println(fileNameString(filename, zipName));
              return true;
            }
            if (options[FILES_WITHOUT_MATCH]) {
              return false;
            }
            if (filename != null) {
              out.print(fileNameString(filename, zipName));
              out.print(':');
            }
            if (options[LINE_NUMBER]) {
              out.print(atLine);
              out.print(':');
            }
            if (options[BYTE_OFFSET]) {
              // match is null for lines selected by -v; fall back to
              // the offset of the line start instead of crashing.
              long offset = atByte;
              if (match != null)
                offset += match.getStartIndex();
              out.print(offset);
              out.print(':');
            }
            out.println(line);
          }
        } // a selected line
        // Counts chars, not bytes, and assumes the platform line
        // separator, so this is only an approximation of the byte offset.
        atByte += line.length() + newlineLen; // could be troublesome...
        atLine++;
      } // a valid line

      if (options[COUNT]) {
        // Prefix and count go on one line ("name:count"), like every
        // other prefixed output of this class.
        if (filename != null) {
          out.print(fileNameString(filename, zipName));
          out.print(':');
        }
        out.println(count);
      }
      if (options[FILES_WITHOUT_MATCH] && count==0) {
        if (filename != null)
          out.println(fileNameString(filename, zipName));
      }
    } catch (IOException e) {
      System.err.println(PROGNAME+": "+e);
    } finally {
      try {
        br.close();
      } catch (IOException e) {
        System.err.println(PROGNAME+": "+e);
      }
    }
    // count already takes -v into account (see the selection test above),
    // so it must not be inverted a second time here.
    return count > 0;
  }
}