001/*
002 *  gnu/regexp/util/Grep.java
003 *  Copyright (C) 1998 Wes Biggs
004 *  Copyright (C) 2001 Lee Sau Dan for the use of Reader for handling file I/O
005 *  Copyright (C) 2001 Ulf Dittmer for support of grepping into ZIP files
006 *
007 *  This program is free software; you can redistribute it and/or modify
008 *  it under the terms of the GNU General Public License as published
009 *  by the Free Software Foundation; either version 2 of the License, or
010 *  (at your option) any later version.
011 *
012 *  This program is distributed in the hope that it will be useful,
013 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
014 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
015 *  GNU General Public License for more details.
016 *
017 *  You should have received a copy of the GNU General Public License
018 *  along with this program; if not, write to the Free Software
019 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
020 */
021
022package gnu.regexp.util;
023
024import gnu.getopt.Getopt;
025import gnu.getopt.LongOpt;
026import gnu.regexp.RE;
027import gnu.regexp.REException;
028import gnu.regexp.REMatch;
029import gnu.regexp.RESyntax;
030import java.io.BufferedReader;
031import java.io.File;
032import java.io.FileInputStream;
033import java.io.FileNotFoundException;
034import java.io.InputStream;
035import java.io.InputStreamReader;
036import java.io.IOException;
037import java.io.PrintStream;
038import java.io.UnsupportedEncodingException;
039import java.util.Enumeration;
040import java.util.Vector;
041import java.util.zip.*;
042
043
044/**
045 * Grep is a pure-Java clone of the GNU grep utility.  As such, it is much
046 * slower and not as full-featured, but it has the advantage of being
047 * available on any system with a Java virtual machine.
048 *
049 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
050 *         <A HREF="http://www.csis.hku.hk/~sdlee/">Lee Sau Dan</A>
051 *         <A HREF="http://www.capital.net/~dittmer/">Ulf Dittmer</A>
052 * @version 1.03
053 * @use gnu.getopt
054 */
055public class Grep {
056  private static final int BYTE_OFFSET = 0;
057  private static final int COUNT = 1;
058  private static final int LINE_NUMBER = 2;
059  private static final int QUIET = 3;
060  private static final int SILENT = 4;
061  private static final int NO_FILENAME = 5;
062  private static final int REVERT_MATCH = 6;
063  private static final int FILES_WITH_MATCHES = 7;
064  private static final int LINE_REGEXP = 8;
065  private static final int FILES_WITHOUT_MATCH = 9;
066  private static final int EXPAND_ZIP_FILES = 10;
067
068  private static final String PROGNAME = "gnu.regexp.util.Grep";
069  private static final String PROGVERSION = "1.03";
070
071  private Grep() { }
072  /**
073   * Invokes the grep() function below with the command line arguments
074   * and using the RESyntax.RE_SYNTAX_GREP syntax, which attempts to
075   * emulate the traditional UNIX grep syntax.
076   */
077  public static void main(String[] argv) {
078    System.exit(grep(argv, RESyntax.RE_SYNTAX_GREP, System.out));
079  }
080
081  /**
082   * Runs Grep with the specified arguments.  For a list of 
083   * supported options, specify "--help".
084   *
085   * This is the meat of the grep routine, but unlike main(), you can
086   * specify your own syntax and PrintStream to use for output.
087   */
088  public static int grep(String[] argv, RESyntax syntax, PrintStream out) {
089    // use gnu.getopt to read arguments
090    int cflags = 0;
091    
092    boolean[] options = new boolean [10];
093
094    String encoding = null;
095    
096    LongOpt[] longOptions = { 
097        new LongOpt("byte-offset",         LongOpt.NO_ARGUMENT, null, 'b'),
098        new LongOpt("count",               LongOpt.NO_ARGUMENT, null, 'c'),
099        new LongOpt("no-filename",         LongOpt.NO_ARGUMENT, null, 'h'),
100        new LongOpt("ignore-case",         LongOpt.NO_ARGUMENT, null, 'i'),
101        new LongOpt("files-with-matches",  LongOpt.NO_ARGUMENT, null, 'l'),
102        new LongOpt("help",                LongOpt.NO_ARGUMENT, null, '!'),
103        new LongOpt("line-number",         LongOpt.NO_ARGUMENT, null, 'n'),
104        new LongOpt("quiet",               LongOpt.NO_ARGUMENT, null, 'q'),
105        new LongOpt("silent",              LongOpt.NO_ARGUMENT, null, 'q'),
106        new LongOpt("no-messages",         LongOpt.NO_ARGUMENT, null, 's'),
107        new LongOpt("revert-match",        LongOpt.NO_ARGUMENT, null, 'v'),
108        new LongOpt("line-regexp",         LongOpt.NO_ARGUMENT, null, 'x'),
109        new LongOpt("extended-regexp",     LongOpt.NO_ARGUMENT, null, 'E'),
110        new LongOpt("fixed-strings",       LongOpt.NO_ARGUMENT, null, 'F'), // TODO
111        new LongOpt("basic-regexp",        LongOpt.NO_ARGUMENT, null, 'G'),
112        new LongOpt("files-without-match", LongOpt.NO_ARGUMENT, null, 'L'),
113        new LongOpt("version",             LongOpt.NO_ARGUMENT, null, 'V'),
114        new LongOpt("zip",                 LongOpt.NO_ARGUMENT, null, 'z'),
115        new LongOpt("encoding",      LongOpt.REQUIRED_ARGUMENT, null, 'N')
116          };
117
118    Getopt g = new Getopt(PROGNAME, argv, "bchilnqsvxyEFGLVzN:", longOptions);
119    int c;
120    String arg;
121    while ((c = g.getopt()) != -1) {
122      switch (c) {
123      case 'b':
124        options[BYTE_OFFSET] = true;
125        break;
126      case 'c':
127        options[COUNT] = true;
128        break;
129      case 'h':
130        options[NO_FILENAME] = true;
131        break;
132      case 'i':
133      case 'y':
134        cflags |= RE.REG_ICASE;
135        break;
136      case 'l':
137        options[FILES_WITH_MATCHES] = true;
138        break;
139      case 'n':
140        options[LINE_NUMBER] = true;
141        break;
142      case 'q':
143        options[QUIET] = true;
144        break;
145      case 's':
146        options[SILENT] = true;
147        break;
148      case 'v':
149        options[REVERT_MATCH] = true;
150        break;
151      case 'x':
152        options[LINE_REGEXP] = true;
153        break;
154      case 'E':  // TODO: check compatibility with grep
155        syntax = RESyntax.RE_SYNTAX_EGREP;
156        break;
157      case 'F':  // TODO: fixed strings
158        break;
159      case 'G':
160        syntax = RESyntax.RE_SYNTAX_GREP;
161        break;
162      case 'L':
163        options[FILES_WITHOUT_MATCH] = true;
164        break;
165      case 'V':
166        System.err.println(PROGNAME+' '+PROGVERSION);
167        return 0;
168      case 'z':
169        options[EXPAND_ZIP_FILES] = true;
170        break;
171      case 'N':
172        encoding = g.getOptarg();
173        try { // try out this encoding now.  If not found, fall back to default
174          "".getBytes(encoding);
175        } catch (UnsupportedEncodingException uee) {
176          System.err.println(PROGNAME+": (Warning)"
177                             + " Unsupported Encoding: " + encoding 
178                             + "; reverting to default");
179          encoding = null;
180        }
181        break;
182      case '!': // help
183          try {
184              BufferedReader br = new BufferedReader(new InputStreamReader((Grep.class).getResourceAsStream("GrepUsage.txt"),"UTF8"));
185              String line;
186              while ((line = br.readLine()) != null)
187                  out.println(line);
188          } catch (IOException ie) { }
189        return 0;
190      }
191    }         
192    
193    InputStream is = null;
194    RE pattern = null;
195    if (g.getOptind() >= argv.length) {
196      System.err.println("Usage: java " + PROGNAME + " [OPTION]... PATTERN [FILE]...");
197      System.err.println("Try `java " + PROGNAME + " --help' for more information.");
198      return 2;
199    }
200    try {
201      pattern = new RE(argv[g.getOptind()],cflags,syntax);
202    } catch (REException e) {
203      System.err.println("Error in expression: "+e);
204      return 2;
205    }
206
207    boolean notFound = true;
208    if (argv.length >= g.getOptind()+2) {
209      for (int i = g.getOptind() + 1; i < argv.length; i++) {
210          boolean no_filename = (argv.length == g.getOptind()+2)
211              || options[NO_FILENAME];
212        if (argv[i].equals("-")) {
213            final String filename = no_filename ? null : "(standard input)";
214            if (processStream(pattern,System.in,encoding,options,filename,null,out))
215                notFound = false;
216        } else {
217            final String filename = no_filename ? null : argv[i];
218            try {
219                File file = new File(argv[i]);
220                if(file.isDirectory()) {
221                    System.err.println(PROGNAME + ": " + argv[i] + ": Is a directory");
222                } else if(!file.canRead()) {
223                    System.err.println(PROGNAME + ": " + argv[i] + ": Permission denied");
224                } else if (options[EXPAND_ZIP_FILES] && argv[i].endsWith(".zip")) {
225                    // iterate over all files within this ZIP file
226                    try {
227                        ZipFile zf = new ZipFile(file);
228                        Enumeration list = zf.entries();
229                        while (list.hasMoreElements()) {
230                            ZipEntry ze = (ZipEntry) list.nextElement();
231                            if (! ze.isDirectory()) {
232                                if (processStream(pattern, zf.getInputStream(ze), encoding, options, filename, ze.getName(), out))
233                                    notFound = false;
234                            }
235                        }
236                    } catch (Exception ex) {
237                        System.err.println(PROGNAME + ": " + argv[i] + ": Problem reading ZIP file");
238                        return 2;
239                    }
240                } else {
241                    if (processStream(pattern,
242                                      new FileInputStream(argv[i]),
243                                      encoding, options, filename, null, out))
244                        notFound = false;
245                }
246            } catch (FileNotFoundException e) {
247                if (!options[SILENT])
248                    System.err.println(PROGNAME+": "+e);
249            }
250        }
251      }
252    } else {
253        if (processStream(pattern,System.in,encoding,options,null,null,out))
254            notFound = false;
255    }
256    return notFound ? 1 : 0;
257  }
258
259  private static boolean processStream(RE pattern, InputStream is, 
260                                       String encoding, boolean[] options, 
261                                       String filename, String zipName,
262                                       PrintStream out) {
263    try {
264      final InputStreamReader isr = encoding == null?
265        new InputStreamReader(is) : new InputStreamReader(is,encoding);
266      final BufferedReader r = new BufferedReader(isr);
267      return processReader(pattern, r, options, filename, zipName, out);
268    } catch (UnsupportedEncodingException uee) {
269      /* since grep() should have checked that the 'encoding' parameter
270         is valid, it should be impossible that this exception would
271         happen.  Of, sso, it is a logic error.
272      */
273      throw new Error(PROGNAME + ": programming logic error");
274    }
275  }
276
277    private static String fileNameString (String fileName, String zipName) {
278        if (zipName == null)
279            return fileName;
280        else
281            return zipName + " in " + fileName;
282    }
283
284  private static boolean processReader(RE pattern,
285                                       BufferedReader br,
286                                       boolean[] options, String filename,
287                                       String zipName, PrintStream out) {
288
289    int newlineLen = System.getProperty("line.separator").length();
290    int count = 0;
291    long atByte = 0;
292    int atLine = 1;
293    String line;
294    REMatch match;
295    
296    try {
297      while ((line = br.readLine()) != null) {
298        match = pattern.getMatch(line);
299        if (((options[LINE_REGEXP] && pattern.isMatch(line))
300             || (!options[LINE_REGEXP] && (match != null))) 
301            ^ options[REVERT_MATCH]) {
302          count++;
303          if (!options[COUNT]) {
304            if (options[QUIET]) {
305              return true;
306            }
307            if (options[FILES_WITH_MATCHES]) {
308              if (filename != null)
309                out.println(fileNameString(filename, zipName));
310              return true;
311            }
312            if (options[FILES_WITHOUT_MATCH]) {
313              return false;
314            }
315            if (filename != null) {
316                out.print(fileNameString(filename, zipName));
317                out.print(':');
318            }
319            if (options[LINE_NUMBER]) {
320              out.print(atLine);
321              out.print(':');
322            }
323            if (options[BYTE_OFFSET]) {
324              out.print(atByte + match.getStartIndex() );
325              out.print(':');
326            }
327            out.println(line);
328          }
329        } // a match
330        atByte += line.length() + newlineLen; // could be troublesome...
331        atLine++;
332      } // a valid line
333      br.close();
334
335      if (options[COUNT]) {
336        if (filename != null)
337          out.println(fileNameString(filename, zipName)+':');
338        out.println(count);
339      }
340      if (options[FILES_WITHOUT_MATCH] && count==0) {
341        if (filename != null)
342          out.println(fileNameString(filename, zipName));
343      }
344    } catch (IOException e) {
345      System.err.println(PROGNAME+": "+e);
346    }
347    return ((count > 0) ^ options[REVERT_MATCH]);
348  }
349}