001/*
002 *  UrlGrab.java - A widget to grab URLs through a proxy.
003 *
004 *  $URL: svn://svn.webarts.bc.ca/open/trunk/projects/WebARTS/ca/bc/webarts/widgets/UrlGrab.java $
005 *  $Revision: 1235 $
006 *  $Date: 2018-03-02 22:30:24 -0800 (Fri, 02 Mar 2018) $
007 *
008 *
009 *  Written by Tom Gutwin - WebARTS Design.
010 *  Copyright (C) 2003-2016 WebARTS Design, North Vancouver Canada
011 *  http://www.webarts.bc.ca
012 *
013 *  This program is free software; you can redistribute it and/or modify
014 *  it under the terms of the GNU General Public License as published by
015 *  the Free Software Foundation; either version 2 of the License, or
016 *  (at your option) any later version.
017 *
018 *  This program is distributed in the hope that it will be useful,
019 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
020 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
021 *  GNU General Public License for more details.
022 *
023 *  You should have received a copy of the GNU General Public License
024 *  along with this program; if not, write to the Free Software
025 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
026 */
027package ca.bc.webarts.widgets;
028
029import java.io.File;
030import java.io.FileInputStream;
031import java.io.FileOutputStream;
032import java.io.FileNotFoundException;
033import java.io.InputStream;
034import java.io.IOException;
035
036import java.lang.SecurityException;
037
038import java.util.Properties;
039
040import java.net.Authenticator;
041import java.net.HttpURLConnection;
042import java.net.MalformedURLException;
043import java.net.URL;
044import java.net.URLConnection;
045import java.net.PasswordAuthentication;
046
/**
 * A very simplistic class to grab a URL and either return its text or save it
 * to a local file.
 * It includes the option to specify a proxy as well as authentication on that
 * proxy.<br><br>It also includes a main method to test as well as use this
 * class's functionality from the commandline.
 *
 * <p>The proxy settings are read by {@link #initProxy()} from these system
 * properties (the names are the ones the code actually looks up):
 *
 * <pre>
 *    Usage:
 *      java -DproxySet=true
 *           -Dhttp.proxyHost=YourProxyServerHostname
 *           -Dhttp.proxyPort=YourProxyServerPort
 *           -Dhttp.proxyUser=YourProxyUsernameIFNEEDED
 *           -Dhttp.proxyPassword=YourProxyUserPasswordIFNEEDED
 *           ca.bc.webarts.widgets.UrlGrab [URL location 1] [URL location2]...
 *</pre>
 *
 * <p>Note that {@link #main(String[])} prints the text of each URL and does
 * not call {@link #initProxy()}; only the constructors that save a URL to a
 * file do. The proxy settings and credentials are held in static fields, so
 * they are shared by every instance of this class.
 *
 * @author     Tom Gutwin P.Eng
 **/
public class UrlGrab extends Authenticator
{
  /** A holder for the NA Clients System File Separator. **/
  private static final String SYSTEM_FILE_SEPERATOR = File.separator;

  /**
   * A holder for the directory location to save downloads. DEFAULT is the
   * current working dir.
   **/
  private static final String DEFAULT_SAVE_LOCATION =
      System.getProperty("user.dir") + SYSTEM_FILE_SEPERATOR;

  /** The local filename used when none can be derived or none is given. **/
  private static final String UNNAMED_GRAB_FILENAME = "unamedGrabbedUrl";

  /** Transfer buffer size; also the number of bytes each progress dot stands for. **/
  private static final int PROGRESS_CHUNK_BYTES = 1024;

  public static final String DEFAULT_PROXY_SET = "true";
  public static final String DEFAULT_PROXY_HOST = "companyProxy";
  public static final String DEFAULT_PROXY_PORT = "80";
  public static final String DEFAULT_PROXY_NOPROXY = "";

  /** Flags if the proxy gets configured by {@link #initProxy()}. **/
  private static boolean areWeUsingProxy_ = true;

  /** Flags if this class gets installed as the default Authenticator. **/
  private static boolean useProxyAuthentication_ = true;

  /** the class Proxy Username to authenticate with. **/
  private static String proxyUsername_ = "NedFlanders";

  /** the class Proxy Password to authenticate with. **/
  private static String proxyPassword_ = "someGoofyPasswordOfYourOwn";

  /** the Proxy hostname. **/
  private static String proxyHost_ = DEFAULT_PROXY_HOST;

  /** the Proxy port. **/
  private static String proxyPort_ = DEFAULT_PROXY_PORT;


  /**
   * The default empty constructor for this class.  It inits but does no
   * grabbing.
   **/
  public UrlGrab()
  {
  }


  /**
   * The main constructor for this class.  It inits the proxy and grabs the
   * spec'd URL into the current directory. The local filename is the text
   * after the last '/' of the URL's file part, or a fixed fallback name when
   * that text is empty.
   *
   * @param urlStr the string representation of the URL to grab.
   *
   **/
  public UrlGrab(String urlStr)
  {
    ensureSaveLocationExists();
    try
    {
      initProxy();
      URL urlToGet = new URL(urlStr);
      grab(urlToGet, DEFAULT_SAVE_LOCATION + localFilenameFor(urlToGet));
    }
    catch (MalformedURLException badUrlEx)
    {
      System.out.println("Cannot grab " + urlStr + ".");
    }
  }


  /**
   * A constructor for this class that takes an array of URLs.  It inits the
   * proxy once and grabs the spec'd URLs into the current directory. A
   * malformed entry is reported and skipped; the remaining entries are still
   * grabbed. A null or empty array does nothing.
   *
   * @param urlStr the string representations of the URLs to grab.
   *
   **/
  public UrlGrab(String[] urlStr)
  {
    ensureSaveLocationExists();
    if (urlStr == null || urlStr.length == 0)
    {
      return;
    }

    // The proxy settings do not depend on the URL, so set them up only once.
    initProxy();
    for (String candidate : urlStr)
    {
      try
      {
        URL urlToGet = new URL(candidate);
        grab(urlToGet, DEFAULT_SAVE_LOCATION + localFilenameFor(urlToGet));
      }
      catch (MalformedURLException badUrlEx)
      {
        System.out.println("Cannot grab " + candidate + ".");
      }
    }
  }


  /**
   * A constructor that inits the proxy and grabs the spec'd URL into the
   * spec'd filename.
   *
   * @param urlToGet the URL to grab; a null URL is reported and ignored.
   * @param grabbedFilename the filename of the local storage for the URL file.
   *
   **/
  public UrlGrab(URL urlToGet, String grabbedFilename)
  {
    if (urlToGet == null)
    {
      System.out.println("Cannot grab a null URL.");
      return;
    }
    ensureSaveLocationExists();
    initProxy();
    grab(urlToGet, grabbedFilename);
  }


  /**
   *
   *  A simple/useless constructor for the sole purpose of providing the
   *  Authenticator impl to the net authentication mechanism.  Both the passed
   *  params can be null or "" - if they are the default class username and
   *  pass are used. The values are stored in static fields and are therefore
   *  shared by all instances.
   *
   * @param  proxyUser is the username to pass to the proxy
   * @param  proxyPass is the password to pass to the proxy
   */
  public UrlGrab(String proxyUser, String proxyPass)
  {
    proxyUsername_ = firstNonEmpty(proxyUser, proxyUsername_);
    proxyPassword_ = firstNonEmpty(proxyPass, proxyPassword_);
  }


  /**
   * The main entry for this app. It takes url strings on the commandline and
   * prints the text retrieved from each of them.
   *
   * @param args are the URLs to grab.
   **/
  public static void main(String [] args)
  {
    if (args.length == 0)
    {
      System.out.println("You must specify at least 1 URL to grab.");
      return;
    }

    UrlGrab instance = new UrlGrab();
    for (int i = 0; i < args.length; i++)
    {
      System.out.println("URL"+i+ " = " + args[i]);
      System.out.println(instance.grab(args[i]));
      System.out.println();
    }
  }


  /**
   *  Does the gruntwork to get the proxy properties set.
   *
   *  <p>The <code>proxySet</code> system property, when present and not
   *  empty, switches proxy use on ("true", any case) or off (anything else).
   *  When the proxy is in use, non-empty values of
   *  <code>http.proxyUser</code>, <code>http.proxyPassword</code>,
   *  <code>http.proxyHost</code> and <code>http.proxyPort</code> override the
   *  class defaults, an instance of this class is installed as the default
   *  {@link Authenticator}, and the resulting host and port are written back
   *  to the system properties.
   **/
  public static void initProxy()
  {
    Properties sysProps = System.getProperties();
    String proxySetFlag = sysProps.getProperty("proxySet");
    if (proxySetFlag != null && !proxySetFlag.equals(""))
    {
      areWeUsingProxy_ = proxySetFlag.equalsIgnoreCase("true");
    }

    if (!areWeUsingProxy_)
    {
      return;
    }

    System.out.println("Initializing the proxy settings");
    //check if the user had something defined in the Sys Props via cmdline.
    proxyUsername_ =
        firstNonEmpty(sysProps.getProperty("http.proxyUser"), proxyUsername_);
    proxyPassword_ =
        firstNonEmpty(sysProps.getProperty("http.proxyPassword"), proxyPassword_);
    proxyHost_ =
        firstNonEmpty(sysProps.getProperty("http.proxyHost"), proxyHost_);
    // The port must come from the port property, not from the host one.
    proxyPort_ =
        firstNonEmpty(sysProps.getProperty("http.proxyPort"), proxyPort_);

    if (useProxyAuthentication_)
    {
      System.out.println("Authenticating the proxy.");
      Authenticator.setDefault(new UrlGrab(proxyUsername_, proxyPassword_));
    }
    sysProps.setProperty("proxySet", "true");
    sysProps.setProperty("http.proxyHost", proxyHost_);
    sysProps.setProperty("http.proxyPort", proxyPort_);
    // NOTE(review): the JDK documents "http.nonProxyHosts" ('|' separated) as
    // the proxy bypass list; this "noProxy" key and its value are kept as they
    // were -- confirm whether anything actually reads it.
    sysProps.setProperty("noProxy", "*.bctc.com,*.bchydro.com");
  }


  /**
   *  Impl for Authenticator so an authorizing proxy will work.
   *
   *  @return the class proxy username and password.
   **/
  @Override
  protected PasswordAuthentication getPasswordAuthentication()
  {
    return new PasswordAuthentication(
        proxyUsername_, proxyPassword_.toCharArray());
  }


  /**
   * Simple URL to String using java 8.
   *
   * @param urlStr the string representation of the URL to read.
   * @return the text of the URL as returned by {@link #grab(URL)}, or an
   *         empty String if urlStr is not a well formed URL.
   **/
  public String grab(String urlStr)
  {
    try
    {
      return grab(new URL(urlStr));
    }
    catch (MalformedURLException badUrlEx)
    {
      System.out.println("Cannot grab " + urlStr + " at this time:");
      System.out.println("Reason: Malformed URL Exception URL for " + urlStr);
      return "";
    }
  }


  /**
   * Simple URL to String using java 8. The content is decoded as UTF-8 and
   * its lines are joined with a single newline character.
   *
   * @param url the URL to read.
   * @return the text of the URL, or the String "empty" if the URL is null or
   *         could not be read (the reason is printed to System.out).
   **/
  public String grab(URL url)
  {
    String pageText = "empty";
    if (url == null)
    {
      System.out.println("Cannot grab a null URL.");
      return pageText;
    }

    try
    {
      URLConnection conn = url.openConnection();
      try (java.io.BufferedReader reader = new java.io.BufferedReader(
               new java.io.InputStreamReader(
                   conn.getInputStream(),
                   java.nio.charset.StandardCharsets.UTF_8)))
      {
        pageText =
            reader.lines().collect(java.util.stream.Collectors.joining("\n"));
      }
    }
    catch (MalformedURLException badUrlEx)
    {
      System.out.println("Cannot grab " + url + " at this time:");
      System.out.println("Reason: Malformed URL Exception URL for " + url);
    }
    catch (IOException | java.io.UncheckedIOException ioEx)
    {
      // reader.lines() reports a read failure as an UncheckedIOException.
      System.out.println("Cannot grab " + url + " at this time:");
      System.out.println("Reason: The remote download URL specified " +
                         url + " is not permitting a connection.");
    }

    return pageText;
  }


  /**
   * Downloads the spec'd URL into the spec'd local file, printing the content
   * type, content length and a progress indicator to System.out.
   *
   * @param urlToGet the URL to grab; a null URL is reported and not grabbed.
   * @param grabbedFilename the filename of the local storage for the URL
   *        file; null or "" selects a fixed fallback name.
   * @return true if the whole content was written to the file, false if not
   *         (the reason is printed to System.out).
   **/
  public boolean grab(URL urlToGet, String grabbedFilename)
  {
    if (grabbedFilename == null || grabbedFilename.equals(""))
    {
      grabbedFilename = UNNAMED_GRAB_FILENAME;
    }

    if (urlToGet == null)
    {
      System.out.println("Cannot grab " + grabbedFilename + " at this time:");
      System.out.println("Reason: Cannot get the content from a null URL.");
      return false;
    }

    //go get it
    try
    {
      URLConnection connection = urlToGet.openConnection();
      System.out.println(connection.getContentType() + "> ("+
                         connection.getContentLength() + ")");

      // The remote stream is opened first so that no local file gets created
      // when the connection fails; both streams are closed on every exit path.
      try (InputStream inUrl = connection.getInputStream();
           FileOutputStream ostream = new FileOutputStream(grabbedFilename))
      {
        long bytesGrabbed = copyWithProgress(inUrl, ostream);
        System.out.println(". Done ("+ bytesGrabbed +" bytes grabbed)");
      }
      return true;
    }
    catch (MalformedURLException badUrlEx)
    {
      System.out.println("Cannot grab " + grabbedFilename + " at this time:");
      System.out.println("Reason: Cannot get the URL for " +
                         grabbedFilename);
    }
    catch (SecurityException secEx)
    {
      System.out.println("Cannot grab " + grabbedFilename + " at this time:");
      System.out.println("Reason: Cannot create the file due to security " +
                         "reasons.");
    }
    catch (FileNotFoundException fnfEx)
    {
      System.out.println("Cannot grab " + grabbedFilename + " at this time:");
      System.out.println("Reason: Cannot write the File.");
    }
    catch (IOException ioEx)
    {
      System.out.println("Cannot grab " + grabbedFilename + " at this time:");
      System.out.println("Reason: The remote download URL specified " +
                         urlToGet + " is not permitting a connection.");
    }
    return false;
  }


  /** Creates the default save directory if it is not already there. **/
  private static void ensureSaveLocationExists()
  {
    new File(DEFAULT_SAVE_LOCATION).mkdir();
  }


  /** Returns candidate unless it is null or "", in which case fallback is returned. **/
  private static String firstNonEmpty(String candidate, String fallback)
  {
    return (candidate != null && !candidate.equals("")) ? candidate : fallback;
  }


  /** Derives the local filename: the text after the last '/' of the URL's file part, or the fallback name. **/
  private static String localFilenameFor(URL url)
  {
    String remoteFile = url.getFile();
    String localName = remoteFile.substring(remoteFile.lastIndexOf('/') + 1);
    return localName.equals("") ? UNNAMED_GRAB_FILENAME : localName;
  }


  /** Copies in to out, printing one dot per PROGRESS_CHUNK_BYTES transferred; returns the byte count. **/
  private static long copyWithProgress(InputStream in, FileOutputStream out)
      throws IOException
  {
    byte [] buffer = new byte[PROGRESS_CHUNK_BYTES];
    long totalBytes = 0;
    long dotsPrinted = 0;
    System.out.print("..");

    int numBytes = in.read(buffer);
    while (numBytes != -1)
    {
      // write what we read
      out.write(buffer, 0, numBytes);
      totalBytes += numBytes;

      // A read may return fewer bytes than the buffer holds, so the dots are
      // derived from the running total rather than from each read.
      long dotsDue = totalBytes / PROGRESS_CHUNK_BYTES;
      while (dotsPrinted < dotsDue)
      {
        System.out.print(".");
        dotsPrinted++;
      }

      // go back for more
      numBytes = in.read(buffer);
    }
    return totalBytes;
  }

}
416