001/* 002 * UrlGrab.java - A widget to grab URLs through a proxy. 003 * 004 * $URL: svn://svn.webarts.bc.ca/open/trunk/projects/WebARTS/ca/bc/webarts/widgets/UrlGrab.java $ 005 * $Revision: 1235 $ 006 * $Date: 2018-03-02 22:30:24 -0800 (Fri, 02 Mar 2018) $ 007 * 008 * 009 * Written by Tom Gutwin - WebARTS Design. 010 * Copyright (C) 2003-2016 WebARTS Design, North Vancouver Canada 011 * http://www.webarts.bc.ca 012 * 013 * This program is free software; you can redistribute it and/or modify 014 * it under the terms of the GNU General Public License as published by 015 * the Free Software Foundation; either version 2 of the License, or 016 * (at your option) any later version. 017 * 018 * This program is distributed in the hope that it will be useful, 019 * but WITHOUT ANY WARRANTY; without even the implied warranty of 020 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 021 * GNU General Public License for more details. 022 * 023 * You should have received a copy of the GNU General Public License 024 * along with this program; if not, write to the Free Software 025 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 026 */ 027package ca.bc.webarts.widgets; 028 029import java.io.File; 030import java.io.FileInputStream; 031import java.io.FileOutputStream; 032import java.io.FileNotFoundException; 033import java.io.InputStream; 034import java.io.IOException; 035 036import java.lang.SecurityException; 037 038import java.util.Properties; 039 040import java.net.Authenticator; 041import java.net.HttpURLConnection; 042import java.net.MalformedURLException; 043import java.net.URL; 044import java.net.URLConnection; 045import java.net.PasswordAuthentication; 046 047/** 048 * A very simplistic class to grab a URL and save the file to a local file. 049 * It includes the option to specify a proxy as well as authentication on that 050 * proxy.<br><br>It also includes a main method to test as well as use this 051 * classes functionality from the commandline. 052 * 053 * <pre> 054 * Usage: 055 * java -DproxySet=true -DproxyHost=YourProxyServerHostname 056 * -DproxyPort=YourProxyServerPort 057 * -DproxyUser=YourProxyUsernameIFNEEDED 058 * -DproxyPassword=YourProxyUserPasswordIFNEEDED 059 * -DnonProxyHosts=CommaSeperatedlistOfNonProxiedSites 060 * ca.bc.webarts.widgets.UrlGrab [URL location 1] [URL location2]... 061 *</pre> 062 * 063 * @author Tom Gutwin P.Eng 064 **/ 065public class UrlGrab extends Authenticator 066{ 067 /** A holder for the NA Clients System File Separator. **/ 068 private static final String SYSTEM_FILE_SEPERATOR = File.separator; 069 070 /** 071 * A holder for the directory location to save downloads. DEFAULT is the 072 * current working dir. 073 **/ 074 private static final String DEFAULT_SAVE_LOCATION = 075 System.getProperty("user.dir") + SYSTEM_FILE_SEPERATOR; 076 077 public static final String DEFAULT_PROXY_SET = "true"; 078 public static final String DEFAULT_PROXY_HOST = "companyProxy"; 079 public static final String DEFAULT_PROXY_PORT = "80"; 080 public static final String DEFAULT_PROXY_NOPROXY = ""; 081 082 static private boolean areWeUsingProxy_ = true; 083 static private boolean useProxyAuthentication_ = true; 084 085 /** the class Proxy Username to authenticate with. **/ 086 static private String proxyUsername_ = "NedFlanders"; 087 088 /** the class Proxy Password to authenticate with. **/ 089 static private String proxyPassword_ = "someGoofyPasswordOfYourOwn"; 090 091 /** the Proxy hostname. **/ 092 static private String proxyHost_ = DEFAULT_PROXY_HOST; 093 094 /** the Proxy port. **/ 095 static private String proxyPort_ = DEFAULT_PROXY_PORT; 096 097 098 099 /** 100 * The default empty constructor for this class. It initsbut does no grabbing. 101 **/ 102 public UrlGrab() 103 { 104 } 105 106 107 /** 108 * The main constructor for this class. It inits and grabs the spec'd URL 109 * into the current directory. 110 * 111 * @param urlStr the string representation of the URL to grab. 112 * 113 **/ 114 public UrlGrab(String urlStr) 115 { 116 new File(DEFAULT_SAVE_LOCATION).mkdir(); // ensures the dir exests 117 URL urlToGet = null; 118 try 119 { 120 this.initProxy(); 121 urlToGet = new URL(urlStr); 122 grab(urlToGet, DEFAULT_SAVE_LOCATION+ 123 urlToGet.getFile().substring(urlToGet.getFile().lastIndexOf("/")+1 )); 124 } 125 catch (MalformedURLException badUrlEx) 126 { 127 System.out.println("Cannot grab " + urlStr + "."); 128 } 129 } 130 131 132 /** 133 * A constructor for this class that takes an array of URLs. It inits and 134 * grabs the spec'd URLs into the current directory. 135 * 136 * @param urlStr the string representation of the URL to grab. 137 * 138 **/ 139 public UrlGrab(String[] urlStr) 140 { 141 new File(DEFAULT_SAVE_LOCATION).mkdir(); // ensures the dir exests 142 URL urlToGet = null; 143 for (int i=0; i < urlStr.length; i++) 144 { 145 try 146 { 147 this.initProxy(); 148 urlToGet = new URL(urlStr[i]); 149 String grabbedFilename = ""; 150 if (urlStr[i] !=null && !urlStr[i].equals("")) 151 { 152 grabbedFilename = 153 urlToGet.getFile().substring(urlToGet.getFile().lastIndexOf("/")+1 ); 154 if (grabbedFilename.equals("")) 155 grabbedFilename = "unamedGrabbedUrl"; 156 } 157 grab(urlToGet, DEFAULT_SAVE_LOCATION + grabbedFilename); 158 } 159 catch (MalformedURLException badUrlEx) 160 { 161 System.out.println("Cannot grab " + urlStr[i] + "."); 162 } 163 } 164 } 165 166 167 /** 168 * The main constructor for this class. It inits and grabs the spec'd URL 169 * into the spec'd filename. 170 * 171 * @param urlToGet the URL to grab. 172 * @param grabbedFilename the filename of the local storage for the URL file. 173 * 174 **/ 175 public UrlGrab(URL urlToGet, String grabbedFilename) 176 { 177 if(urlToGet != null) 178 { 179 new File(DEFAULT_SAVE_LOCATION).mkdir(); // ensures the dir exests 180 this.initProxy(); 181 grab(urlToGet, grabbedFilename); 182 } 183 else 184 { 185 System.out.println("Cannot grab a null URL."); 186 } 187 } 188 189 190 /** 191 * 192 * A simple/useless constructor for the sole purpose of providing the 193 * Authenticator impl to the net authentication mechanism. Both the passed 194 * params can be null or "" - if they are the default class username and 195 * pass are used. 196 * 197 * @param proxyUser is the usernamer to pass to the proxy 198 * @param proxyPass is the password to pass to the proxy 199 */ 200 public UrlGrab(String proxyUser, String proxyPass) 201 { 202 if (proxyUser != null && !proxyUser.equals("")) 203 proxyUsername_ = proxyUser; 204 if (proxyPass != null && !proxyPass.equals("")) 205 proxyPassword_ = proxyPass; 206 } 207 208 209 /** 210 * The main entry for this app. It takes url strings on the commandline. 211 * 212 * @param args are the URLs to grab. 213 **/ 214 public static void main(String [] args) 215 { 216 if (args.length > 0) 217 { 218 UrlGrab instance = new UrlGrab(); 219 for (int i=0; i< args.length; i++) 220 { 221 System.out.println("URL"+i+ " = " + args[i]); 222 System.out.println(instance.grab(args[i])); 223 System.out.println(); 224 } 225 } 226 else 227 { 228 System.out.println("You must specify at least 1 URL to grab."); 229 } 230 } 231 232 233 /** 234 * Does the gruntwork to get the proxy properties set. 235 **/ 236 237 public static void initProxy() 238 { 239 // Set up the proxy settings if needed 240 Properties sysProps = System.getProperties(); 241 String tmpUseproxy = sysProps.getProperty("proxySet"); 242 if (tmpUseproxy != null && !tmpUseproxy.equals("")) 243 areWeUsingProxy_ = (tmpUseproxy.toLowerCase().equals("true")?true:false); 244 245 if (areWeUsingProxy_) 246 { 247 System.out.println("Initializing the proxy settings"); 248 //check if the user had something defined in the Sys Props via cmdline. 249 String tmpUser = sysProps.getProperty("http.proxyUser"); 250 String tmpPass = sysProps.getProperty("http.proxyPassword"); 251 String tmpHost = sysProps.getProperty("http.proxyHost"); 252 String tmpPort = sysProps.getProperty("http.proxyPort"); 253 254 if (tmpUser != null && !tmpUser.equals("")) 255 proxyUsername_ = tmpUser; 256 if (tmpPass != null && !tmpPass.equals("")) 257 proxyPassword_ = tmpPass; 258 if (tmpHost != null && !tmpHost.equals("")) 259 proxyHost_ = tmpHost; 260 if (tmpPort != null && !tmpPort.equals("")) 261 proxyPort_ = tmpHost; 262 263 if (useProxyAuthentication_) 264 { 265 System.out.println("Authenticating the proxy."); 266 Authenticator.setDefault(new UrlGrab(proxyUsername_, proxyPassword_)); 267 } 268 sysProps.put("proxySet", "true"); 269 sysProps.put("http.proxyHost", proxyHost_); 270 sysProps.put("http.proxyPort", proxyPort_); 271 sysProps.put("noProxy", "*.bctc.com,*.bchydro.com"); 272 } 273 274 } 275 276 277 /** 278 * Impl for Authenticator so an authorizing proxy will work. 279 **/ 280 281 protected PasswordAuthentication getPasswordAuthentication() 282 { 283 return new PasswordAuthentication( 284 proxyUsername_, proxyPassword_.toCharArray()); 285 } 286 287 288 /** Simple URL to String using java 8. **/ 289 public String grab(String urlStr) 290 { 291 String retVal = ""; 292 try 293 { 294 URL url = new URL(urlStr); 295 retVal = grab(url); 296 } 297 catch (MalformedURLException badUrlEx) 298 { 299 System.out.println("Cannot grab " + urlStr + " at this time:"); 300 System.out.println("Reason: Malformed URL Exception URL for " + urlStr); 301 } 302 return retVal; 303 } 304 305 306 /** Simple URL to String using java 8. **/ 307 public String grab(URL url) 308 { 309 String pageText = "empty"; 310 //java.net.URL url = new java.net.URL(localJolokiaStr_ +"/search/org.eclipse.jetty.webapp:type=webappcontext,*"); 311 try 312 { 313 java.net.URLConnection conn = url.openConnection(); 314 try (java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.InputStreamReader(conn.getInputStream(), "UTF-8"))) 315 { 316 pageText = reader.lines().collect(java.util.stream.Collectors.joining("\n")); 317 } 318 } 319 catch (MalformedURLException badUrlEx) 320 { 321 System.out.println("Cannot grab " + url + " at this time:"); 322 System.out.println("Reason: Malformed URL Exception URL for " + url); 323 } 324 catch (IOException ioEx) 325 { 326 System.out.println("Cannot grab " + url + " at this time:"); 327 System.out.println("Reason: The remote download URL specified " + 328 url + " is not permitting a connection."); 329 } 330 331 return pageText; 332 } 333 334 335 public boolean grab(URL urlToGet, String grabbedFilename) 336 { 337 boolean retVal = true; 338 if (!(grabbedFilename !=null && !grabbedFilename.equals(""))) 339 grabbedFilename = "unamedGrabbedUrl"; 340 //go get it 341 try 342 { 343 URLConnection connection = 344 (URLConnection) urlToGet.openConnection(); 345 System.out.println(connection.getContentType() + "> ("+ 346 connection.getContentLength() + ")"); 347 348 // Store the file locally 349 InputStream inUrl = null; 350 if (urlToGet.getProtocol().equals("http")) 351 inUrl = ((HttpURLConnection) connection).getInputStream(); 352 else 353 inUrl = connection.getInputStream(); 354 FileOutputStream ostream = new FileOutputStream(grabbedFilename); 355 356 // start transfering 357 byte [] bytesRead= new byte[1024]; 358 int numBytes = inUrl.read(bytesRead); 359 int status = 0; 360 System.out.print(".."); 361 while (numBytes != -1) 362 { 363 // write what we read 364 ostream.write(bytesRead, 0, numBytes); 365 366 // update our status indicator 367 status += numBytes; 368 if (status % 1024 == 0) 369 System.out.print("."); 370 371 // go back for more 372 numBytes = inUrl.read(bytesRead); 373 } 374 System.out.println(". Done ("+ status +" bytes grabbed)"); 375 ostream.close(); 376 retVal = true; 377 } 378 catch (NullPointerException nullEx) 379 { 380 System.out.println("Cannot grab " + grabbedFilename + " at this time:"); 381 System.out.println("Reason: Cannot get the content from " + 382 urlToGet.toString()); 383 retVal = false; 384 } 385 catch (MalformedURLException badUrlEx) 386 { 387 System.out.println("Cannot grab " + grabbedFilename + " at this time:"); 388 System.out.println("Reason: Cannot get the URL for " + 389 grabbedFilename); 390 retVal = false; 391 } 392 catch (SecurityException secEx) 393 { 394 System.out.println("Cannot grab " + grabbedFilename + " at this time:"); 395 System.out.println("Reason: Cannot create the file due to security " + 396 "reasons."); 397 retVal = false; 398 } 399 catch (FileNotFoundException fnfEx) 400 { 401 System.out.println("Cannot grab " + grabbedFilename + " at this time:"); 402 System.out.println("Reason: Cannot write the File."); 403 retVal = false; 404 } 405 catch (IOException ioEx) 406 { 407 System.out.println("Cannot grab " + grabbedFilename + " at this time:"); 408 System.out.println("Reason: The remote download URL specified " + 409 urlToGet + " is not permitting a connection."); 410 retVal = false; 411 } 412 return retVal; 413 } 414 415} 416