public class HttpUrlConnectionScrape extends Object
Modifier and Type | Field and Description |
---|---|
protected String |
baseUrl_ |
protected HttpsURLConnection |
connection_ |
protected List<String> |
cookies_ |
protected String |
dateStr_ |
protected boolean |
debugOut_ |
private String |
loginFormID_ |
protected String |
loginPageResponse_
Cache of the response text returned from the login page post.
|
private String |
loginUrl_ |
private String |
password_ |
private String |
passwordFormElementName_ |
private HashMap<String,String> |
requestProps_ |
protected static Calendar |
rightNow_ |
protected static int |
SCRAPE_CR |
protected static int |
SCRAPE_OC |
private String |
scrapeEnd_ |
private String |
scrapePageUrl_ |
protected static int |
scrapeSite_ |
private String |
scrapeStart_ |
protected static String |
USER_AGENT |
private String |
username_ |
private String |
usernameFormElementName_ |
Constructor and Description |
---|
HttpUrlConnectionScrape()
Default constructor; does nothing.
|
HttpUrlConnectionScrape(String loginUrl,
HashMap<String,String> requestProps,
String loginFormID,
String usernameFormElementName,
String passwordFormElementName,
String username,
String password,
String scrapePageUrl,
String scrapeStart,
String scrapeEnd)
All In One constructor.
|
Modifier and Type | Method and Description |
---|---|
boolean |
doLogin()
Sends the POST to the login url, using the parameters held in the class fields.
|
boolean |
doLogin(String loginUrl,
HashMap<String,String> requestProps,
String formID,
String usernameFormElementName,
String passwordFormElementName,
String username,
String password)
Sends the POST to the login url with all required parameters.
|
String |
doScrape(String scrapePageUrl) |
String |
doScrape(String scrapePageUrl,
String scrapeStart,
String scrapeEnd) |
String |
getBaseUrl()
Get Method for class field 'baseUrl_'.
|
HttpsURLConnection |
getConnection()
Get Method for class field 'connection_'.
|
List<String> |
getCookies() |
String |
getLoginFormID()
Get Method for class field 'loginFormID_'.
|
String |
getLoginFormParams(String html,
String formID,
String usernameFormElementName,
String passwordFormElementName,
String username,
String password)
Concatenates together the URL parameter string (i.e. name=value&name2=val2&someOtherParamName=val3)
for a specific login form in the passed URL/html string.
|
String |
getLoginUrl()
Get Method for class field 'loginUrl_'.
|
private String |
getPageContent(String url)
Connects/retrieves a URL; pulls its cookies and returns the resulting html as a string.
|
String |
getPassword()
Get Method for class field 'password_'.
|
String |
getPasswordFormElementName()
Get Method for class field 'passwordFormElementName_'.
|
HashMap<String,String> |
getRequestProps()
Get Method for class field 'requestProps_'.
|
String |
getScrapeEnd()
Get Method for class field 'scrapeEnd_'.
|
String |
getScrapePageUrl()
Get Method for class field 'scrapePageUrl_'.
|
String |
getScrapeStart()
Get Method for class field 'scrapeStart_'.
|
String |
getUsername()
Get Method for class field 'username_'.
|
String |
getUsernameFormElementName()
Get Method for class field 'usernameFormElementName_'.
|
static void |
main(String[] args) |
private int |
sendPost(String url,
String postParams,
HashMap<String,String> reqProps)
Sends a POST request to the url, along with all the passed post parameters and request properties.
|
void |
setBaseUrl(String baseUrl)
Set Method for class field 'baseUrl_'.
|
void |
setConnection(HttpsURLConnection connection)
Set Method for class field 'connection_'.
|
void |
setCookies(List<String> cookies) |
void |
setLoginFormID_(String loginFormID)
Set Method for class field 'loginFormID_'.
|
void |
setLoginUrl(String loginUrl)
Set Method for class field 'loginUrl_'.
|
void |
setPassword(String password)
Set Method for class field 'password_'.
|
void |
setPasswordFormElementName(String passwordFormElementName)
Set Method for class field 'passwordFormElementName_'.
|
HashMap<String,String> |
setRequestProps(HashMap<String,String> requestProps)
Set Method for class field 'requestProps_'.
|
void |
setScrapeEnd(String scrapeEnd)
Set Method for class field 'scrapeEnd_'.
|
void |
setScrapePageUrl(String scrapePageUrl)
Set Method for class field 'scrapePageUrl_'.
|
void |
setScrapeStart(String scrapeStart)
Set Method for class field 'scrapeStart_'.
|
void |
setUsername(String username)
Set Method for class field 'username_'.
|
void |
setUsernameFormElementName(String usernameFormElementName)
Set Method for class field 'usernameFormElementName_'.
|
static String |
writeStringToFile(String s,
String fileName)
Abstracts the writing of string to a file.
|
static String |
writeStringToFile(String s,
String fileName,
boolean zipCompress)
Abstracts the writing of string to a (zip) file (Zip NOT IMPLEMENTED YET).
|
protected static final int SCRAPE_OC
protected static final int SCRAPE_CR
protected static final String USER_AGENT
protected static int scrapeSite_
protected boolean debugOut_
protected HttpsURLConnection connection_
protected String loginPageResponse_
private HashMap<String,String> requestProps_
private String loginFormID_
private String usernameFormElementName_
private String passwordFormElementName_
private String scrapePageUrl_
private String scrapeStart_
private String scrapeEnd_
public HttpUrlConnectionScrape()
public HttpUrlConnectionScrape(String loginUrl, HashMap<String,String> requestProps, String loginFormID, String usernameFormElementName, String passwordFormElementName, String username, String password, String scrapePageUrl, String scrapeStart, String scrapeEnd)
public void setBaseUrl(String baseUrl)
baseUrl_
- is the value to set this class field to.
public String getBaseUrl()
public void setLoginUrl(String loginUrl)
loginUrl
- is the value to set this class field to.
public String getLoginUrl()
public HashMap<String,String> setRequestProps(HashMap<String,String> requestProps)
requestProps
- is the value to set this class field to.
public HashMap<String,String> getRequestProps()
public void setLoginFormID_(String loginFormID)
loginFormID
- is the value to set this class field to.
public String getLoginFormID()
public void setUsernameFormElementName(String usernameFormElementName)
usernameFormElementName_
- is the value to set this class field to.
public String getUsernameFormElementName()
public void setPasswordFormElementName(String passwordFormElementName)
passwordFormElementName_
- is the value to set this class field to.
public String getPasswordFormElementName()
public void setUsername(String username)
username_
- is the value to set this class field to.
public String getUsername()
public void setPassword(String password)
password_
- is the value to set this class field to.
public String getPassword()
public void setScrapePageUrl(String scrapePageUrl)
scrapePageUrl_
- is the value to set this class field to.
public String getScrapePageUrl()
public void setScrapeStart(String scrapeStart)
scrapeStart_
- is the value to set this class field to.
public String getScrapeStart()
public void setScrapeEnd(String scrapeEnd)
scrapeEnd_
- is the value to set this class field to.
public String getScrapeEnd()
public List<String> getCookies()
public void setCookies(List<String> cookies)
public void setConnection(HttpsURLConnection connection)
connection_
- is the value to set this class field to.
public HttpsURLConnection getConnection()
public boolean doLogin()
public boolean doLogin(String loginUrl, HashMap<String,String> requestProps, String formID, String usernameFormElementName, String passwordFormElementName, String username, String password)
HashMap<String,String> reqProps = new HashMap<String,String>();
reqProps.put("Accept","text/html,application/xhtml+xml,application/xml");
reqProps.put("Accept-Encoding ","gzip, deflate, br");
reqProps.put("Accept-Language ","en-US,en;q=0.5");
reqProps.put("Connection","keep-alive");
reqProps.put("Content-Type","application/x-www-form-urlencoded");
reqProps.put("Host","red.webarts.bc.ca");
//reqProps.put("Referer","red.webarts.bc.ca");
reqProps.put("Upgrade-Insecure-Requests","1");
loginUrl
- requestProps
- formID
- usernameFormElementName
- passwordFormElementName
- username
- password
-
private int sendPost(String url, String postParams, HashMap<String,String> reqProps) throws Exception
url
- is the url to post this stuff to
postParams
- is a sequential string of params that get sent in this post
Exception
private String getPageContent(String url) throws Exception
Exception
public String getLoginFormParams(String html, String formID, String usernameFormElementName, String passwordFormElementName, String username, String password) throws UnsupportedEncodingException
html
- is the string representation of the URL that has the form to parse
formID
- the form id (or name) to parse parameters from
usernameFormElementName
- the form elementName used for the username field
passwordFormElementName
- the form elementName used for the password field
username
- the actual login username to use in the form
password
- the actual login password to use in the form
UnsupportedEncodingException
public static String writeStringToFile(String s, String fileName)
s
- is the String to write out
fileName
- is the file name of the file to write the String into
public static String writeStringToFile(String s, String fileName, boolean zipCompress)
s
- is the String to write out
fileName
- is the file name of the file to write the String into
zipCompress
- boolean flag to compress with zip compression
WebARTS Library Licensed Under the GNU - General Public License. Other Libraries licensed under their respective Open Source Licenses