public class NewsScraper extends Object
<%@ page language="java" %> <jsp:useBean id="newsBean" class="ca.bc.webarts.widgets.NewsScraper" scope="page" /> <HTML><BODY> <table cellpadding="2" border="1"> <tr> <td valign="top" width="60%"> <center><U>Java News</U> </center> <BR /> <!-- Java News Feed --> <jsp:setProperty name="newsBean" property="connectionUrlStr" value="http://www-105.ibm.com/developerworks/news.nsf/dw/java-current-bydate?OpenDocument&Count=500&loc=j" /> <jsp:setProperty name="newsBean" property="startString" value="<!-- START CONTENT AREA -->" /> <jsp:setProperty name="newsBean" property="endString" value="<!-- END CONTENT AREA -->" /> <jsp:setProperty name="newsBean" property="searchScrapeTerm" value="developerworks/cgi-bin/click.cgi?url=" /> <span id="small"> <%= newsBean.doFullScrape() %> </span> </td> </tr> </table> </body> </html>
Modifier and Type | Field and Description |
---|---|
private int |
cacheTime_
The number of seconds to wait before revisiting the URL Connection to update the
data that was scraped.
|
private String |
connectionUrlStr
The String representation of the URL to connect to
|
private String |
content |
private String |
endSearchScrapeTerm
A string used to mark the end of the scraped data from the URL
Connection data.
|
private String |
endString
A string used to mark the end of the useful data from the URL Connection
data.
|
private String |
fullScrape_
A string that will hold the scraped data
|
private long |
lastVisitTime_
Description of the Field
|
private String |
linkPrependUrlStr_
The URL String that will get prepended to the front of the scraped News
link.
|
private int |
maxNewsItems_
The maximum number of scraped items to store in the fullScrape_
variable.
|
private InputStreamReader |
newsReader
The reader that will read from the URL Connection.
|
private HttpURLConnection |
newsSourceConnection
The URL Connection that will be the source for the scraped items
|
private InputStream |
newsStream
The input stream from the URL Connection.
|
private String |
searchScrapeTerm
A string used to mark the start of the scraped data from the URL
Connection data.
|
private String |
startString
A string used to mark the start of the useful data from the URL
Connection data.
|
private boolean |
useCachedResults_ |
Constructor and Description |
---|
NewsScraper()
Constructor for the NewsScraper object
|
NewsScraper(HttpURLConnection con)
Constructor for the NewsScraper object
|
NewsScraper(HttpURLConnection con,
String strt,
String end)
Constructor for the NewsScraper object
|
NewsScraper(String connectionUrlStr,
String strt,
String end,
String searchTerm)
Constructor for the NewsScraper object
|
Modifier and Type | Method and Description |
---|---|
private String |
cleanScrapedEntry(String scrapedEntry)
This method takes the passed string and removes any internal HTML tags
that might screw up the formatting of the scrape.
|
String |
doFullScrape()
Goes to the predefined URL and scrapes the requested data.
|
int |
getCacheTime()
Gets the cacheTime_ attribute of the NewsScraper object
|
String |
getConnectionUrlStr()
Gets the connectionUrlStr attribute of the NewsScraper object
|
String |
getEndSearchScrapeTerm()
Gets the endSearchScrapeTerm attribute of the NewsScraper object
|
String |
getEndString()
Gets the endString attribute of the NewsScraper object
|
String |
getFullScrape()
Gets the fullScrape_ attribute of the NewsScraper object
|
String |
getLinkPrependUrlStr() |
int |
getMaxNewsItems()
Gets the maxNewsItems_ attribute of the NewsScraper object
|
HttpURLConnection |
getNewsSourceConnection()
Gets the newsSourceConnection attribute of the NewsScraper object
|
String |
getSearchScrapeTerm()
Gets the searchScrapeTerm attribute of the NewsScraper object
|
String |
getStartString()
Gets the startString attribute of the NewsScraper object
|
static void |
main(String[] args)
The main program for the NewsScraper class. It serves as a test entry
point.
|
void |
setCacheTime(int cacheTime_)
Sets the cacheTime_ attribute of the NewsScraper object
|
void |
setConnectionUrlStr(String connectionUrlStr)
Sets the connectionUrlStr attribute of the NewsScraper object
|
void |
setEndSearchScrapeTerm(String endSearchScrapeTerm)
Sets the endSearchScrapeTerm attribute of the NewsScraper object
|
void |
setEndString(String endString)
Sets the endString attribute of the NewsScraper object
|
void |
setFullScrape(String fullScrape_)
Sets the fullScrape_ attribute of the NewsScraper object
|
void |
setLinkPrependUrlStr(String linkPrependUrlStr) |
void |
setMaxNewsItems(int maxNewsItems_)
Sets the maxNewsItems_ attribute of the NewsScraper object
|
void |
setNewsSourceConnection(HttpURLConnection newsSourceConnection)
Sets the newsSourceConnection attribute of the NewsScraper object
|
void |
setSearchScrapeTerm(String searchScrapeTerm)
Sets the searchScrapeTerm attribute of the NewsScraper object
|
void |
setStartString(String strtString)
Sets the startString attribute of the NewsScraper object
|
private HttpURLConnection newsSourceConnection
private InputStreamReader newsReader
private InputStream newsStream
private String connectionUrlStr
private String startString
private String endString
private String searchScrapeTerm
The data between this term (searchScrapeTerm) and the start of the
endSearchScrapeTerm will be used as one item of scraped data.
private String endSearchScrapeTerm
The data between the searchScrapeTerm and the start of the
endSearchScrapeTerm will be used as one item of scraped data.
private String fullScrape_
private int maxNewsItems_
The maximum number of scraped items to store in the fullScrape_ variable.
private String linkPrependUrlStr_
private int cacheTime_
private boolean useCachedResults_
private long lastVisitTime_
public NewsScraper()
public NewsScraper(HttpURLConnection con)
con - Description of the Parameter
public NewsScraper(HttpURLConnection con, String strt, String end)
con - Description of the Parameter
strt - Description of the Parameter
end - Description of the Parameter
public NewsScraper(String connectionUrlStr, String strt, String end, String searchTerm)
connectionUrlStr - Description of the Parameter
strt - Description of the Parameter
end - Description of the Parameter
searchTerm - Description of the Parameter
public void setMaxNewsItems(int maxNewsItems_)
maxNewsItems_ - The new maxNewsItems_ value
public int getMaxNewsItems()
public void setNewsSourceConnection(HttpURLConnection newsSourceConnection)
newsSourceConnection - The new newsSourceConnection value
public HttpURLConnection getNewsSourceConnection()
public void setFullScrape(String fullScrape_)
fullScrape_ - The new fullScrape_ value
public String getFullScrape()
public void setSearchScrapeTerm(String searchScrapeTerm)
searchScrapeTerm - The new searchScrapeTerm value
public String getSearchScrapeTerm()
public void setEndSearchScrapeTerm(String endSearchScrapeTerm)
endSearchScrapeTerm - The new endSearchScrapeTerm value
public String getEndSearchScrapeTerm()
public void setLinkPrependUrlStr(String linkPrependUrlStr)
public String getLinkPrependUrlStr()
public void setConnectionUrlStr(String connectionUrlStr)
connectionUrlStr - The new connectionUrlStr value
public String getConnectionUrlStr()
public void setStartString(String strtString)
strtString - The new startString value
public String getStartString()
public void setEndString(String endString)
endString - The new endString value
public String getEndString()
public void setCacheTime(int cacheTime_)
cacheTime_ - The new cacheTime_ value
public int getCacheTime()
private String cleanScrapedEntry(String scrapedEntry)
scrapedEntry - The scraped String to parse
public String doFullScrape()
WebARTS Library Licensed Under the GNU - General Public License. Other Libraries licensed under their respective Open Source Licenses