public class URLGenerator extends BaseGenerator
Modifier and Type | Class and Description |
---|---|
static class |
URLGenerator.Mode |
static interface |
URLGenerator.URLFilter
Defines a URL filter.
|
private class |
URLGenerator.Worker |
SourceBase.ElementType
Modifier and Type | Field and Description |
---|---|
protected int |
depthLimit |
protected String |
edgeWeight |
protected LinkedList<URLGenerator.URLFilter> |
filters |
protected Pattern |
hrefPattern |
protected ReentrantLock |
lock |
protected URLGenerator.Mode |
mode |
protected HashSet<String> |
newUrls |
protected String |
nodeWeight |
protected boolean |
printProgress |
private static String |
REGEX |
protected double |
step |
protected LinkedList<String> |
stepUrls |
protected int |
threads |
protected HashSet<String> |
urls |
addEdgeLabels, addNodeLabels, directed, edgeAttributeRange, edgeAttributes, internalGraph, nodeAttributeRange, nodeAttributes, random, randomlyDirected
attrSinks, eltsSinks, eventProcessing, eventQueue, sourceId, sourceTime
Constructor and Description |
---|
URLGenerator(String... startFrom) |
Modifier and Type | Method and Description |
---|---|
void |
acceptOnlyMatchingURL(String regex)
Can be used to filter URLs.
|
void |
addHostFilter(String... hosts)
Can be used to filter URLs according to the host.
|
protected void |
addNodeURL(String url) |
void |
addURL(String url)
Add a URL to process.
|
void |
begin()
Begin the graph generation.
|
protected void |
connect(String url1,
String url2) |
void |
declineMatchingURL(String regex)
Can be used to filter URLs.
|
void |
enableProgression(boolean on) |
protected String |
getEdgeId(String nodeId1,
String nodeId2) |
protected String |
getNodeId(String url) |
protected String |
getNodeLabel(String url) |
protected boolean |
isValid(String url) |
boolean |
nextEvents()
Perform the next step in generating the graph.
|
protected void |
nextEventsThreaded() |
protected void |
parseUrl(String url)
Parse a URL and add all extracted links to a specified set.
|
protected void |
progress() |
void |
setDepthLimit(int depthLimit)
Set the maximum number of steps before stopping.
|
void |
setDirected(boolean on)
Create directed edges.
|
void |
setEdgeWeightAttribute(String attribute)
Set the attribute key used to store weight of edges.
|
void |
setMode(URLGenerator.Mode mode)
Set the way that URLs are converted to node ids.
|
void |
setNodeWeightAttribute(String attribute)
Set the attribute key used to store weight of nodes.
|
void |
setThreadCount(int count)
Set the number of threads used to parse URLs.
|
protected void |
synchronizedOperation(String url1,
String url2) |
addEdge, addEdgeAttribute, addEdgeLabels, addNode, addNode, addNodeAttribute, addNodeLabels, clearKeptData, delEdge, delNode, end, isUsingInternalGraph, removeEdgeAttribute, removeNodeAttribute, setDirectedEdges, setEdgeAttributesRange, setNodeAttributesRange, setRandomSeed, setUseInternalGraph
addAttributeSink, addElementSink, addSink, attributeSinks, clearAttributeSinks, clearElementSinks, clearSinks, elementSinks, manageEvents, removeAttributeSink, removeElementSink, removeSink, sendAttributeChangedEvent, sendAttributeChangedEvent, sendEdgeAdded, sendEdgeAdded, sendEdgeAttributeAdded, sendEdgeAttributeAdded, sendEdgeAttributeChanged, sendEdgeAttributeChanged, sendEdgeAttributeRemoved, sendEdgeAttributeRemoved, sendEdgeRemoved, sendEdgeRemoved, sendGraphAttributeAdded, sendGraphAttributeAdded, sendGraphAttributeChanged, sendGraphAttributeChanged, sendGraphAttributeRemoved, sendGraphAttributeRemoved, sendGraphCleared, sendGraphCleared, sendNodeAdded, sendNodeAdded, sendNodeAttributeAdded, sendNodeAttributeAdded, sendNodeAttributeChanged, sendNodeAttributeChanged, sendNodeAttributeRemoved, sendNodeAttributeRemoved, sendNodeRemoved, sendNodeRemoved, sendStepBegins, sendStepBegins
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
addAttributeSink, addElementSink, addSink, clearAttributeSinks, clearElementSinks, clearSinks, removeAttributeSink, removeElementSink, removeSink
protected LinkedList<String> stepUrls
protected Pattern hrefPattern
protected URLGenerator.Mode mode
protected int threads
protected String nodeWeight
protected String edgeWeight
protected LinkedList<URLGenerator.URLFilter> filters
protected double step
protected boolean printProgress
protected int depthLimit
protected final ReentrantLock lock
public URLGenerator(String... startFrom)
public void begin()
Generator
Generator.nextEvents()
method to add elements to the graph.
public boolean nextEvents()
Generator
public void setDirected(boolean on)
on
- true to create directed edges
public void setNodeWeightAttribute(String attribute)
attribute
- attribute key of the weight of nodes
public void setEdgeWeightAttribute(String attribute)
attribute
- attribute key of the weight of edges
public void setMode(URLGenerator.Mode mode)
mode
- mode specifying how to convert a URL into a node id
public void setThreadCount(int count)
nextEvents()
step. At the end of this method, all working threads
have stopped.
count
- number of threads
public void setDepthLimit(int depthLimit)
depthLimit
- the maximum number of steps before stopping
public void enableProgression(boolean on)
public void acceptOnlyMatchingURL(String regex)
regex
-
public void declineMatchingURL(String regex)
regex
-
public void addHostFilter(String... hosts)
hosts
- list of accepted hosts
protected void nextEventsThreaded()
protected void parseUrl(String url) throws IOException
url
- the URL to parse
newUrls
- the set where extracted links will be added
IOException
protected String getNodeId(String url) throws URISyntaxException
URISyntaxException
protected String getNodeLabel(String url) throws URISyntaxException
URISyntaxException
protected void synchronizedOperation(String url1, String url2) throws URISyntaxException
URISyntaxException
protected void addNodeURL(String url) throws URISyntaxException
URISyntaxException
protected void connect(String url1, String url2) throws URISyntaxException
URISyntaxException
protected void progress()
WebARTS Library licensed under the GNU General Public License. Other libraries licensed under their respective open source licenses.