001/* 002 * Copyright 2006 - 2013 003 * Stefan Balev <stefan.balev@graphstream-project.org> 004 * Julien Baudry <julien.baudry@graphstream-project.org> 005 * Antoine Dutot <antoine.dutot@graphstream-project.org> 006 * Yoann Pigné <yoann.pigne@graphstream-project.org> 007 * Guilhelm Savin <guilhelm.savin@graphstream-project.org> 008 * 009 * This file is part of GraphStream <http://graphstream-project.org>. 010 * 011 * GraphStream is a library whose purpose is to handle static or dynamic 012 * graph, create them from scratch, file or any source and display them. 013 * 014 * This program is free software distributed under the terms of two licenses, the 015 * CeCILL-C license that fits European law, and the GNU Lesser General Public 016 * License. You can use, modify and/ or redistribute the software under the terms 017 * of the CeCILL-C license as circulated by CEA, CNRS and INRIA at the following 018 * URL <http://www.cecill.info> or under the terms of the GNU LGPL as published by 019 * the Free Software Foundation, either version 3 of the License, or (at your 020 * option) any later version. 021 * 022 * This program is distributed in the hope that it will be useful, but WITHOUT ANY 023 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 024 * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 025 * 026 * You should have received a copy of the GNU Lesser General Public License 027 * along with this program. If not, see <http://www.gnu.org/licenses/>. 028 * 029 * The fact that you are presently reading this means that you have had 030 * knowledge of the CeCILL-C and LGPL licenses and that you accept their terms. 031 */ 032package org.graphstream.algorithm; 033 034import java.util.ArrayList; 035import java.util.List; 036 037import org.graphstream.graph.Graph; 038import org.graphstream.graph.Node; 039import org.graphstream.stream.ElementSink; 040 041/** 042 * <p> 043 * The PageRank is an algorithm that measures the "importance" of the nodes in a 044 * graph. It assigns to each node a rank. This rank corresponds to the 045 * probability that a "random surfer" visits the node. The surfer goes from node 046 * to node in the following way: with probability <it>d</it> she chooses a 047 * random outgoing arc and with probability <it>1 - d</it> she "teleports" to a 048 * random node (possibly not connected to the current). The probability 049 * <it>d</it> is called damping factor. By default it is 0.85 but it can be 050 * customized (see {@link #setDampingFactor(double)}). The ranks are real 051 * numbers between 0 and 1 and sum up to one. 052 * </p> 053 * 054 * <h2>Usage</h2> 055 * 056 * <p> 057 * This implementation uses a variant of the power iteration algorithm to 058 * compute the node ranks. It computes the approximate ranks iteratively going 059 * closer to the exact values at each iteration. The accuracy can be controlled 060 * by a precision parameter (see {@link #setPrecision(double)}). When the L1 061 * norm of the difference between two consecutive rank vectors becomes less than 062 * this parameter, the result is considered precise enough and the computation 063 * stops. 064 * </p> 065 * 066 * <p> 067 * This implementation works with both directed and undirected edges. An 068 * undirected edge acts as two directed arcs. 069 * </p> 070 * 071 * <p> 072 * The graph dynamics is taken into account and the ranks are not computed from 073 * scratch at each modification in the structure of the graph. However, the 074 * ranks become less and less accurate after each modification. To establish the 075 * desired precision, one must either explicitly call {@link #compute()} or ask 076 * for a rank of a node by calling {@link #getRank(Node)}. 077 * </p> 078 * 079 * <p> 080 * The computed ranks are stored in node attribute. The name of this attribute 081 * can be changed by a call to {@link #setRankAttribute(String)} but only before 082 * the call to {@link #init(Graph)}. Another way to obtain the ranks is to call 083 * {@link #getRank(Node)}. The second method is preferable because it will 084 * update the ranks if needed and will always return values within the desired 085 * precision. 086 * </p> 087 * 088 * <h2>Example</h2> 089 * 090 * <pre> 091 * Graph graph = new SingleGraph("test"); 092 * graph.addAttribute("ui.antialias", true); 093 * graph.addAttribute("ui.stylesheet", 094 * "node {fill-color: red; size-mode: dyn-size;} edge {fill-color:grey;}"); 095 * graph.display(); 096 * 097 * DorogovtsevMendesGenerator generator = new DorogovtsevMendesGenerator(); 098 * generator.setDirectedEdges(true, true); 099 * generator.addSink(graph); 100 * 101 * PageRank pageRank = new PageRank(); 102 * pageRank.init(graph); 103 * 104 * generator.begin(); 105 * while (graph.getNodeCount() < 100) { 106 * generator.nextEvents(); 107 * for (Node node : graph) { 108 * double rank = pageRank.getRank(node); 109 * node.addAttribute("ui.size", 110 * 5 + Math.sqrt(graph.getNodeCount() * rank * 20)); 111 * node.addAttribute("ui.label", String.format("%.2f%%", rank * 100)); 112 * } 113 * Thread.sleep(1000); 114 * } 115 * </pre> 116 * 117 * @complexity Each iteration takes O(m + n) time, where n is the number of 118 * nodes and m is the number of edges. The number of iterations 119 * needed to converge depends on the desired precision. 120 * 121 * @reference Lawrence Page, Sergey Brin, Rajeev Motwani and Terry Winograd. The 122 * PageRank citation ranking: Bringing order to the Web. 1999 123 * 124 * 125 */ 126public class PageRank implements DynamicAlgorithm, ElementSink { 127 /** 128 * Default damping factor 129 */ 130 public static final double DEFAULT_DAMPING_FACTOR = 0.85; 131 132 /** 133 * Default precision 134 */ 135 public static final double DEFAULT_PRECISION = 1.0e-5; 136 137 /** 138 * Default rank attribute 139 */ 140 public static final String DEFAULT_RANK_ATTRIBUTE = "PageRank"; 141 142 /** 143 * Current damping factor 144 */ 145 protected double dampingFactor; 146 147 /** 148 * Current numeric precision 149 */ 150 protected double precision; 151 152 /** 153 * Current rank attribute 154 */ 155 protected String rankAttribute; 156 157 /** 158 * Our graph 159 */ 160 protected Graph graph; 161 162 /** 163 * Am I up to date ? 164 */ 165 protected boolean upToDate; 166 167 /** 168 * The L1 norm of the difference between two consecutive rank vectors 169 */ 170 protected double normDiff; 171 172 /** 173 * Used to temporary store the new ranks during an iteration 174 */ 175 protected List<Double> newRanks; 176 177 /** 178 * total iteration count 179 */ 180 protected int iterationCount; 181 182 /** 183 * Verbose mode 184 */ 185 protected boolean verbose; 186 187 /** 188 * Creates a new instance. 189 * 190 * The damping factor, the precision and the rank attribute are set to their 191 * default values 192 */ 193 public PageRank() { 194 this(DEFAULT_DAMPING_FACTOR, DEFAULT_PRECISION, DEFAULT_RANK_ATTRIBUTE); 195 } 196 197 /** 198 * Creates a new instance. 199 * 200 * @param dampingFactor 201 * Damping factor 202 * @param precision 203 * Numeric precision 204 * @param rankAttribute 205 * Rank attribute 206 */ 207 public PageRank(double dampingFactor, double precision, String rankAttribute) { 208 setDampingFactor(dampingFactor); 209 setPrecision(precision); 210 setRankAttribute(rankAttribute); 211 verbose = false; 212 } 213 214 // parameters 215 216 /** 217 * Returns the current damping factor. 218 * 219 * @return The current damping factor 220 */ 221 public double getDampingFactor() { 222 return dampingFactor; 223 } 224 225 /** 226 * Sets the damping factor. 227 * 228 * @param dampingFactor 229 * The new damping factor 230 * @throws IllegalArgumentException 231 * If the damping factor is less than 0.01 or greater than 0.99 232 */ 233 public void setDampingFactor(double dampingFactor) 234 throws IllegalArgumentException { 235 if (dampingFactor < 0.01 || dampingFactor > 0.99) 236 throw new IllegalArgumentException( 237 "The damping factor must be between 0.01 and 0.99"); 238 this.dampingFactor = dampingFactor; 239 upToDate = false; 240 } 241 242 /** 243 * Returns the currently used numeric precision 244 * 245 * @return The precision 246 */ 247 public double getPrecision() { 248 return precision; 249 } 250 251 /** 252 * Sets the numeric precision. Precision values close to zero lead to more 253 * accurate results, but slower convergence 254 * 255 * @param precision 256 * The new precision 257 * @throws IllegalArgumentException 258 * if the precision is less than 1.0e-7 259 */ 260 public void setPrecision(double precision) throws IllegalArgumentException { 261 if (precision < 1.0e-7) 262 throw new IllegalArgumentException("Precision is too small"); 263 this.precision = precision; 264 upToDate = false; 265 } 266 267 /** 268 * Returns the current rank attribute 269 * 270 * @return The current rank attribute 271 */ 272 public String getRankAttribute() { 273 return rankAttribute; 274 } 275 276 /** 277 * Sets the rank attribute. 278 * 279 * The computed ranks of each node are stored as values of this attribute. 280 * 281 * @param rankAttribute 282 * The node attribute used to store the computed ranks 283 * @throws IllegalStateException 284 * if the algorithm is already initialized 285 */ 286 public void setRankAttribute(String rankAttribute) 287 throws IllegalStateException { 288 if (graph != null) 289 throw new IllegalStateException( 290 "this method can be called only before init"); 291 this.rankAttribute = rankAttribute; 292 } 293 294 /** 295 * Switches on or off the verbose mode. 296 * 297 * In verbose mode the algorithm prints at each iteration the number of 298 * iterations and the L1 norm of the difference between the current and the 299 * previous rank vectors. 300 * 301 * @param verbose 302 * Verbose mode 303 */ 304 public void setVerbose(boolean verbose) { 305 this.verbose = verbose; 306 } 307 308 // DynamicAlgorithm implementation 309 310 public void init(Graph graph) { 311 this.graph = graph; 312 graph.addElementSink(this); 313 double initialRank = 1.0 / graph.getNodeCount(); 314 for (Node node : graph) 315 node.addAttribute(rankAttribute, initialRank); 316 newRanks = new ArrayList<Double>(graph.getNodeCount()); 317 upToDate = false; 318 iterationCount = 0; 319 } 320 321 public void compute() { 322 if (upToDate) 323 return; 324 do { 325 iteration(); 326 if (verbose) 327 System.err.printf("%6d%16.8f%n", iterationCount, normDiff); 328 } while (normDiff > precision); 329 upToDate = true; 330 } 331 332 public void terminate() { 333 graph.removeElementSink(this); 334 newRanks.clear(); 335 newRanks = null; 336 graph = null; 337 } 338 339 // ElementSink implementation 340 341 public void nodeAdded(String sourceId, long timeId, String nodeId) { 342 // the initial rank of the new node will be 0 343 graph.getNode(nodeId).addAttribute(rankAttribute, 344 graph.getNodeCount() == 1 ? 1.0 : 0.0); 345 upToDate = false; 346 } 347 348 public void nodeRemoved(String sourceId, long timeId, String nodeId) { 349 // removed node will give equal parts of its rank to the others 350 double part = graph.getNode(nodeId).getNumber(rankAttribute) 351 / (graph.getNodeCount() - 1); 352 for (Node node : graph) 353 if (!node.getId().equals(nodeId)) 354 node.addAttribute(rankAttribute, node.getNumber(rankAttribute) 355 + part); 356 upToDate = false; 357 } 358 359 public void edgeAdded(String sourceId, long timeId, String edgeId, 360 String fromNodeId, String toNodeId, boolean directed) { 361 upToDate = false; 362 } 363 364 public void edgeRemoved(String sourceId, long timeId, String edgeId) { 365 upToDate = false; 366 } 367 368 public void graphCleared(String sourceId, long timeId) { 369 upToDate = true; 370 } 371 372 public void stepBegins(String sourceId, long timeId, double step) { 373 } 374 375 // helpers 376 377 protected void iteration() { 378 double dampingTerm = (1 - dampingFactor) / graph.getNodeCount(); 379 newRanks.clear(); 380 double danglingRank = 0; 381 for (int i = 0; i < graph.getNodeCount(); i++) { 382 Node node = graph.getNode(i); 383 double sum = 0; 384 for (int j = 0; j < node.getInDegree(); j++) { 385 Node other = node.getEnteringEdge(j).getOpposite(node); 386 sum += other.getNumber(rankAttribute) / other.getOutDegree(); 387 } 388 newRanks.add(dampingTerm + dampingFactor * sum); 389 if (node.getOutDegree() == 0) 390 danglingRank += node.getNumber(rankAttribute); 391 } 392 danglingRank *= dampingFactor / graph.getNodeCount(); 393 394 normDiff = 0; 395 for (int i = 0; i < graph.getNodeCount(); i++) { 396 Node node = graph.getNode(i); 397 double currentRank = node.getNumber(rankAttribute); 398 double newRank = newRanks.get(i) + danglingRank; 399 normDiff += Math.abs(newRank - currentRank); 400 node.addAttribute(rankAttribute, newRank); 401 } 402 iterationCount++; 403 } 404 405 // results 406 407 /** 408 * Returns the rank of a node. If the ranks are not up to date, recomputes 409 * them 410 * 411 * @param node 412 * A node 413 * @return The rank of the node 414 */ 415 public double getRank(Node node) { 416 compute(); 417 return node.getNumber(rankAttribute); 418 } 419 420 /** 421 * Returns the total number of iterations. 422 * 423 * This number accumulates the number of iterations performed by each call 424 * to {@link #compute()}. It is reset to zero in the calls to 425 * {@link #init(Graph)}. 426 * 427 * @return The number of iterations 428 */ 429 public int getIterationCount() { 430 return iterationCount; 431 } 432}