001/*
002 * Copyright 2006 - 2013
003 *     Stefan Balev     <stefan.balev@graphstream-project.org>
004 *     Julien Baudry    <julien.baudry@graphstream-project.org>
005 *     Antoine Dutot    <antoine.dutot@graphstream-project.org>
006 *     Yoann Pigné      <yoann.pigne@graphstream-project.org>
007 *     Guilhelm Savin   <guilhelm.savin@graphstream-project.org>
008 * 
009 * This file is part of GraphStream <http://graphstream-project.org>.
010 * 
011 * GraphStream is a library whose purpose is to handle static or dynamic
012 * graph, create them from scratch, file or any source and display them.
013 * 
014 * This program is free software distributed under the terms of two licenses, the
015 * CeCILL-C license that fits European law, and the GNU Lesser General Public
016 * License. You can  use, modify and/ or redistribute the software under the terms
017 * of the CeCILL-C license as circulated by CEA, CNRS and INRIA at the following
018 * URL <http://www.cecill.info> or under the terms of the GNU LGPL as published by
019 * the Free Software Foundation, either version 3 of the License, or (at your
020 * option) any later version.
021 * 
022 * This program is distributed in the hope that it will be useful, but WITHOUT ANY
023 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
024 * PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
025 * 
026 * You should have received a copy of the GNU Lesser General Public License
027 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
028 * 
029 * The fact that you are presently reading this means that you have had
030 * knowledge of the CeCILL-C and LGPL licenses and that you accept their terms.
031 */
032package org.graphstream.algorithm;
033
034import java.util.ArrayList;
035import java.util.List;
036
037import org.graphstream.graph.Graph;
038import org.graphstream.graph.Node;
039import org.graphstream.stream.ElementSink;
040
041/**
042 * <p>
043 * The PageRank is an algorithm that measures the "importance" of the nodes in a
044 * graph. It assigns to each node a rank. This rank corresponds to the
045 * probability that a "random surfer" visits the node. The surfer goes from node
046 * to node in the following way: with probability <it>d</it> she chooses a
047 * random outgoing arc and with probability <it>1 - d</it> she "teleports" to a
048 * random node (possibly not connected to the current). The probability
049 * <it>d</it> is called damping factor. By default it is 0.85 but it can be
050 * customized (see {@link #setDampingFactor(double)}). The ranks are real
051 * numbers between 0 and 1 and sum up to one.
052 * </p>
053 * 
054 * <h2>Usage</h2>
055 * 
056 * <p>
057 * This implementation uses a variant of the power iteration algorithm to
058 * compute the node ranks. It computes the approximate ranks iteratively going
059 * closer to the exact values at each iteration. The accuracy can be controlled
060 * by a precision parameter (see {@link #setPrecision(double)}). When the L1
061 * norm of the difference between two consecutive rank vectors becomes less than
062 * this parameter, the result is considered precise enough and the computation
063 * stops.
064 * </p>
065 * 
066 * <p>
067 * This implementation works with both directed and undirected edges. An
068 * undirected edge acts as two directed arcs.
069 * </p>
070 * 
071 * <p>
072 * The graph dynamics is taken into account and the ranks are not computed from
073 * scratch at each modification in the structure of the graph. However, the
074 * ranks become less and less accurate after each modification. To establish the
075 * desired precision, one must either explicitly call {@link #compute()} or ask
076 * for a rank of a node by calling {@link #getRank(Node)}.
077 * </p>
078 * 
079 * <p>
080 * The computed ranks are stored in node attribute. The name of this attribute
081 * can be changed by a call to {@link #setRankAttribute(String)} but only before
082 * the call to {@link #init(Graph)}. Another way to obtain the ranks is to call
083 * {@link #getRank(Node)}. The second method is preferable because it will
084 * update the ranks if needed and will always return values within the desired
085 * precision.
086 * </p>
087 * 
088 * <h2>Example</h2>
089 * 
090 * <pre>
091 * Graph graph = new SingleGraph(&quot;test&quot;);
092 * graph.addAttribute(&quot;ui.antialias&quot;, true);
093 * graph.addAttribute(&quot;ui.stylesheet&quot;,
094 *              &quot;node {fill-color: red; size-mode: dyn-size;} edge {fill-color:grey;}&quot;);
095 * graph.display();
096 * 
097 * DorogovtsevMendesGenerator generator = new DorogovtsevMendesGenerator();
098 * generator.setDirectedEdges(true, true);
099 * generator.addSink(graph);
100 * 
101 * PageRank pageRank = new PageRank();
102 * pageRank.init(graph);
103 * 
104 * generator.begin();
105 * while (graph.getNodeCount() &lt; 100) {
106 *      generator.nextEvents();
107 *      for (Node node : graph) {
108 *              double rank = pageRank.getRank(node);
109 *              node.addAttribute(&quot;ui.size&quot;,
110 *                              5 + Math.sqrt(graph.getNodeCount() * rank * 20));
111 *              node.addAttribute(&quot;ui.label&quot;, String.format(&quot;%.2f%%&quot;, rank * 100));
112 *      }
113 *      Thread.sleep(1000);
114 * }
115 * </pre>
116 * 
117 * @complexity Each iteration takes O(m + n) time, where n is the number of
118 *             nodes and m is the number of edges. The number of iterations
119 *             needed to converge depends on the desired precision.
120 * 
121 * @reference Lawrence Page, Sergey Brin, Rajeev Motwani and Terry Winograd. The
122 *            PageRank citation ranking: Bringing order to the Web. 1999
123 * 
124 * 
125 */
126public class PageRank implements DynamicAlgorithm, ElementSink {
127        /**
128         * Default damping factor
129         */
130        public static final double DEFAULT_DAMPING_FACTOR = 0.85;
131
132        /**
133         * Default precision
134         */
135        public static final double DEFAULT_PRECISION = 1.0e-5;
136
137        /**
138         * Default rank attribute
139         */
140        public static final String DEFAULT_RANK_ATTRIBUTE = "PageRank";
141
142        /**
143         * Current damping factor
144         */
145        protected double dampingFactor;
146
147        /**
148         * Current numeric precision
149         */
150        protected double precision;
151
152        /**
153         * Current rank attribute
154         */
155        protected String rankAttribute;
156
157        /**
158         * Our graph
159         */
160        protected Graph graph;
161
162        /**
163         * Am I up to date ?
164         */
165        protected boolean upToDate;
166
167        /**
168         * The L1 norm of the difference between two consecutive rank vectors
169         */
170        protected double normDiff;
171
172        /**
173         * Used to temporary store the new ranks during an iteration
174         */
175        protected List<Double> newRanks;
176
177        /**
178         * total iteration count
179         */
180        protected int iterationCount;
181
182        /**
183         * Verbose mode
184         */
185        protected boolean verbose;
186
187        /**
188         * Creates a new instance.
189         * 
190         * The damping factor, the precision and the rank attribute are set to their
191         * default values
192         */
193        public PageRank() {
194                this(DEFAULT_DAMPING_FACTOR, DEFAULT_PRECISION, DEFAULT_RANK_ATTRIBUTE);
195        }
196
197        /**
198         * Creates a new instance.
199         * 
200         * @param dampingFactor
201         *            Damping factor
202         * @param precision
203         *            Numeric precision
204         * @param rankAttribute
205         *            Rank attribute
206         */
207        public PageRank(double dampingFactor, double precision, String rankAttribute) {
208                setDampingFactor(dampingFactor);
209                setPrecision(precision);
210                setRankAttribute(rankAttribute);
211                verbose = false;
212        }
213
214        // parameters
215
216        /**
217         * Returns the current damping factor.
218         * 
219         * @return The current damping factor
220         */
221        public double getDampingFactor() {
222                return dampingFactor;
223        }
224
225        /**
226         * Sets the damping factor.
227         * 
228         * @param dampingFactor
229         *            The new damping factor
230         * @throws IllegalArgumentException
231         *             If the damping factor is less than 0.01 or greater than 0.99
232         */
233        public void setDampingFactor(double dampingFactor)
234                        throws IllegalArgumentException {
235                if (dampingFactor < 0.01 || dampingFactor > 0.99)
236                        throw new IllegalArgumentException(
237                                        "The damping factor must be between 0.01 and 0.99");
238                this.dampingFactor = dampingFactor;
239                upToDate = false;
240        }
241
242        /**
243         * Returns the currently used numeric precision
244         * 
245         * @return The precision
246         */
247        public double getPrecision() {
248                return precision;
249        }
250
251        /**
252         * Sets the numeric precision. Precision values close to zero lead to more
253         * accurate results, but slower convergence
254         * 
255         * @param precision
256         *            The new precision
257         * @throws IllegalArgumentException
258         *             if the precision is less than 1.0e-7
259         */
260        public void setPrecision(double precision) throws IllegalArgumentException {
261                if (precision < 1.0e-7)
262                        throw new IllegalArgumentException("Precision is too small");
263                this.precision = precision;
264                upToDate = false;
265        }
266
267        /**
268         * Returns the current rank attribute
269         * 
270         * @return The current rank attribute
271         */
272        public String getRankAttribute() {
273                return rankAttribute;
274        }
275
276        /**
277         * Sets the rank attribute.
278         * 
279         * The computed ranks of each node are stored as values of this attribute.
280         * 
281         * @param rankAttribute
282         *            The node attribute used to store the computed ranks
283         * @throws IllegalStateException
284         *             if the algorithm is already initialized
285         */
286        public void setRankAttribute(String rankAttribute)
287                        throws IllegalStateException {
288                if (graph != null)
289                        throw new IllegalStateException(
290                                        "this method can be called only before init");
291                this.rankAttribute = rankAttribute;
292        }
293
294        /**
295         * Switches on or off the verbose mode.
296         * 
297         * In verbose mode the algorithm prints at each iteration the number of
298         * iterations and the L1 norm of the difference between the current and the
299         * previous rank vectors.
300         * 
301         * @param verbose
302         *            Verbose mode
303         */
304        public void setVerbose(boolean verbose) {
305                this.verbose = verbose;
306        }
307
308        // DynamicAlgorithm implementation
309
310        public void init(Graph graph) {
311                this.graph = graph;
312                graph.addElementSink(this);
313                double initialRank = 1.0 / graph.getNodeCount();
314                for (Node node : graph)
315                        node.addAttribute(rankAttribute, initialRank);
316                newRanks = new ArrayList<Double>(graph.getNodeCount());
317                upToDate = false;
318                iterationCount = 0;
319        }
320
321        public void compute() {
322                if (upToDate)
323                        return;
324                do {
325                        iteration();
326                        if (verbose)
327                                System.err.printf("%6d%16.8f%n", iterationCount, normDiff);
328                } while (normDiff > precision);
329                upToDate = true;
330        }
331
332        public void terminate() {
333                graph.removeElementSink(this);
334                newRanks.clear();
335                newRanks = null;
336                graph = null;
337        }
338
339        // ElementSink implementation
340
341        public void nodeAdded(String sourceId, long timeId, String nodeId) {
342                // the initial rank of the new node will be 0
343                graph.getNode(nodeId).addAttribute(rankAttribute,
344                                graph.getNodeCount() == 1 ? 1.0 : 0.0);
345                upToDate = false;
346        }
347
348        public void nodeRemoved(String sourceId, long timeId, String nodeId) {
349                // removed node will give equal parts of its rank to the others
350                double part = graph.getNode(nodeId).getNumber(rankAttribute)
351                                / (graph.getNodeCount() - 1);
352                for (Node node : graph)
353                        if (!node.getId().equals(nodeId))
354                                node.addAttribute(rankAttribute, node.getNumber(rankAttribute)
355                                                + part);
356                upToDate = false;
357        }
358
359        public void edgeAdded(String sourceId, long timeId, String edgeId,
360                        String fromNodeId, String toNodeId, boolean directed) {
361                upToDate = false;
362        }
363
364        public void edgeRemoved(String sourceId, long timeId, String edgeId) {
365                upToDate = false;
366        }
367
368        public void graphCleared(String sourceId, long timeId) {
369                upToDate = true;
370        }
371
372        public void stepBegins(String sourceId, long timeId, double step) {
373        }
374
375        // helpers
376
377        protected void iteration() {
378                double dampingTerm = (1 - dampingFactor) / graph.getNodeCount();
379                newRanks.clear();
380                double danglingRank = 0;
381                for (int i = 0; i < graph.getNodeCount(); i++) {
382                        Node node = graph.getNode(i);
383                        double sum = 0;
384                        for (int j = 0; j < node.getInDegree(); j++) {
385                                Node other = node.getEnteringEdge(j).getOpposite(node);
386                                sum += other.getNumber(rankAttribute) / other.getOutDegree();
387                        }
388                        newRanks.add(dampingTerm + dampingFactor * sum);
389                        if (node.getOutDegree() == 0)
390                                danglingRank += node.getNumber(rankAttribute);
391                }
392                danglingRank *= dampingFactor / graph.getNodeCount();
393
394                normDiff = 0;
395                for (int i = 0; i < graph.getNodeCount(); i++) {
396                        Node node = graph.getNode(i);
397                        double currentRank = node.getNumber(rankAttribute);
398                        double newRank = newRanks.get(i) + danglingRank;
399                        normDiff += Math.abs(newRank - currentRank);
400                        node.addAttribute(rankAttribute, newRank);
401                }
402                iterationCount++;
403        }
404
405        // results
406
407        /**
408         * Returns the rank of a node. If the ranks are not up to date, recomputes
409         * them
410         * 
411         * @param node
412         *            A node
413         * @return The rank of the node
414         */
415        public double getRank(Node node) {
416                compute();
417                return node.getNumber(rankAttribute);
418        }
419
420        /**
421         * Returns the total number of iterations.
422         * 
423         * This number accumulates the number of iterations performed by each call
424         * to {@link #compute()}. It is reset to zero in the calls to
425         * {@link #init(Graph)}.
426         * 
427         * @return The number of iterations
428         */
429        public int getIterationCount() {
430                return iterationCount;
431        }
432}