001/*
002 * Copyright 2006 - 2013
003 *     Stefan Balev     <stefan.balev@graphstream-project.org>
004 *     Julien Baudry    <julien.baudry@graphstream-project.org>
005 *     Antoine Dutot    <antoine.dutot@graphstream-project.org>
006 *     Yoann Pigné      <yoann.pigne@graphstream-project.org>
007 *     Guilhelm Savin   <guilhelm.savin@graphstream-project.org>
008 * 
009 * This file is part of GraphStream <http://graphstream-project.org>.
010 * 
011 * GraphStream is a library whose purpose is to handle static or dynamic
012 * graph, create them from scratch, file or any source and display them.
013 * 
014 * This program is free software distributed under the terms of two licenses, the
015 * CeCILL-C license that fits European law, and the GNU Lesser General Public
016 * License. You can  use, modify and/ or redistribute the software under the terms
017 * of the CeCILL-C license as circulated by CEA, CNRS and INRIA at the following
018 * URL <http://www.cecill.info> or under the terms of the GNU LGPL as published by
019 * the Free Software Foundation, either version 3 of the License, or (at your
020 * option) any later version.
021 * 
022 * This program is distributed in the hope that it will be useful, but WITHOUT ANY
023 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
024 * PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
025 * 
026 * You should have received a copy of the GNU Lesser General Public License
027 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
028 * 
029 * The fact that you are presently reading this means that you have had
030 * knowledge of the CeCILL-C and LGPL licenses and that you accept their terms.
031 */
032package org.graphstream.algorithm.measure;
033
034import java.util.ArrayList;
035import java.util.HashMap;
036
037import org.graphstream.algorithm.Algorithm;
038import org.graphstream.graph.Edge;
039import org.graphstream.graph.Graph;
040
041/**
042 * Surprise measure.
043 * 
044 * Description from <a
045 * href="https://en.wikipedia.org/wiki/Surprise_(networks)">Wikipedia</a> :
046 * Surprise (denoted S) is a measure of community structure in complex networks.
047 * The name Surprise derives from the fact that its maximization finds the most
048 * surprising partition into communities of the network, that is, the most
049 * unlikely one. S accurately evaluates, in a global manner, the quality of a
050 * partition using a cumulative hypergeometric distribution.
051 * 
052 * @reference Rodrigo Aldecoa, Ignacio Marin,
053 *            "Deciphering Network Community Structure by Surprise", 2011, PLoS
054 *            ONE 6(9)
055 * 
056 */
057public class SurpriseMeasure implements Algorithm {
058
059        /**
060         * Default attribute key where the result of the algorithm, a double value,
061         * is stored.
062         */
063        public static final String ATTRIBUTE = "measure.surprise";
064
065        //
066        // Used to group nodes with no meta index under a fake null index.
067        //
068        private static final Object NULL = new Object();
069
070        /**
071         * Attribute of nodes containing meta index. Default is "meta.index".
072         */
073        protected String communityAttributeKey;
074
075        /**
076         * Attribute that will contain the result.
077         */
078        protected String surpriseAttributeKey;
079
080        /**
081         * Graph used in the computation.
082         */
083        protected Graph graph;
084
085        /**
086         * Default constructor.
087         */
088        public SurpriseMeasure() {
089                this("meta.index");
090        }
091
092        /**
093         * Constructor allowing to set the node attribute key containing index of
094         * organizations.
095         * 
096         * @param communityAttributeKey
097         *            key attribute of organizations
098         */
099        public SurpriseMeasure(String communityAttributeKey) {
100                this(communityAttributeKey, ATTRIBUTE);
101        }
102
103        /**
104         * Same as {@link #SurpriseMeasure(String)} but allowing to set the graph
105         * attribute that will contain the result of the computation.
106         * 
107         * @param communityAttributeKey
108         * @param surpriseAttributeKey
109         */
110        public SurpriseMeasure(String communityAttributeKey,
111                        String surpriseAttributeKey) {
112                this.communityAttributeKey = communityAttributeKey;
113                this.surpriseAttributeKey = surpriseAttributeKey;
114        }
115
116        /*
117         * (non-Javadoc)
118         * 
119         * @see
120         * org.graphstream.algorithm.Algorithm#init(org.graphstream.graph.Graph)
121         */
122        public void init(Graph graph) {
123                this.graph = graph;
124        }
125
126        /*
127         * (non-Javadoc)
128         * 
129         * @see org.graphstream.algorithm.Algorithm#compute()
130         */
131        public void compute() {
132                HashMap<Object, Integer> communities = new HashMap<Object, Integer>();
133                ArrayList<Integer> communitiesCount = new ArrayList<Integer>();
134
135                for (int i = 0; i < graph.getNodeCount(); i++) {
136                        Object community = graph.getNode(i).getAttribute(
137                                        communityAttributeKey);
138
139                        if (community == null)
140                                community = NULL;
141
142                        if (!communities.containsKey(community)) {
143                                communities.put(community, communities.size());
144                                communitiesCount.add(0);
145                        }
146
147                        int idx = communities.get(community);
148                        communitiesCount.set(idx, communitiesCount.get(idx) + 1);
149                }
150
151                if (communities.containsKey(NULL))
152                        System.err.printf("[WARNING] Some nodes do not have community.\n");
153
154                double F = graph.getNodeCount() * (graph.getNodeCount() - 1) / 2;
155                double p = 0;
156                double M = 0;
157                double n = graph.getEdgeCount();
158                double W;
159                double S = 0;
160
161                for (int i = 0; i < graph.getEdgeCount(); i++) {
162                        Edge e = graph.getEdge(i);
163                        Object idx0 = e.getNode0().getAttribute(communityAttributeKey);
164                        Object idx1 = e.getNode1().getAttribute(communityAttributeKey);
165
166                        if (idx0.equals(idx1))
167                                p++;
168                }
169
170                for (int i = 0; i < communitiesCount.size(); i++) {
171                        int k = communitiesCount.get(i);
172                        M += k * (k - 1) / 2;
173                }
174
175                W = Math.min(M, n);
176                S = cumulativeHypergeometricDistribution(p, W, F, n, M);
177                S = -Math.log(S);
178
179                graph.setAttribute(surpriseAttributeKey, S);
180        }
181
182        /**
183         * Get the last computed surprise value contained in the graph.
184         * 
185         * @return surprise value
186         */
187        public double getSurprise() {
188                if (graph == null)
189                        throw new NullPointerException(
190                                        "Graph is null. Is this algorithm initialized ?");
191
192                if (!graph.hasNumber(surpriseAttributeKey))
193                        throw new RuntimeException(
194                                        "No surprise value found. Have you called the compute() method ?");
195
196                return graph.getNumber(surpriseAttributeKey);
197        }
198
199        /**
200         * Helper to compute the binomial coefficient.
201         * 
202         * @param n
203         * @param r
204         * @return
205         */
206        public static double binomialCoefficient(double n, double r) {
207                if (r > n)
208                        return 0;
209
210                if (r == 0 || n == r)
211                        return 1;
212
213                double C = n;
214                double t = 1;
215
216                for (int i = 1; i < r; i++) {
217                        C *= (n - i);
218                        t *= i + 1;
219                }
220
221                return C / t;
222        }
223
224        /**
225         * Helper to compute the hypergeometric distribution. See <a href=
226         * "http://stattrek.com/probability-distributions/hypergeometric.aspx">this
227         * page</a> for more information about this function.
228         * 
229         * @param x
230         * @param N
231         * @param n
232         * @param k
233         * @return
234         */
235        public static double hypergeometricDistribution(double x, double N,
236                        double n, double k) {
237                return binomialCoefficient(k, x) * binomialCoefficient(N - k, n - x)
238                                / binomialCoefficient(N, n);
239        }
240
241        /**
242         * Helper to compute the cumulative hypergeometric distribution. See <a
243         * href=
244         * "http://stattrek.com/probability-distributions/hypergeometric.aspx">this
245         * page</a> for more information about this function.
246         * 
247         * @param xStart
248         * @param xEnd
249         * @param N
250         * @param n
251         * @param k
252         * @return
253         */
254        public static double cumulativeHypergeometricDistribution(double xStart,
255                        double xEnd, double N, double n, double k) {
256                double chd = 0;
257
258                for (double x = xStart; x <= xEnd; x += 1)
259                        chd += hypergeometricDistribution(x, N, n, k);
260
261                return chd;
262        }
263}