001/*
002 * Copyright 2006 - 2013
003 *     Stefan Balev     <stefan.balev@graphstream-project.org>
004 *     Julien Baudry    <julien.baudry@graphstream-project.org>
005 *     Antoine Dutot    <antoine.dutot@graphstream-project.org>
006 *     Yoann Pigné      <yoann.pigne@graphstream-project.org>
007 *     Guilhelm Savin   <guilhelm.savin@graphstream-project.org>
008 * 
009 * This file is part of GraphStream <http://graphstream-project.org>.
010 * 
011 * GraphStream is a library whose purpose is to handle static or dynamic
012 * graph, create them from scratch, file or any source and display them.
013 * 
014 * This program is free software distributed under the terms of two licenses, the
015 * CeCILL-C license that fits European law, and the GNU Lesser General Public
016 * License. You can  use, modify and/ or redistribute the software under the terms
017 * of the CeCILL-C license as circulated by CEA, CNRS and INRIA at the following
018 * URL <http://www.cecill.info> or under the terms of the GNU LGPL as published by
019 * the Free Software Foundation, either version 3 of the License, or (at your
020 * option) any later version.
021 * 
022 * This program is distributed in the hope that it will be useful, but WITHOUT ANY
023 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
024 * PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
025 * 
026 * You should have received a copy of the GNU Lesser General Public License
027 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
028 * 
029 * The fact that you are presently reading this means that you have had
030 * knowledge of the CeCILL-C and LGPL licenses and that you accept their terms.
031 */
032package org.graphstream.algorithm.measure;
033
034import org.graphstream.graph.Node;
035
036/**
037 * Computes and updated the current Normalized Mutual Information (NMI) measure
038 * between a dynamically-performed community assignment on a graph as it evolves
039 * and a fixed assignment, known as reference.
040 * 
041 * @reference L. Danon, A. Diaz-Guilera, J. Duch, and A. Arenas, “Comparing
042 *            community structure identification,” <i>Journal of Statistical
043 *            Mechanics: Theory and Experiment</i>, vol. 2005, no. 09, pp.
044 *            P09008+, September 2005.
045 * 
046 * @author Guillaume-Jean Herbiet
047 * 
048 */
049public class NormalizedMutualInformation extends CommunityRelativeMeasure {
050
051        /**
052         * New NMI measure, using the given marker for the dynamically performed
053         * assignment, and the default marker for the reference assignment.
054         * 
055         * @param marker
056         *            name of the attribute marking the computed communities.
057         */
058        public NormalizedMutualInformation(String marker) {
059                super(marker);
060        }
061
062        /**
063         * New NMI measure, using the given marker for the dynamically performed
064         * assignment, and the given refrenceMarker for the reference assignment.
065         * 
066         * @param marker
067         *            name of the attribute marking the computed communities.
068         * @param referenceMarker
069         *            name of the attribute marking the reference communities.
070         */
071        public NormalizedMutualInformation(String marker, String referenceMarker) {
072                super(marker, referenceMarker);
073        }
074
075        /**
076         * Compute the new NMI measure value.
077         * 
078         * @complexity O(2*C^2 + 6*C), where C is the expected number of communities
079         *             in the graph.
080         */
081        @Override
082        public void compute() {
083                if (graphChanged) {
084
085                        // Get the updated confusion matrix
086                        int[][] N = confusionMatrix();
087
088                        // Get the arrays of the rows and columns sums
089                        int[] N_A = new int[referenceCommunities.size()];
090                        int[] N_B = new int[communities.size()];
091                        for (int i = 0; i < N_A.length; i++) {
092                                int ttl = 0;
093                                for (int j = 0; j < N_B.length; j++)
094                                        ttl += N[i][j];
095                                N_A[i] = ttl;
096                        }
097                        for (int j = 0; j < N_B.length; j++) {
098                                int ttl = 0;
099                                for (int i = 0; i < N_A.length; i++)
100                                        ttl += N[i][j];
101                                N_B[j] = ttl;
102                        }
103
104                        // Get the total nodes number
105                        float n = graph.getNodeCount();
106
107                        /*
108                         * Let's go and compute the NMI
109                         */
110
111                        // First the numerator
112                        float num = 0;
113                        for (int i = 0; i < N_A.length; i++) {
114                                for (int j = 0; j < N_B.length; j++) {
115                                        if (N[i][j] > 0) {
116                                                num += -2.0 * N[i][j]
117                                                                * Math.log((N[i][j] * n) / (N_A[i] * N_B[j]));
118                                        }
119                                }
120                        }
121
122                        // Then the denominator
123                        float denom = 0;
124                        for (int i = 0; i < N_A.length; i++)
125                                denom += N_A[i] * Math.log(N_A[i] / n);
126                        for (int j = 0; j < N_B.length; j++)
127                                denom += N_B[j] * Math.log(N_B[j] / n);
128
129                        // Update the metric value
130                        M = num / denom;
131
132                        // Valid unless the graph changes again
133                        graphChanged = false;
134                }
135        }
136
137        /**
138         * Computes the confusion matrix between reference and current community
139         * assignment, i.e. the matrix N where each element N[i][j] is the number of
140         * nodes in reference community i, also in current community j.
141         * 
142         * @complexity O(C^2 + 2C), where C is the expected number of communities in
143         *             the graph.
144         * 
145         * @return the confusion matrix N of all N[i][j]
146         */
147        protected int[][] confusionMatrix() {
148                // Size of the confusion matrix
149                int c_A = referenceCommunities.size();
150                int c_B = communities.size();
151
152                // Confusion matrix itself
153                int[][] N = new int[c_A][];
154
155                // Relation between confusion matrix indices and communities
156                Object keys_A[] = new Object[c_A];
157                Object keys_B[] = new Object[c_B];
158
159                int k_A = 0;
160                for (Object key : referenceCommunities.keySet())
161                        keys_A[k_A++] = key;
162
163                int k_B = 0;
164                for (Object key : communities.keySet())
165                        keys_B[k_B++] = key;
166
167                // Initialize each row and fill each element of the confusion matrix
168                for (int i = 0; i < c_A; ++i) {
169                        N[i] = new int[c_B];
170
171                        for (int j = 0; j < c_B; j++) {
172                                // Number of common nodes between communities indexed by i and j
173                                int commonNodes = 0;
174
175                                // Increase the number of common nodes for each node
176                                // of the found community indexed by j
177                                // also appearing in the real community indexed by i
178                                for (Node n : communities.get(keys_B[j])) {
179                                        if (referenceCommunities.get(keys_A[i]).contains(n))
180                                                commonNodes++;
181                                }
182
183                                // Sets the confusion matrix element
184                                N[i][j] = commonNodes;
185                        }
186                }
187                return N;
188        }
189}