001/* 002 * Copyright 2006 - 2013 003 * Stefan Balev <stefan.balev@graphstream-project.org> 004 * Julien Baudry <julien.baudry@graphstream-project.org> 005 * Antoine Dutot <antoine.dutot@graphstream-project.org> 006 * Yoann Pigné <yoann.pigne@graphstream-project.org> 007 * Guilhelm Savin <guilhelm.savin@graphstream-project.org> 008 * 009 * This file is part of GraphStream <http://graphstream-project.org>. 010 * 011 * GraphStream is a library whose purpose is to handle static or dynamic 012 * graph, create them from scratch, file or any source and display them. 013 * 014 * This program is free software distributed under the terms of two licenses, the 015 * CeCILL-C license that fits European law, and the GNU Lesser General Public 016 * License. You can use, modify and/ or redistribute the software under the terms 017 * of the CeCILL-C license as circulated by CEA, CNRS and INRIA at the following 018 * URL <http://www.cecill.info> or under the terms of the GNU LGPL as published by 019 * the Free Software Foundation, either version 3 of the License, or (at your 020 * option) any later version. 021 * 022 * This program is distributed in the hope that it will be useful, but WITHOUT ANY 023 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 024 * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 025 * 026 * You should have received a copy of the GNU Lesser General Public License 027 * along with this program. If not, see <http://www.gnu.org/licenses/>. 028 * 029 * The fact that you are presently reading this means that you have had 030 * knowledge of the CeCILL-C and LGPL licenses and that you accept their terms. 031 */ 032package org.graphstream.algorithm.measure; 033 034import org.graphstream.graph.Node; 035 036/** 037 * Computes and updated the current Normalized Mutual Information (NMI) measure 038 * between a dynamically-performed community assignment on a graph as it evolves 039 * and a fixed assignment, known as reference. 040 * 041 * @reference L. Danon, A. Diaz-Guilera, J. Duch, and A. Arenas, “Comparing 042 * community structure identification,” <i>Journal of Statistical 043 * Mechanics: Theory and Experiment</i>, vol. 2005, no. 09, pp. 044 * P09008+, September 2005. 045 * 046 * @author Guillaume-Jean Herbiet 047 * 048 */ 049public class NormalizedMutualInformation extends CommunityRelativeMeasure { 050 051 /** 052 * New NMI measure, using the given marker for the dynamically performed 053 * assignment, and the default marker for the reference assignment. 054 * 055 * @param marker 056 * name of the attribute marking the computed communities. 057 */ 058 public NormalizedMutualInformation(String marker) { 059 super(marker); 060 } 061 062 /** 063 * New NMI measure, using the given marker for the dynamically performed 064 * assignment, and the given refrenceMarker for the reference assignment. 065 * 066 * @param marker 067 * name of the attribute marking the computed communities. 068 * @param referenceMarker 069 * name of the attribute marking the reference communities. 070 */ 071 public NormalizedMutualInformation(String marker, String referenceMarker) { 072 super(marker, referenceMarker); 073 } 074 075 /** 076 * Compute the new NMI measure value. 077 * 078 * @complexity O(2*C^2 + 6*C), where C is the expected number of communities 079 * in the graph. 080 */ 081 @Override 082 public void compute() { 083 if (graphChanged) { 084 085 // Get the updated confusion matrix 086 int[][] N = confusionMatrix(); 087 088 // Get the arrays of the rows and columns sums 089 int[] N_A = new int[referenceCommunities.size()]; 090 int[] N_B = new int[communities.size()]; 091 for (int i = 0; i < N_A.length; i++) { 092 int ttl = 0; 093 for (int j = 0; j < N_B.length; j++) 094 ttl += N[i][j]; 095 N_A[i] = ttl; 096 } 097 for (int j = 0; j < N_B.length; j++) { 098 int ttl = 0; 099 for (int i = 0; i < N_A.length; i++) 100 ttl += N[i][j]; 101 N_B[j] = ttl; 102 } 103 104 // Get the total nodes number 105 float n = graph.getNodeCount(); 106 107 /* 108 * Let's go and compute the NMI 109 */ 110 111 // First the numerator 112 float num = 0; 113 for (int i = 0; i < N_A.length; i++) { 114 for (int j = 0; j < N_B.length; j++) { 115 if (N[i][j] > 0) { 116 num += -2.0 * N[i][j] 117 * Math.log((N[i][j] * n) / (N_A[i] * N_B[j])); 118 } 119 } 120 } 121 122 // Then the denominator 123 float denom = 0; 124 for (int i = 0; i < N_A.length; i++) 125 denom += N_A[i] * Math.log(N_A[i] / n); 126 for (int j = 0; j < N_B.length; j++) 127 denom += N_B[j] * Math.log(N_B[j] / n); 128 129 // Update the metric value 130 M = num / denom; 131 132 // Valid unless the graph changes again 133 graphChanged = false; 134 } 135 } 136 137 /** 138 * Computes the confusion matrix between reference and current community 139 * assignment, i.e. the matrix N where each element N[i][j] is the number of 140 * nodes in reference community i, also in current community j. 141 * 142 * @complexity O(C^2 + 2C), where C is the expected number of communities in 143 * the graph. 144 * 145 * @return the confusion matrix N of all N[i][j] 146 */ 147 protected int[][] confusionMatrix() { 148 // Size of the confusion matrix 149 int c_A = referenceCommunities.size(); 150 int c_B = communities.size(); 151 152 // Confusion matrix itself 153 int[][] N = new int[c_A][]; 154 155 // Relation between confusion matrix indices and communities 156 Object keys_A[] = new Object[c_A]; 157 Object keys_B[] = new Object[c_B]; 158 159 int k_A = 0; 160 for (Object key : referenceCommunities.keySet()) 161 keys_A[k_A++] = key; 162 163 int k_B = 0; 164 for (Object key : communities.keySet()) 165 keys_B[k_B++] = key; 166 167 // Initialize each row and fill each element of the confusion matrix 168 for (int i = 0; i < c_A; ++i) { 169 N[i] = new int[c_B]; 170 171 for (int j = 0; j < c_B; j++) { 172 // Number of common nodes between communities indexed by i and j 173 int commonNodes = 0; 174 175 // Increase the number of common nodes for each node 176 // of the found community indexed by j 177 // also appearing in the real community indexed by i 178 for (Node n : communities.get(keys_B[j])) { 179 if (referenceCommunities.get(keys_A[i]).contains(n)) 180 commonNodes++; 181 } 182 183 // Sets the confusion matrix element 184 N[i][j] = commonNodes; 185 } 186 } 187 return N; 188 } 189}