学习笔记(4)-社区发现评价指标

时间:2022-12-08 09:10:54

目前使用的主要有:Q(Modulartiy),Jaccard指数与Fsame值,NMI也是常用指标。

c语言实现:

//当i和J属于同一个社团时,E函数等于l,否则等于0. 故只需计算同一社区的函数值
double Modulartiy(int * cluster_assignment, int ** M, int vertices)
{
int i, j, k;
int maxlable = cluster_assignment[0]; //最大标号
for (i = 1; i < vertices; i++)
{
if (maxlable < cluster_assignment[i])
maxlable = cluster_assignment[i];
}
int TotalLinks; //总边数 (m)
TotalLinks= SumSumMartex(M, vertices, vertices)/2;
double Q = 0; //Q函数
vector <int> index;
for (i = 1; i <= maxlable; i++)
{
FindIndexInVector(cluster_assignment, index, vertices, i);
unsigned len = index.size();
double li = 0;
//社区内部边的边数 (I)
for (j = 0; j < len; j++)
for (k = 0; k < len; k++)
li += M[index[j]][index[k]];
li = li / 2;
//社区内所有顶点的度之和 (D)
double di = 0;
for (j = 0; j < len; j++)
for (k = 0; k < vertices; k++)
di = di + M[index[j]][k];
Q = Q + (li - (di * di) /(TotalLinks*4) );
index.clear();
}
Q = Q / TotalLinks;
return Q ;
}
/************找出两种分类结果中相同标签的元素个数**********/
double FindjaccardIndex(int* A, int* B, int vertices)
{
int a,b,c;
a=0; b=0; c=0;
for(int i=0; i<vertices; i++)
{
for(int j=0; j<vertices; j++)
{
if((A[i]==A[j])&&(B[i]==B[j])) a++;
if((A[i]==A[j])&&(B[i]!=B[j])) b++;
if((A[i]!=A[j])&&(B[i]==B[j])) c++;
}
}
return a/(double)(a+b+c);
}
/********************计算两种分区结果的Fsame值***************/
double FsameIndex(int*A, int*B, int As, int Bs)
{
int **S; //生成As*Bs矩阵
int i, j;
S = new int * [As];
for ( i = 0; i < As; i++ )
{
S[i] = new int [Bs];
}
for ( i = 0; i < As; i++)
{
for ( j=0; j<Bs; j++)
S[i][j]=0;
}
for (j=0; j<Vertices; j++)
{
S[A[j]-1][B[j]-1]++;
}
//计算fsame的值
double fsame;
//辅助队列
int* r = new int[As];
for ( i = 0; i < As; i++ ) r[i]=-1;
int* c = new int[Bs];
for ( i = 0; i < Bs; i++ ) c[i]=-1;
//统计每行及每列最大的数
for ( i = 0; i < As; i++)
{
for ( j = 0; j < Bs; j++)
{
if (S[i][j]>r[i]) r[i]=S[i][j];
if (S[i][j]>c[j]) c[j]=S[i][j];
}
}
//求和
int x=0, y=0;
for ( i = 0; i < As; i++ ) x+=r[i];
for ( i = 0; i < Bs; i++ ) y+=c[i];

fsame=50.0*(x+y)/(double)Vertices;
return fsame;
}

Python 实现

def Modulartiy(A, coms, sums):
Q = 0.0
for eachc in coms:
li = 0
for eachp in coms[eachc]:
for eachq in coms[eachc]:
li += A[eachp][eachq]
li /= 2
di = 0
for eachp in coms[eachc]:
for eachq in range(vertices):
di += A[eachp][eachq]
Q = Q + (li - (di * di) /(sums*4))
Q = Q / float(sums)
return Q

def JaccardIndex(cluA,cluB):
a, b, c = 0
for i in range(vertices):
for j in range(vertices):
if cluA[i] == cluA[j] and cluB[i] == cluB[j]:
a += 1
if cluA[i] == cluA[j] and cluB[i] == cluB[j]:
b += 1
if cluA[i] == cluA[j] and cluB[i] == cluB[j]:
c += 1
return a/(double)(a+b+c)

def FsameIndex(cluA,cluB):
S = mat([[0 for i in range(len(cluB))] for j in range(len(cluA))])
for i in range(vertices):
S[cluA[i]][cluB[i]] = 1
r = sum(S.max(0))
c = sum(S.max(1))
fsame = 50.0*(r+c)/float(Vertices)
return fsame

def NMI(cluA,cluB):
#混淆矩阵
cmat = [[0 for i in range(len(cluA))] for j in range(len(cluB))]
i = 0
j = 0
for eacha in cluA:
for eachb in cluB:
cmat[i][j] = len(set(cluA[eacha]) & set(cluB[eachb]))
j += 1
i += 1
j = 0
#print cmat
#the nmi_numerator part
nmi_numerator = 0.0
for i in range(len(cluA)):
for j in range(len(cluB)):
if (cmat[i][j]!=0):
row = 0
column = 0
for k in range(len(cluB)):
row = row + cmat[i][k]
for l in range(len(cluA)):
column = column + cmat[l][j]
nmi_numerator = nmi_numerator + cmat[i][j] * log10((cmat[i][j] * vertices)/float(row * column))
nmi_numerator = -2 * nmi_numerator
#the denominator part
nmi_denominator1 = 0.0
nmi_denominator2 = 0.0
nmi = 0.0
for i in range(len(cluA)):
row = 0
for k in range(len(cluB)):
row = row + cmat[i][k]
if(row != 0):
nmi_denominator1 = nmi_denominator1 + row * log10(row / float(vertices))
for j in range(len(cluB)):
column = 0
for l in range(len(cluA)):
column = column + cmat[l][j];
if(column != 0):
nmi_denominator2 = nmi_denominator2 + column * log10(column / float(vertices))
nmi_denominator = nmi_denominator1 + nmi_denominator2
print nmi_numerator,nmi_denominator
if(nmi_denominator != 0):
nmi = nmi_numerator/float(nmi_denominator)
return nmi

附:NMI指数C++:

double normalized_mutual_information(int * clu_assignment1,int * clu_assignment2, int length)
{
int num_nodes = length;
int clu_num1 = clu_assignment1[0];
int clu_num2 = clu_assignment2[0];
for (int m = 1; m < num_nodes; m++)
{
if (clu_num1 < clu_assignment1[m])
clu_num1 = clu_assignment1[m];
if (clu_num2 < clu_assignment2[m])
clu_num2 = clu_assignment2[m];
}
// Compute the confusion matrix
int** cmat;
cmat = new int * [clu_num1];
for (int i = 0; i < clu_num1; i++ )
{
cmat[i] = new int [clu_num2];
}
for (int i = 0; i < clu_num1; i++ )
for (int j = 0; j < clu_num2; j++ )
{
cmat[i][j] = 0;
}
for (int i = 0; i < clu_num1; i++ )
{
vector <int> index1;
for (int k = 0; k < num_nodes; k++ )
{
if (clu_assignment1[k] == i + 1)
index1.push_back(k);
}
for (int j = 0; j < clu_num2; j++ )
{
vector <int> index2;
for (int k = 0; k < num_nodes; k++ )
{
if (clu_assignment2[k] == j + 1)
index2.push_back(k);
}
int numtemp = 0;
for (unsigned int m = 0; m < index1.size(); m++ )
{
for(unsigned int n = 0; n < index2.size(); n++ )
{
if (index1[m] == index2[n])
numtemp = numtemp + 1;
}
}
cmat[i][j] = numtemp;
}
}
//Compute the Normalized Mutual Information
//the numerator part
double nmi_numerator = 0.0;
for (int i = 0; i < clu_num1; i++ )
{
for (int j = 0; j < clu_num2; j++)
{
if (cmat[i][j] != 0)
{
int row = 0;
int column = 0;
for (int k = 0; k < clu_num2; k++ )
{
row = row + cmat[i][k];
}
for (int l = 0; l < clu_num1; l++ )
{
column = column + cmat[l][j];
}
nmi_numerator = nmi_numerator + cmat[i][j] * log10(double(cmat[i][j] * num_nodes)/double(row * column));
}
}
}
nmi_numerator = -2 * nmi_numerator;
/////////the denominator part
double nmi_denominator1 = 0;
double nmi_denominator2 = 0;
for (int i = 0; i < clu_num1; i++ )
{
double row = 0;
for (int k = 0; k < clu_num2; k++ )
row = row + cmat[i][k];
nmi_denominator1 = nmi_denominator1 + row * log10(row / num_nodes);
}
for (int j = 0; j < clu_num2; j++ )
{
double column = 0;
for (int l = 0; l < clu_num1; l++ )
column = column + cmat[l][j];
nmi_denominator2 = nmi_denominator2 + column * log10(column / num_nodes);
}
double nmi_denominator = nmi_denominator1 + nmi_denominator2;
double nmi = nmi_numerator/nmi_denominator;
for (int i = 0; i < clu_num1;i++)
delete [] cmat[i];
delete [] cmat;
return nmi;
}