矩阵的协方差计算

时间:2022-04-24 19:14:29

矩阵的协方差矩阵是对称阵,用公式 Cov(X, Y) = E[X * Y] - E[X] E[Y] 计算,其中 E[X] 和 E[Y] 是对应列的平均值,E[X * Y] 是两列对应元素乘积的平均值。设样本数(矩阵行数)为 m,Gramian 矩阵 G = AᵀA 的元素 G[X, Y] 就是两列对应元素乘积之和(即 m·E[X * Y])。为了得到无偏的样本协方差,需要除以 m-1 而不是 m,于是公式变换为 Cov(X, Y) = G[X, Y] / (m-1) - (m / (m-1)) E[X] E[Y]。

这个逻辑就是 Spark RowMatrix 求协方差(computeCovariance)的逻辑,只不过 Spark 运算的对象是 RDD,而这里运算的是普通的二维数组。

代码如下


/**
 * Computes the sample covariance matrix of the columns of a dense matrix.
 *
 * <p>The input is a row-major {@code double[m][n]} array: {@code m} observations (rows) of
 * {@code n} variables (columns). The covariance is derived from the Gramian matrix
 * {@code G = A^T A} and the column means:
 * {@code cov[i][j] = G[i][j] / (m - 1) - m / (m - 1) * mean[i] * mean[j]}.
 */
public class CovarianceTest
{

    public static void main( String[] args )
    {
        double[][] test = new double[][]{
            new double[]{ 1, 2, 3 },
            new double[]{ 4, 5, 6 },
            new double[]{ 7, 8, 9 }
        };

        // Smoke run only: the result is not inspected here.
        caleCovariance( test );
        System.out.println( "done" );
    }


    /**
     * Computes the unbiased sample covariance matrix of the columns of {@code dss}.
     *
     * @param dss row-major matrix with at least two rows, all of the same length
     * @return a symmetric {@code n x n} matrix, where {@code n} is the number of columns
     * @throws IllegalArgumentException if {@code dss} is null, has fewer than two rows,
     *         or its rows are null or of differing lengths
     */
    private static double[][] caleCovariance( double[][] dss )
    {
        int len = columnCount( dss );
        int count = dss.length;
        if ( count < 2 )
        {
            // The (m - 1) divisor would be zero and the result NaN/Infinity.
            throw new IllegalArgumentException(
                "covariance needs at least 2 rows, got " + count );
        }

        double[] means = caleMean( dss );
        double[][] gramian = caleGramian( dss );

        // The covariance matrix is columns x columns, independent of the row count.
        double[][] retValue = new double[len][len];
        double m1 = count - 1;
        double scale = count / m1;
        for ( int i = 0; i < len; i++ )
        {
            double alpha = scale * means[i];
            // Only the upper triangle is computed; the result is mirrored.
            for ( int j = i; j < len; j++ )
            {
                double cov = gramian[i][j] / m1 - alpha * means[j];
                retValue[i][j] = cov;
                retValue[j][i] = cov;
            }
        }
        return retValue;
    }


    /**
     * Computes the mean of every column.
     *
     * @param dss non-empty row-major matrix whose rows all have the same length
     * @return an array holding one mean per column
     * @throws IllegalArgumentException if {@code dss} is null, empty, or not rectangular
     */
    private static double[] caleMean( double[][] dss )
    {
        int len = columnCount( dss );
        int count = dss.length;
        double[] retValue = new double[len];
        for ( int i = 0; i < len; i++ )
        {
            double sum = 0;
            for ( int j = 0; j < count; j++ )
            {
                sum += dss[j][i];
            }
            retValue[i] = sum / count;
        }
        return retValue;
    }


    /**
     * Computes the Gramian matrix {@code A^T A}: entry {@code [i][j]} is the sum over all
     * rows of the product of column {@code i} and column {@code j}.
     *
     * @param dss non-empty row-major matrix whose rows all have the same length
     * @return a symmetric {@code n x n} matrix, where {@code n} is the number of columns
     * @throws IllegalArgumentException if {@code dss} is null, empty, or not rectangular
     */
    private static double[][] caleGramian( double[][] dss )
    {
        int len = columnCount( dss );
        int count = dss.length;
        double[][] retValue = new double[len][len];
        for ( int i = 0; i < len; i++ )
        {
            for ( int j = i; j < len; j++ )
            {
                double sum = 0;
                for ( int k = 0; k < count; k++ )
                {
                    sum += dss[k][i] * dss[k][j];
                }
                // Write both triangles directly instead of back-filling zero entries.
                retValue[i][j] = sum;
                retValue[j][i] = sum;
            }
        }
        return retValue;
    }


    /**
     * Validates that {@code dss} is a non-empty rectangular matrix and returns its width.
     *
     * @param dss matrix to check
     * @return the number of columns
     * @throws IllegalArgumentException if {@code dss} is null, empty, or has a null row or
     *         rows of differing lengths
     */
    private static int columnCount( double[][] dss )
    {
        if ( dss == null || dss.length == 0 || dss[0] == null )
        {
            throw new IllegalArgumentException( "matrix must have at least one row" );
        }
        int len = dss[0].length;
        for ( int r = 1; r < dss.length; r++ )
        {
            if ( dss[r] == null || dss[r].length != len )
            {
                throw new IllegalArgumentException(
                    "row " + r + " does not have " + len + " columns" );
            }
        }
        return len;
    }
}