利用Python实现高斯混合模型(GMM)

时间:2025-03-12 22:45:35
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2018/7/19 10:17
# @Author : DaiPuwei
# @FileName:
# @Software: PyCharm
# @E-mail :771830171@
# @Blog :/qq_30091945
"""Gaussian mixture model (GMM) fitted with the EM algorithm, with an Iris demo."""

import itertools

import numpy as np

# Ridge added to the diagonal of every covariance matrix before taking its
# determinant / inverse, so that a singular covariance does not break the
# density evaluation.
_COV_RIDGE = 0.001


class GMM:
    """Gaussian mixture model whose parameters are estimated with EM.

    After :meth:`GMM_EM` has run, two result attributes are available:

    * ``posibility`` -- per-sample responsibilities, indexable as ``[n][k]``.
    * ``prediction`` -- list with the most likely component index per sample.

    NOTE(review): the names of these two attributes were lost when this file
    was extracted; they are restored from the upstream script (including the
    ``posibility`` spelling) -- confirm against any external caller.
    """

    def __init__(self, Data, K, weights=None, means=None, covars=None):
        """
        Build the model and, where not supplied, draw random initial parameters.

        :param Data: training data, one sample per row
        :param K: number of Gaussian components
        :param weights: initial mixing weights of the components (optional)
        :param means: initial mean vector of every component (optional)
        :param covars: initial covariance matrix of every component (optional)
        """
        self.Data = Data
        self.K = K
        col = np.shape(self.Data)[1]

        # Caller-supplied parameters are copied: the M-step overwrites the
        # parameters in place and must not modify the caller's objects.
        if weights is not None:
            self.weights = np.array(weights, dtype=float)
        else:
            self.weights = np.random.rand(self.K)
            self.weights /= np.sum(self.weights)  # normalise to sum to 1

        if means is not None:
            self.means = [np.array(m, dtype=float) for m in means]
        else:
            self.means = [np.random.rand(col) for _ in range(self.K)]

        if covars is not None:
            self.covars = [np.array(c, dtype=float) for c in covars]
        else:
            # A raw random matrix is neither symmetric nor positive definite,
            # which can make the Gaussian exponent positive and overflow.
            # A.A^T + I is symmetric with all eigenvalues >= 1.
            self.covars = []
            for _ in range(self.K):
                seed = np.random.rand(col, col)
                self.covars.append(seed.dot(seed.T) + np.eye(col))

    @staticmethod
    def _density(points, mean, cov):
        """Evaluate the ridge-regularised Gaussian pdf at every row of ``points``."""
        dim = np.shape(cov)[0]
        ridged = np.asarray(cov, dtype=float) + np.eye(dim) * _COV_RIDGE
        covdet = np.linalg.det(ridged)
        covinv = np.linalg.inv(ridged)
        diff = points - np.asarray(mean, dtype=float)
        # Row-wise quadratic form diff . covinv . diff^T
        mahalanobis = np.sum(diff.dot(covinv) * diff, axis=1)
        norm = np.power(np.power(2 * np.pi, dim) * np.abs(covdet), 0.5)
        return np.exp(-0.5 * mahalanobis) / norm

    def Gaussian(self, x, mean, cov):
        """
        Gaussian probability density of a single sample.

        :param x: input sample (one vector)
        :param mean: mean vector
        :param cov: covariance matrix
        :return: density of ``x``
        """
        dim = np.shape(cov)[0]
        point = np.asarray(x, dtype=float).reshape((1, dim))
        return self._density(point, mean, cov)[0]

    def _weighted_densities(self, data):
        """Return the (samples x K) matrix of ``weights[k] * N(x_n | mean_k, cov_k)``."""
        weighted = np.empty((data.shape[0], self.K))
        for k in range(self.K):
            # det/inv are computed once per component, not once per sample.
            weighted[:, k] = self.weights[k] * self._density(
                data, self.means[k], self.covars[k])
        return weighted

    def _responsibilities(self, weighted):
        """E-step: normalise the weighted densities row by row."""
        totals = weighted.sum(axis=1, keepdims=True)
        underflowed = totals[:, 0] <= 0
        gammas = weighted / np.where(totals > 0, totals, 1.0)
        # Every density underflowed to 0: fall back to a uniform posterior
        # instead of producing 0/0 = NaN.
        gammas[underflowed] = 1.0 / self.K
        return gammas

    def _maximise(self, data, gammas):
        """M-step: re-estimate weight, mean and covariance of every component."""
        n_samples = data.shape[0]
        for k in range(self.K):
            resp = gammas[:, k]
            # nk is the effective number of samples owned by component k
            nk = resp.sum()
            if nk <= 0:
                # Empty component: keep its previous parameters (avoids 1/0).
                continue
            self.weights[k] = nk / n_samples
            self.means[k] = resp.dot(data) / nk
            diff = data - self.means[k]
            self.covars[k] = (resp[:, None] * diff).T.dot(diff) / nk

    def _log_likelihood(self, data):
        """Mixture log-likelihood: sum_n log(sum_k w_k N(x_n | mean_k, cov_k))."""
        totals = self._weighted_densities(data).sum(axis=1)
        return float(np.sum(np.log(np.maximum(totals, np.finfo(float).tiny))))

    def GMM_EM(self, tol=1e-8, max_iter=1000):
        """
        Optimise the GMM parameters with the EM algorithm.

        Stores the responsibilities in ``self.posibility`` and the arg-max
        component of every sample in ``self.prediction``.

        :param tol: stop once the log-likelihood changes by at most this much
        :param max_iter: upper bound on the number of EM iterations
        :raises ValueError: if ``max_iter`` is smaller than 1
        """
        if max_iter < 1:
            raise ValueError("max_iter must be at least 1")

        data = np.asarray(self.Data, dtype=float)
        gammas = np.zeros((data.shape[0], self.K))
        previous = None
        for _ in range(max_iter):
            # E-step: gammas[n][k] is the posterior of component k for sample n
            gammas = self._responsibilities(self._weighted_densities(data))
            # M-step
            self._maximise(data, gammas)

            current = self._log_likelihood(data)
            if not np.isfinite(current):
                break
            if previous is not None and abs(current - previous) <= tol:
                break
            previous = current

        self.posibility = gammas
        self.prediction = list(np.argmax(gammas, axis=1))


def _align_clusters(label, prediction, K):
    """Relabel cluster indices with the class permutation that best matches ``label``.

    Cluster numbering produced by EM is arbitrary, so comparing it directly
    with class labels is meaningless. Returns ``prediction`` unchanged when
    the number of distinct labels differs from ``K``.
    """
    label = np.asarray(label)
    prediction = np.asarray(prediction)
    classes = np.unique(label)
    if len(classes) != K:
        return prediction
    best, best_hits = prediction, -1
    for perm in itertools.permutations(classes):
        mapped = np.asarray(perm)[prediction]
        hits = int(np.sum(mapped == label))
        if hits > best_hits:
            best, best_hits = mapped, hits
    return best


def run_main():
    """
    Entry point: cluster the Iris data set with a 3-component GMM and plot it.
    """
    # Imported here so that the GMM class can be imported with NumPy alone.
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import Normalizer

    # Load the Iris data set
    iris = load_iris()
    label = np.array(iris.target)
    data = np.array(iris.data)
    print("Iris数据集的标签:\n", label)

    # Pre-process the data
    data = Normalizer().fit_transform(data)

    # Work around garbled CJK glyphs in matplotlib figures
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    # Visualise the raw data
    plt.scatter(data[:, 0], data[:, 1], c=label)
    plt.title("Iris数据集显示")
    plt.show()

    # Fit the GMM
    K = 3
    gmm = GMM(data, K)
    gmm.GMM_EM()
    # Map arbitrary cluster indices onto class labels before scoring.
    y_pre = _align_clusters(label, gmm.prediction, K)
    print("GMM预测结果:\n", y_pre)
    print("GMM正确率为:\n", accuracy_score(label, y_pre))
    plt.scatter(data[:, 0], data[:, 1], c=y_pre)
    plt.title("GMM结果显示")
    plt.show()


if __name__ == '__main__':
    run_main()