MATLAB 下 k-means 及 PAM(k-medoids)算法对球形数据的聚类练习

时间:2024-09-01 11:06:32
% Reset the workspace and the command window before building the data set.
clear all;
clc; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Data initialisation: preallocate the sample matrix Data with zeros.
% NOTE(review): the numeric literals in this section (the size passed to
% zeros, every loop range, the row indices into Data and the unifrnd bounds)
% are missing from the source as received, so the statements below do not
% parse as written. Restore them from the original script before running.
Data=zeros(,);
% Add noise: the first loop assigns three entries of column i of Data.
% NOTE(review): the assigned values are missing - confirm what the noise
% points were meant to be.
for i=:
Data(,i)=;
Data(,i)=;
Data(,i)=;
end
% Second group of points: draw p, a and b with unifrnd and store
% p*sin(a)*cos(b), p*sin(a)*sin(b), p*cos(a) in column i, i.e. a
% spherical-to-Cartesian conversion with p acting as the radius.
% NOTE(review): the sampling bounds for p, a and b are missing - TODO confirm.
for i=:
p=unifrnd(,);
a=unifrnd(,*pi);
b=unifrnd(,pi);
Data(,i)=p*sin(a)*cos(b);
Data(,i)=p*sin(a)*sin(b);
Data(,i)=p*cos(a);
end
% Third group of points: same construction as the previous loop.
% NOTE(review): presumably uses a different radius range and column range
% than the previous loop - the literals are missing, verify.
for i=:
p=unifrnd(,);
a=unifrnd(,*pi);
b=unifrnd(,pi);
Data(,i)=p*sin(a)*cos(b);
Data(,i)=p*sin(a)*sin(b);
Data(,i)=p*cos(a);
end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Clustering parameters and result buffers.
% NOTE(review): several numeric literals in this section are missing from the
% source as received (the value of K, the value of max_Initial and the first
% argument of the zeros call for C); restore them before running.

% Number of samples: d is the row count of Data, N the column count
% (each column of Data is used as one sample further down).
[d,N]=size(Data);
% Number of clusters.
% NOTE(review): value missing - TODO confirm.
K=;
% Method selection: 'kmeans' or 'kmedoids' (the two cases handled by the
% switch in the main loop).
method='kmeans';
%method='kmedoids';
% Number of random re-initialisations of the starting centres.
% The commented-out line below is an earlier formula based on N and K.
%max_Initial=max(,N/(*K));
% label(r,:) receives the cluster index of every sample for run r.
% NOTE(review): the value of max_Initial is missing - TODO confirm.
max_Initial=; label=zeros(max_Initial,N);
% center(:,:,r) receives the d-by-K centre matrix found in run r.
center=zeros(d,K,max_Initial);
% C holds the cluster index of every sample for the run in progress.
C=zeros(,N); % the main loop follows
% Main loop: run the selected method (k-means or k-medoids) once for each of
% the max_Initial random initialisations. For every run the final assignment
% vector is stored in label(initial_Case,:) and the final d-by-K centre
% matrix in center(:,:,initial_Case).
for initial_Case = 1:max_Initial
    pointK = Initial_center(Data, K);   % d-by-K matrix of starting centres
    iter = 0;
    max_iter = 1e+3;                    % matches the "1000" in the message below
    converged = false;
    disp(['------------KM进行第 ' num2str(initial_Case) ' 次重新选择初始中心-----------'])

    % Iterate assignment / update steps for the current set of centres.
    while iter < max_iter
        iter = iter + 1;
        % Progress report. NOTE(review): the reporting interval was lost from
        % the source; 10 is an assumed value and only affects console output.
        if mod(iter, 10) == 0
            disp([' 内部循环进行第 ' num2str(iter) ' 次迭代'])
        end

        % Assignment step: each sample goes to the centre at the smallest
        % Euclidean distance.
        for i = 1:N
            dert = repmat(Data(:,i), 1, K) - pointK;
            % Column-wise norms; avoids forming the K-by-K product dert'*dert
            % just to read its diagonal.
            distK = sqrt(sum(dert .* dert, 1));
            [~, j] = min(distK);
            C(i) = j;
        end

        % Update step: recompute the K centres.
        xK_ = zeros(d, K);
        for i = 1:K
            Pi = Data(:, C == i);
            Nk = size(Pi, 2);
            if Nk == 0
                % Empty cluster: keep the previous centre. Dividing by zero
                % would give a NaN centre (never equal to itself, so the
                % convergence test could not succeed) and the medoid search
                % would index with an empty result.
                xK_(:,i) = pointK(:,i);
                continue;
            end
            % The only difference between K-Means and K-Medoids is how the
            % new centre is chosen.
            switch lower(method)
                case 'kmeans'
                    % Centre = mean of the members.
                    xK_(:,i) = sum(Pi, 2) / Nk;
                case 'kmedoids'
                    % Centre = the member with the smallest total distance
                    % to all other members of the cluster.
                    Dx2 = zeros(1, Nk);
                    for t = 1:Nk
                        dx = Pi - Pi(:,t) * ones(1, Nk);
                        Dx2(t) = sum(sqrt(sum(dx .* dx, 1)), 2);
                    end
                    [~, min_ind] = min(Dx2);
                    xK_(:,i) = Pi(:, min_ind);
                otherwise
                    errordlg('请输入正确的方法:kmeans-OR-kmedoids','MATLAB error');
                    % Stop here: carrying on would cluster around all-zero
                    % centres and report a meaningless "converged" result.
                    error('Unknown clustering method ''%s''.', method);
            end
        end

        % Termination test: the centres did not move at all.
        if isequal(xK_, pointK)
            converged = true;
            disp(['###迭代 ' num2str(iter) ' 次得到收敛的解'])
            break;
        end
        pointK = xK_;
    end

    % A flag is used instead of comparing iter with max_iter so that a run
    % converging exactly on the last allowed iteration is not also reported
    % as a failure.
    if ~converged
        disp('###达到内部最大迭代次数1000,未得到收敛的解');
    end

    % Record the outcome of this run whether or not it converged.
    label(initial_Case,:) = C;
    center(:,:,initial_Case) = xK_;
end % the runs are compared for clustering quality after this loop
% Compare the runs and keep the best one.
% dist_N(r,k) is the sum of Euclidean distances from the samples labelled k
% in run r to the k-th centre of that run.
dist_N = zeros(max_Initial, K);
for initial_Case = 1:max_Initial
    for k = 1:K
        tem = find(label(initial_Case,:) == k);
        % Offsets of the members from their centre (centre replicated once
        % per member); an empty cluster gives a d-by-0 matrix and adds 0.
        dx = Data(:,tem) - center(:,k,initial_Case) * ones(1, size(tem,2));
        dxk = sqrt(sum(dx .* dx, 1));     % one distance per member
        dist_N(initial_Case,k) = sum(dxk, 2);
    end
end

% Total error of each run = sum over its K clusters. The run with the
% smallest total supplies the final labels and centres.
dist_N_sum = sum(dist_N, 2);
[distmin, best_ind] = min(dist_N_sum);
% Best grouping
best_Label = label(best_ind,:);
% Best centres
best_Center = center(:,:,best_ind);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 3-D scatter plot of the samples: rows 1, 2 and 3 of Data are used as the
% x, y and z coordinates, and each marker is coloured by its cluster index
% in best_Label.
figure();
scatter3(Data(1,:), Data(2,:), Data(3,:), 'filled', 'cdata', best_Label);
title('Data Distribution');
function center=Initial_center(X,K)
%INITIAL_CENTER Pick K distinct columns of X at random as starting centres.
%   center = Initial_center(X,K) returns a size(X,1)-by-K matrix whose
%   columns are K different columns of X, selected through a random
%   permutation of the column indices.
%
%   An error is raised when K exceeds the number of columns of X.
N = size(X, 2);                 % number of samples (columns)
if K > N
    error('Initial_center:tooManyCenters', ...
        'Cannot pick %d initial centres from only %d samples.', K, N);
end
rnd_Idx = randperm(N);          % random ordering of the column indices
center = X(:, rnd_Idx(1:K));    % the first K indices are distinct by construction
end