代码实现
clear all;
close all;
clc;
% 一、载入待分类的样本模式集
load mydata.mat
%设置控制参数
C = 3 ;%预期类数
Nc = 1 ;%初始聚类中心个数
Tn =15;%每一类的最少模式数目
Ts = 5;%类内距离标准差上界
Td = 24;%类间最小距离下界
L = 2;%在每次迭代中可以合并的类的最多对数
I = 10;%允许的最大迭代次数
Ip = 1;%初始迭代次数
%获取样本的长度
N = size(X);
N = N(1);
%选取初始聚类中心
R=randperm(N);
Sample(:,1) = X;Sample(:,2) = Y;
Z = zeros(Nc,2);%聚类中心
for i = 1:Nc
Z(i,:) = Sample(R(i),:);
end
%绘制样本
% grid on;
axis([-10 50 -10 50]);
hold on
% plot(X,Y,'mo');
%开始迭代
while(Ip <= I)
t = 1;
while(t)
% 二、按最短距离归类
di = zeros(N,Nc);
for i = 1:N
for j = 1:Nc
di(i,j) = norm(Sample(i,:) - Z(j,:));
end
end
dn = zeros(N,1);
for i = 1:N
[m ,n] = min(di(i,:));%m为最短距离,n为所在位置
dn(i,1) = n;%归类
end
% 三、根据Tn,即每类中允许的最少模式
ni = zeros(Nc,1);%各个聚类中心含有的模式数目
for i = 1:Nc
ni(i,1) = length(find(dn == i));%个数
if ni(i,1) < Tn
Nc = Nc-1;
Z(i,:) = 0;
end
end
v = find(Z(:,1)~=0);
K = zeros(Nc,2);
for i = 1:Nc
K(i,:) = Z(v(i),:);
end
Z = K;%合并后的聚类中心
if Nc >= j %相等时,说明每类样本数满足参数要求
break
end
end
%设置颜色
colour = zeros(Nc,3);
for i = 1:Nc
colour(i,:) = rand(1,3);
end
% 四、计算分类后的参数:更新聚类中心,类内平均距离,总体平均距离
%更新聚类中心
dj = zeros(Nc,1);%类内平均距离
for i = 1:Nc
v = find(dn == i);%位置
u = ni(i,1);%个数
M = zeros(u,2);%用来存储所属同一类的点
D = zeros(u,1);%用来存储类内距离
for j = 1:u
M(j,:) = Sample(v(j),:);
D(j,1) = norm(M(j,:) - Z(i,:));
end
P = plot(M(:,1),M(:,2),'d');
set(P,'color',colour(i,:));
K(i,:) = mean(M);%新的聚类中心
%计算各类中模式到类心的平均距离dj
dj(i,1) = mean(D);%类内平均距离
end
Z1 = K;
%总体平均距离d
d = sum(ni.*dj)/N;
% 五、依据Ip、Nc判断停止,分裂,合并
if Ip >= I
Td = 0;
jump = 2;%转至9
elseif Nc <= C/2
jump = 1; %转至6
elseif Nc >= 2*C
jump = 2;%转至9
elseif Nc > C/2 && Nc < 2*C
if mod(Ip,2) == 1 %如果Ip为奇数
jump = 1; %转至6,分裂
else
jump = 2;%转至9 ,合并
end
end
switch (jump)
case 1 %%%%%%%%%分裂
% 六、计算类内距离标准差矢量si, 矢量维数:n = 2;类编号:j = Nc;
si = zeros(Nc,2);
for i = 1:Nc
v = find(dn == i);%位置
u = ni(i,1);%个数
d1 = zeros(u,1);%X与Z(i,1)做差
d2 = zeros(u,1);%X与Z(i,2)做差
for j = 1:u
d1(j,1) = (X(v(j),:)-Z1(i,1))^2;
d2(j,1) = (Y(v(j),:)-Z1(i,2))^2;
end
si(i,1) = mean(d1)^(1/2);
si(i,2) = mean(d2)^(1/2);
end
% 七、对于每一聚类,求出类内距离标准差矢量si中的最大分类simax
simax = zeros(Nc,1);
for i = 1:Nc
[m,n] = max(si(i,2));
simax(i,1) = m;
end
% 八、simax与类内允许的最大类内标准差Ts进行比较
for i = 1:Nc
if simax(i) > Ts
if (dj(i)>d && ni(i)>2*(Tn+1)) || Nc <= C/2
Nc = Nc+1;
K(i,1) = Z1(i,1)+ 0.5*simax(i);
K(Nc,1) = Z1(i,1)- 0.5*simax(i);
K(Nc,2) = Z1(i,2);
end
end
end
Z1 = K;%分裂后的
if i < Nc %判断是否进行了分裂
Ip = Ip+1;
Z = Z1;
else
% 九、计算各对聚类中心间的距离(类间距离)Dij
Dij = zeros(Nc-1,Nc);
for i = 1:Nc-1
for j = i+1:Nc
Dij(i,j) = norm(Z1(i,:) - Z1(j,:));
end
end
% 十、依据Td判断合并 Dij<Td 合并
k = 1;
dij = zeros(k,1);
for i = 1:Nc-1
for j = 1:Nc
if Dij(i,j) > 0 && Dij(i,j) < Td;
dij(k,1) = Dij(i,j);
k = k+1;
end
end
end
dij = sort(dij);%排序
%根据可以合并的最多对数L进行合并
g = size(dij);
g = g(1);
Zi = zeros(L,1);Zj = zeros(L,1);
for i = 1:L
if dij(i) == 0;
break
end
[Zi(i,1),Zj(i,1)] = find(Dij == dij(i));
if i == 1 || Zi(i,1) ~= Zi(i-1,1)
Z1(Zi(i),:) = (ni(Zi(i),1)*Z1(i,:)+ni(Zj(i),1)*Z1(j,:))/(ni(Zi(i),1)+ni(Zj(i),1));
Z1(Zj(i),:) = 0;
Nc = Nc-1;
end
g = g-1;
if g == 0
break
end
end
v = find(Z1(:,1)~=0);
K = zeros(Nc,2);
for i = 1:Nc
K(i,:) = Z1(v(i),:);
end
Z1 = K;%合并后的聚类中心
% 十一、判断结束、或调整参数、或继续迭代
if Ip >= I || (isequal(size(Z1),size(Z)) && abs(sum(sum(Z1-Z)))<0.0001 )
Z = Z1;
break
else
Ip = Ip+1;
end
end
case 2 %%%%%%%%%合并
% 九、计算各对聚类中心间的距离(类间距离)Dij
Dij = zeros(Nc-1,Nc);
for i = 1:Nc-1
for j = i+1:Nc
Dij(i,j) = norm(Z1(i,:) - Z1(j,:));
end
end
% 十、依据Td判断合并 Dij<Td 合并
k = 1;
dij = zeros(k,1);
for i = 1:Nc-1
for j = 1:Nc
if Dij(i,j) > 0 && Dij(i,j) < Td;
dij(k,1) = Dij(i,j);
k = k+1;
end
end
end
dij = sort(dij);%排序
%根据可以合并的最多对数L进行合并
g = size(dij);
g = g(1);
Zi = zeros(L,1);Zj = zeros(L,1);
for i = 1:L
if dij == 0
break
end
[Zi(i,1),Zj(i,1)] = find(Dij == dij(i));
if i == 1 || Zi(i,1) ~= Zi(i-1,1)
Z1(Zi(i),:) = (ni(Zi(i),1)*Z1(i,:)+ni(Zj(i),1)*Z1(j,:))/(ni(Zi(i),1)+ni(Zj(i),1));
Z1(Zj(i),:) = 0;
Nc = Nc-1;
end
g = g-1;
if g == 0
break
end
end
v = find(Z1(:,1)~=0);
K = zeros(Nc,2);
for i = 1:Nc
K(i,:) = Z1(v(i),:);
end
Z1 = K;%合并后的聚类中心
% 十一、判断结束、或调整参数、或继续迭代
if Ip >= I || ( isequal(size(Z1),size(Z)) && abs(sum(sum(Z1-Z)))<0.0001 )
Z = Z1;
break
else
Ip = Ip+1;
end
end
end
%显示距离中心
for i = 1:Nc
plot(Z(i,1),Z(i,2),'kp ');
end