delphi pascal 写的 fcm kmeans 模糊C-均值算法

时间:2022-01-24 23:03:56

delphi fcm kmeans 模糊C-均值算法

关键字:fcm kmeans 模糊C - 均值 算法 delphi 多边形 重点 多边形 中心 gis
说明:用于数据挖掘,找出一批散乱的点的重点位置。
         分两个程序,一个是 uKmeans.pas,一个是主程序用于运行例子,如下:


unit uKmeans;
//本单元程序由 Shyam Sivaraman 用java编写的 cluster.kmeans 类翻译改写而来
//一夜流星
//qq: 86804
//2007-12-25
interface
uses
  Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs, math;
type
  TCluster = class;
  TCentroid = class;
  TDataPoint = class;
  TJCA = class;
  TVector = array of TList;
  
  TCluster = class(TObject)
  private
    mName: String;
    mCentroid: TCentroid;
    mSumSqr: double;
    mDataPoints: TList;
  public
    constructor create;
    destructor destory;
    procedure Cluster(name: String);
    procedure setCentroid(var c:TCentroid);
    function getCentroid(): TCentroid;
    procedure addDataPoint(var dp:TDataPoint);
    procedure removeDataPoint(var dp:TDataPoint);
    function getNumDataPoints(): Integer;
    function getDataPoint(pos:Integer): TDataPoint;
    procedure calcSumOfSquares();
    function getSumSqr(): Double;
    function getName(): String;
    function getDataPoints(): TList;
  end;
  TCentroid = class(TObject)
  private
    mCx: double;
    mCy: double;
    mCluster: TCluster;
  public
    procedure Centroid(cx:Double; cy:Double);
    procedure calcCentroid();
    procedure setCluster(var c:TCluster);
    function getCX(): Double;
    function getCY(): Double;
    function getCluster(): TCluster;
  end;
  TDataPoint = class(TObject)
  private
    mX: double;
    mY: double;
    mObjName: String;
    mCluster: TCluster;
    mEuDt: double;
  public
    procedure DataPoint(x:Double; y:Double; name: String);
    procedure setCluster(var cluster:TCluster);
    procedure calcEuclideanDistance();
    function testEuclideanDistance(var c: TCentroid): Double;
    function getX(): Double;
    function getY(): Double;
    function getCluster(): TCluster;
    function getCurrentEuDt(): Double;
    function getObjName(): String;
  end;
  TJCA = class(TObject)
  private
    clusters: array of TCluster;
    miter: Integer;
    mDataPoints: TList;
    mSWCSS: double;
    procedure calcSWCSS();
    procedure setInitialCentroids();
    function getMaxXValue(): Double;
    function getMinXValue(): Double;
    function getMaxYValue(): Double;
    function getMinYValue(): Double;    
  public
    constructor create;
    destructor destory;
    procedure JCA(k: Integer; iter: Integer; var dataPoints: TList );
    procedure startAnalysis();
    function getClusterOutput(): TVector;
    function getKValue(): Integer;
    function getIterations(): Integer;
    function getTotalDataPoints(): Integer;
    function getSWCSS(): double;
    function getCluster(pos: Integer): TCluster;
  end;
  
implementation

//==============================================================================
// TDataPoint
//==============================================================================
procedure TDataPoint.DataPoint(x:Double; y:Double; name: String);
begin
  self.mX := x;
  self.mY := y;
  self.mObjName := name;
  self.mCluster := NIL;
end;
procedure TDataPoint.setCluster(var cluster:TCluster);
begin
  self.mCluster := cluster;
  calcEuclideanDistance();
end;
procedure TDataPoint.calcEuclideanDistance();
var d1, d2: double;
begin
  //Exp(ln(x)*y) x的y次方
  //called when DP is added to a cluster or when a Centroid is recalculated.
  d1 := mCluster.getCentroid().getCx();
  d1 := mX - d1;
  d1 := d1 * d1;
    
  d2 := mCluster.getCentroid().getCy();
  d2 := mY - d2;
  d2 := d2 * d2;
  mEuDt := sqrt( d1 + d2 );
end;                                                                                                              
function TDataPoint.testEuclideanDistance(var c: TCentroid): Double;
var d1, d2: double;
begin
  d1 := mX - c.getCx();
  d1 := d1 * d1;
  d2 := mY - c.getCy();
  d2 := d2 * d2;
  result := sqrt( d1 + d2 );
end;
function TDataPoint.getX(): Double;
begin
  result := mX;
end;
function TDataPoint.getY(): Double;
begin
  result := mY;
end;
function TDataPoint.getCluster(): TCluster;
begin
  result := mCluster;
end;
function TDataPoint.getCurrentEuDt(): Double;
begin
  result := mEuDt;
end;
function TDataPoint.getObjName(): String;
begin
  result := mObjName;
end;
//==============================================================================
// TCentroid
//==============================================================================
procedure TCentroid.Centroid(cx:Double; cy:Double);
begin
  self.mCx := cx;
  self.mCy := cy;
end;
procedure TCentroid.calcCentroid();
var i, numDP: Integer;
  tempX, tempY: double;
begin
  numDP := mCluster.getNumDataPoints();
  tempX := 0;
  tempY := 0;
  //caluclating the new Centroid
  for i := 0 to numDP -1 do begin
    tempX := tempX + mCluster.getDataPoint(i).getX();
    //total for x
    tempY := tempY + mCluster.getDataPoint(i).getY();
    //total for y
  end;
  self.mCx := tempX / numDP;
  self.mCy := tempY / numDP;
  //calculating the new Euclidean Distance for each Data Point
  tempX := 0;
  tempY := 0;
  for i := 0 to numDP -1 do begin
    mCluster.getDataPoint(i).calcEuclideanDistance();
  end;
  //calculate the new Sum of Squares for the Cluster
  mCluster.calcSumOfSquares();
end;

procedure TCentroid.setCluster(var c:TCluster);
begin
  self.mCluster := c;
end;
function TCentroid.getCX(): Double;
begin
  result := mCx;
end;
function TCentroid.getCY(): Double;
begin
  result := mCy;
end;
function TCentroid.getCluster(): TCluster;
begin
  result := mCluster;
end;

//==============================================================================
// TCluster
//==============================================================================
constructor TCluster.Create;
begin
    inherited create;
end;
destructor TCluster.destory;
begin
    inherited destroy;
end;
procedure TCluster.Cluster(name: String);
begin
  self.mName := name;
  self.mCentroid := Nil; //will be set by calling setCentroid()
  mDataPoints := TList.Create;
end;
procedure TCluster.setCentroid( var c:TCentroid);
begin
  self.mCentroid := c;
end;
function TCluster.getCentroid(): TCentroid;
begin
  result := self.mCentroid;;
end;
procedure TCluster.addDataPoint(var dp:TDataPoint);
begin
  dp.setCluster(self); //initiates a inner call to calc EuclideanDistance() in DP.
  self.mDataPoints.Add(dp);
  calcSumOfSquares();
end;
procedure TCluster.removeDataPoint(var dp:TDataPoint);
begin
  self.mDataPoints.Remove(dp);
  calcSumOfSquares();
end;
function TCluster.getNumDataPoints(): Integer;
begin
  result := self.mDataPoints.Count;
end;
function TCluster.getDataPoint(pos:Integer): TDataPoint;
begin
  result := TDataPoint( mDataPoints.Items[pos] );
end;
procedure TCluster.calcSumOfSquares();
var i, size: Integer;
  temp: Double;
begin
  size := self.mDataPoints.Count;
  temp := 0;
  for i := 0 to size-1 do begin
    temp := temp + (TDataPoint( mDataPoints.Items[i] ) ).getCurrentEuDt();
  end;
  self.mSumSqr := temp;
end;
function TCluster.getSumSqr(): Double;
begin
  result := self.mSumSqr;
end;
function TCluster.getName(): String;
begin
  result := self.mName;
end;
function TCluster.getDataPoints(): TList;
begin
  result := self.mDataPoints;
end;
//==============================================================================
// TJCA
//==============================================================================
constructor TJCA.Create;
begin
    inherited create;
    mDataPoints := TList.Create;
end;
destructor TJCA.destory;
begin
    mDataPoints.Clear;
    mDataPoints.Free;
    inherited destroy;
end;    

procedure TJCA.calcSWCSS();
var temp: double;
  i: Integer;
begin
  temp := 0;
  for i := 0 to Length(clusters) -1 do begin
    temp := temp + clusters[i].getSumSqr();
  end;
  mSWCSS := temp;
end;
procedure TJCA.setInitialCentroids();
var cx, cy: double;
  n: Integer;
  c1: TCentroid;
begin
  //kn = (round((max-min)/k)*n)+min where n is from 0 to (k-1).
  cx := 0;
  cy := 0;
  for n := 1 to Length(clusters) do begin
    cx := (((getMaxXValue() - getMinXValue()) / (length(clusters) + 1)) * n) + getMinXValue();
    cy := (((getMaxYValue() - getMinYValue()) / (Length(clusters) + 1)) * n) + getMinYValue();
    c1 := TCentroid.create;
    c1.Centroid(cx, cy);
    clusters[n - 1].setCentroid(c1);
    c1.setCluster(clusters[n - 1]);
  end;
  
end;
function TJCA.getMaxXValue(): Double;
var temp: double;
  i: Integer;
  dp: TDataPoint;
begin
  temp := TDataPoint(mDataPoints.Items[0]).getX();
  for i := 0 to mDataPoints.count -1 do begin
    dp := TDataPoint( mDataPoints.items[i] );
    if (dp.getX() > temp) then
      temp := dp.getX();
  end;
  result := temp;
end;
function TJCA.getMinXValue(): Double;
var temp: double;
  i: Integer;
  dp: TDataPoint;
begin
  temp := TDataPoint(mDataPoints.items[0]).getX();
  for i := 0 to mDataPoints.count -1 do begin
    dp := TDataPoint( mDataPoints.items[i] );
    if (dp.getX() < temp) then
      temp := dp.getX();
  end;
  result := temp;
end;

function TJCA.getMaxYValue(): Double;
var temp: double;
  i: Integer;
  dp: TDataPoint;
begin
  temp := TDataPoint(mDataPoints.items[0]).getY();
  for i := 0 to mDataPoints.count -1 do begin
    dp := TDataPoint( mDataPoints.items[i] );
    if (dp.getY() > temp) then
      temp := dp.getY();
  end;
  result := temp;
end;
function TJCA.getMinYValue(): Double;
var temp: double;
  i: Integer;
  dp: TDataPoint;
begin
  temp := TDataPoint(mDataPoints.items[0]).getY();
  for i := 0 to mDataPoints.count -1 do begin
    dp := TDataPoint( mDataPoints.items[i] );
    if (dp.getY() < temp) then
      temp := dp.getY();
  end;
  result := temp;
end;

procedure TJCA.JCA(k: Integer; iter: Integer; var dataPoints: TList );
var i: Integer;
begin
  //clusters: array of TCluster;
  setLength( clusters, k );
  for i := 0 to k -1 do begin
    clusters[i] := TCluster.create;
    clusters[i].Cluster( 'Cluster' + IntToStr( i ) );
  end;
  self.miter := iter;
  self.mDataPoints := dataPoints;
end;
procedure TJCA.startAnalysis();
var i, j, k, l, m, n, gp, gq: Integer;
  loop1: Boolean;
  tempEuDt: double;
  tempCluster: TCluster;
  matchFoundFlag: boolean;
  dp: TDataPoint;
  dc: TCentroid;
begin
//set Starting centroid positions - Start of Step 1
setInitialCentroids();
  j := 0;
  while( j < mDataPoints.count ) do begin
    for i := 0 to Length(clusters) -1 do begin
      dp := TDataPoint(mDataPoints.items[j]);
      clusters[i].addDataPoint( dp );
      j := j + 1;
      if( j >= mDataPoints.count ) then break;
    end;
  end;
  
//calculate E for all the clusters
calcSWCSS();
//recalculate Cluster centroids - Start of Step 2
for i := 0 to length( clusters ) -1 do begin
  clusters[i].getCentroid().calcCentroid();
end;
//recalculate E for all the clusters
calcSWCSS();
for i := 0 to miter -1 do begin
  //enter the loop for cluster 1
  for j := 0 to length( clusters ) -1 do begin
      k := 0;
      while( true ) do begin
        gp := clusters[j].getNumDataPoints();
        if k >= gp then break;
    //pick the first element of the first cluster
    //get the current Euclidean distance
    tempEuDt := clusters[j].getDataPoint(k).getCurrentEuDt();
    tempCluster := Nil;
    matchFoundFlag := false;
    //call testEuclidean distance for all clusters
    for l := 0 to length(clusters)-1 do begin
      //if testEuclidean < currentEuclidean then
          dc := clusters[l].getCentroid();
     if (tempEuDt > clusters[j].getDataPoint(k).testEuclideanDistance(dc)) then begin
            dc := clusters[l].getCentroid();
      tempEuDt := clusters[j].getDataPoint(k).testEuclideanDistance(dc);
      tempCluster := clusters[l];
      matchFoundFlag := true;
     end;
     //if statement - Check whether the Last EuDt is > Present EuDt
        end;
       //for variable 'l' - Looping between different Clusters for matching a Data Point.
       //add DataPoint to the cluster and calcSWCSS
        if (matchFoundFlag) then begin
          dp := clusters[j].getDataPoint(k);
       tempCluster.addDataPoint(dp);
          dp := clusters[j].getDataPoint(k);
       clusters[j].removeDataPoint(dp);
     for m := 0 to length(clusters)-1 do begin
      clusters[m].getCentroid().calcCentroid();
     end;
     //for variable 'm' - Recalculating centroids for all Clusters
     calcSWCSS();
    end;
    //if statement - A Data Point is eligible for transfer between Clusters.
        k := k + 1;
   end;//for variable 'k' - Looping through all Data Points of the current Cluster.
  end;//for variable 'j' - Looping through all the Clusters.
end;//for variable 'i' - Number of iterations.
  
end;
function TJCA.getClusterOutput(): TVector;
var v: TVector;
  i: Integer;
begin
  setLength( v, Length(clusters) );
  for i := 0 to Length(clusters) -1 do begin
    v[i] := clusters[i].getDataPoints();
  end;
  result := v;
end;
function TJCA.getKValue(): Integer;
begin
  result := Length(clusters);
end;
function TJCA.getIterations(): Integer;
begin
  result := miter;
end;
function TJCA.getTotalDataPoints(): Integer;
begin
  result := mDataPoints.count;
end;
function TJCA.getSWCSS(): double;
begin
  result := mSWCSS;
end;
function TJCA.getCluster(pos: Integer): TCluster;
begin
  result := clusters[pos];
end;
    
end.


==========================================================
unit uFrmMain;
interface
uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls;
type
  TFrmMain = class(TForm)
    Memo1: TMemo;
    Button1: TButton;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;
var
  FrmMain: TFrmMain;
implementation
{$R *.dfm}
uses uKmeans;

procedure TFrmMain.Button1Click(Sender: TObject);
var tempV, dataPoints: TList;
  dp: TDataPoint;
  i, j: Integer;
  jca: TJca;
  v: TVector;
  ct: TCluster;
  cr: TCentroid;
begin
  //添加多个任意坐标
  dataPoints := TList.Create;
  dp := TDataPoint.Create;
  dp.DataPoint(113.03678, 28.245000, 'p53');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.05211, 28.245000, 'maltase');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.05211, 28.234920, 'bcl2');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.03670, 28.234920, 'fas');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.04116, 28.242807, 'fas1');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.04081, 28.236795, 'fas2');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.03670, 28.234920, 'fas3');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.04818, 28.236918, 'fas4');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.03885, 28.239306, 'fas5');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.03886, 28.239307, 'fas6');
  dataPoints.Add( dp );

  dp := TDataPoint.Create;
  dp.DataPoint(113.03887, 28.239308, 'fas7');
  dataPoints.Add( dp );
  dp := TDataPoint.Create;
  dp.DataPoint(113.03888, 28.239309, 'fas8');
  dataPoints.Add( dp );
  //再加入一千个坐标
  for i := 1 to 1000 do begin
    dp := TDataPoint.Create;
    dp.DataPoint(113.03736, 28.243408, 'f1as' + inttostr(i) );
    dataPoints.Add( dp );
  end;
  jca := TJCa.create;
  //参数:3表示要求多少个重心点,100表示模拟计算多少次求平均结果,datapoints表示点阵
  jca.JCA(3, 100, dataPoints);
  jca.startAnalysis();

  v := jca.getClusterOutput();
  for i := 0 to length(v) -1 do begin
    tempV := v[i];
    memo1.Lines.add('-----------Cluster' + inttostr(i) + '---------');
    {
      for j := 0 to tempV.Count -1 do begin
        dp := TDataPoint( tempV.Items[j] );
        memo1.Lines.Add( dp.getObjName() + '[' + floattostr( dp.getX() ) + ',' + floattostr( dp.getY() ) + ']');
      end;
    }
    ct := jca.getCluster(i);
    cr := ct.getCentroid();
    
  //利用format函数来处理。
  //format('%8.7f',[aa]);
  
    //memo1.Lines.Add( floattostr( cr.getCx() ) + ',' + floattostr( cr.getCy() ) );
    memo1.Lines.Add( format('%.7f',[cr.getCx()]) + ',' + format('%.7f',[cr.getCy()]) );
  end;

end;
end.