数据挖掘聚类算法--Kmeans

时间:2021-05-16 23:40:31

算法采用的数据集为 iris(鸢尾花),可以在 UCI 上下载:http://archive.ics.uci.edu/ml/datasets/Iris

数据集介绍:

每条记录包含以下 5 个属性:
1. sepal length in cm
2. sepal width in cm
3. petal length in cm
4. petal width in cm
5. class:
-- Iris Setosa
-- Iris Versicolor
-- Iris Virginica
View Code

直接上代码:

 1 package neugle.kmeans;
 2 
 3 public class IrisModel {
 4     public double Sep_len = 0;
 5     public double Sep_wid = 0;
 6     public double Pet_len = 0;
 7     public double Pet_wid = 0;
 8     public String Iris_type = "";
 9 
10     public boolean equals(Object obj) {
11         IrisModel iris = (IrisModel) obj;
12         return this.Sep_len == iris.Sep_len && this.Sep_wid == iris.Sep_wid
13                 && this.Pet_len == iris.Pet_len && this.Pet_wid == iris.Pet_wid;
14     }
15 }
  1 package neugle.kmeans;
  2 
  3 import java.io.BufferedReader;
  4 import java.io.FileNotFoundException;
  5 import java.io.FileReader;
  6 import java.io.IOException;
  7 import java.util.ArrayList;
  8 import java.util.Iterator;
  9 
 10 public class Kmeans {
 11     private static int k = 3;// 划分簇数目
 12     private static int dataCount = 150;// 文本数量
 13     private static int n = 0;// 迭代次数
 14 
 15     public static void main(String[] args) {
 16         ArrayList<IrisModel> irisList = ReadFile();// 取得文本中数据
 17         ArrayList<IrisModel> beforeRandomPot = new ArrayList<IrisModel>();// 记录上一次质心位置
 18         ArrayList<IrisModel> randomPot = RandomPot(irisList);// 获得随机数据
 19         ArrayList<ArrayList<IrisModel>> kMeansList = null;
 20         while (!CompareRandomPot(beforeRandomPot, randomPot)) {
 21             kMeansList = KMeans(irisList, randomPot);// 进行n次聚类
 22             n++;
 23         }
 24         Print(kMeansList);
 25         System.out.println("迭代了" + n + "次");
 26     }
 27 
 28     // 读取文件中数据
 29     private static ArrayList<IrisModel> ReadFile() {
 30         FileReader read = null;
 31         BufferedReader br = null;
 32         ArrayList<IrisModel> irisList = new ArrayList<IrisModel>();
 33         try {
 34             read = new FileReader("D:\\iris.data");
 35             br = new BufferedReader(read);
 36             String readLine = null;
 37             while ((readLine = br.readLine()) != null) {
 38                 IrisModel iris = new IrisModel();
 39                 String[] agrs = readLine.split(",");
 40                 iris.Sep_len = Double.parseDouble(agrs[0]);
 41                 iris.Sep_wid = Double.parseDouble(agrs[1]);
 42                 iris.Pet_len = Double.parseDouble(agrs[2]);
 43                 iris.Pet_wid = Double.parseDouble(agrs[3]);
 44                 iris.Iris_type = agrs[4];
 45                 irisList.add(iris);
 46             }
 47         } catch (FileNotFoundException e) {
 48             System.out.println("读取文件异常");
 49             irisList = null;
 50         } catch (IOException e) {
 51             System.out.println("读取文件异常");
 52             irisList = null;
 53         } finally {
 54             try {
 55                 br.close();
 56             } catch (IOException e) {
 57                 System.out.println("关闭文件异常");
 58             }
 59         }
 60         return irisList;
 61     }
 62 
 63     // 随机生成初始k个点
 64     private static ArrayList<IrisModel> RandomPot(ArrayList<IrisModel> irisList) {
 65         ArrayList<Integer> initCenter = new ArrayList<Integer>();
 66         ArrayList<IrisModel> randomPot = new ArrayList<IrisModel>();
 67         for (int i = 0; i < k; i++) {
 68             int num = (int) (Math.random() * dataCount);
 69             if (!initCenter.contains(num))
 70                 initCenter.add(num);
 71             else
 72                 i--;
 73         }
 74         Iterator<Integer> i = initCenter.iterator();
 75         while (i.hasNext()) {
 76             randomPot.add(irisList.get(i.next()));
 77         }
 78         return randomPot;
 79     }
 80 
 81     // KMeans主程序
 82     private static ArrayList<ArrayList<IrisModel>> KMeans(
 83             ArrayList<IrisModel> irisList, ArrayList<IrisModel> randomPot) {
 84         ArrayList<ArrayList<IrisModel>> groupNum = new ArrayList<ArrayList<IrisModel>>();
 85         for (int i = 0; i < randomPot.size(); i++) {
 86             ArrayList<IrisModel> list = new ArrayList<IrisModel>();
 87             list.add(randomPot.get(i));
 88             groupNum.add(list);
 89         }
 90         for (int i = 0; i < irisList.size(); i++) {
 91             double temp = Double.MAX_VALUE;
 92             int flag = -1;
 93             for (int j = 0; j < randomPot.size(); j++) {
 94                 double distance = DistanceOfTwoPoint(irisList.get(i),
 95                         randomPot.get(j));
 96                 if (distance < temp) {
 97                     temp = distance;
 98                     flag = j;
 99                 }
100             }
101             groupNum.get(flag).add(irisList.get(i));
102         }
103         // 重新计算质心
104         ArrayList<IrisModel> tempList = CalcCenter(groupNum);
105         randomPot.clear();
106         for (int i = 0; i < tempList.size(); i++) {
107             randomPot.add(tempList.get(i));
108         }
109         return groupNum;
110     }
111 
112     // 计算两点欧氏距离
113     private static double DistanceOfTwoPoint(IrisModel d1, IrisModel d2) {
114         double sum = Math.sqrt(Math.pow((d1.Sep_len - d2.Sep_len), 2)
115                 + Math.pow((d1.Sep_wid - d2.Sep_wid), 2)
116                 + Math.pow((d1.Pet_len - d2.Pet_len), 2)
117                 + Math.pow((d1.Pet_wid - d2.Pet_wid), 2));
118         return sum;
119     }
120 
121     // 重新计算k个簇的质心
122     private static ArrayList<IrisModel> CalcCenter(
123             ArrayList<ArrayList<IrisModel>> c) {
124         ArrayList<IrisModel> cIris = new ArrayList<IrisModel>();
125         Iterator<ArrayList<IrisModel>> i = c.iterator();
126         while (i.hasNext()) {
127             ArrayList<IrisModel> irisList = i.next();
128             IrisModel eIris = new IrisModel();
129             for (int k = 0; k < irisList.size(); k++) {
130                 eIris.Sep_len += irisList.get(k).Sep_len;
131                 eIris.Sep_wid += irisList.get(k).Sep_wid;
132                 eIris.Pet_len += irisList.get(k).Pet_len;
133                 eIris.Pet_wid += irisList.get(k).Pet_wid;
134             }
135             eIris.Sep_len = eIris.Sep_len / irisList.size();
136             eIris.Sep_wid = eIris.Sep_wid / irisList.size();
137             eIris.Pet_len = eIris.Pet_len / irisList.size();
138             eIris.Pet_wid = eIris.Pet_wid / irisList.size();
139             cIris.add(eIris);
140         }
141 
142         return cIris;
143     }
144 
145     // 比较前后两次的质心,以确定是否结束
146     private static Boolean CompareRandomPot(
147             ArrayList<IrisModel> beforeRandomPot, ArrayList<IrisModel> randomPot) {
148         boolean flag = true;
149         for (int i = 0; i < randomPot.size(); i++) {
150             if (beforeRandomPot.size() <= 0
151                     || !beforeRandomPot.contains(randomPot.get(i))) {
152                 flag = false;
153                 break;
154             }
155         }
156         if (flag == false) {
157             if (beforeRandomPot.size() > 0) {
158                 beforeRandomPot.clear();
159             }
160             for (int i = 0; i < randomPot.size(); i++) {
161                 beforeRandomPot.add(randomPot.get(i));
162             }
163         }
164         return flag;
165     }
166 
167     // 打印
168     private static void Print(ArrayList<ArrayList<IrisModel>> kmeansList) {
169         System.out.println("------------------------------------");
170         Iterator<ArrayList<IrisModel>> i = kmeansList.iterator();
171         while (i.hasNext()) {
172             Iterator<IrisModel> ii = i.next().iterator();
173             int n = 0;
174             while (ii.hasNext()) {
175                 n++;
176                 IrisModel irisModel = ii.next();
177                 if (n == 1)
178                     continue;
179                 System.out.println(irisModel.Sep_len + " " + irisModel.Sep_wid
180                         + " " + irisModel.Pet_len + " " + irisModel.Pet_wid
181                         + " " + irisModel.Iris_type);
182             }
183             System.out.println(n - 1);
184             System.out.println("------------------------------------");
185         }
186     }
187 }

实验结果:

数据挖掘聚类算法--Kmeans数据挖掘聚类算法--Kmeans
------------------------------------
7.0 3.2 4.7 1.4 Iris-versicolor
6.4 3.2 4.5 1.5 Iris-versicolor
5.5 2.3 4.0 1.3 Iris-versicolor
6.5 2.8 4.6 1.5 Iris-versicolor
5.7 2.8 4.5 1.3 Iris-versicolor
6.3 3.3 4.7 1.6 Iris-versicolor
4.9 2.4 3.3 1.0 Iris-versicolor
6.6 2.9 4.6 1.3 Iris-versicolor
5.2 2.7 3.9 1.4 Iris-versicolor
5.0 2.0 3.5 1.0 Iris-versicolor
5.9 3.0 4.2 1.5 Iris-versicolor
6.0 2.2 4.0 1.0 Iris-versicolor
6.1 2.9 4.7 1.4 Iris-versicolor
5.6 2.9 3.6 1.3 Iris-versicolor
6.7 3.1 4.4 1.4 Iris-versicolor
5.6 3.0 4.5 1.5 Iris-versicolor
5.8 2.7 4.1 1.0 Iris-versicolor
6.2 2.2 4.5 1.5 Iris-versicolor
5.6 2.5 3.9 1.1 Iris-versicolor
5.9 3.2 4.8 1.8 Iris-versicolor
6.1 2.8 4.0 1.3 Iris-versicolor
6.3 2.5 4.9 1.5 Iris-versicolor
6.1 2.8 4.7 1.2 Iris-versicolor
6.4 2.9 4.3 1.3 Iris-versicolor
6.6 3.0 4.4 1.4 Iris-versicolor
6.8 2.8 4.8 1.4 Iris-versicolor
6.0 2.9 4.5 1.5 Iris-versicolor
5.7 2.6 3.5 1.0 Iris-versicolor
5.5 2.4 3.8 1.1 Iris-versicolor
5.5 2.4 3.7 1.0 Iris-versicolor
5.8 2.7 3.9 1.2 Iris-versicolor
6.0 2.7 5.1 1.6 Iris-versicolor
5.4 3.0 4.5 1.5 Iris-versicolor
6.0 3.4 4.5 1.6 Iris-versicolor
6.7 3.1 4.7 1.5 Iris-versicolor
6.3 2.3 4.4 1.3 Iris-versicolor
5.6 3.0 4.1 1.3 Iris-versicolor
5.5 2.5 4.0 1.3 Iris-versicolor
5.5 2.6 4.4 1.2 Iris-versicolor
6.1 3.0 4.6 1.4 Iris-versicolor
5.8 2.6 4.0 1.2 Iris-versicolor
5.0 2.3 3.3 1.0 Iris-versicolor
5.6 2.7 4.2 1.3 Iris-versicolor
5.7 3.0 4.2 1.2 Iris-versicolor
5.7 2.9 4.2 1.3 Iris-versicolor
6.2 2.9 4.3 1.3 Iris-versicolor
5.1 2.5 3.0 1.1 Iris-versicolor
5.7 2.8 4.1 1.3 Iris-versicolor
5.8 2.7 5.1 1.9 Iris-virginica
4.9 2.5 4.5 1.7 Iris-virginica
5.7 2.5 5.0 2.0 Iris-virginica
5.8 2.8 5.1 2.4 Iris-virginica
6.0 2.2 5.0 1.5 Iris-virginica
5.6 2.8 4.9 2.0 Iris-virginica
6.3 2.7 4.9 1.8 Iris-virginica
6.2 2.8 4.8 1.8 Iris-virginica
6.1 3.0 4.9 1.8 Iris-virginica
6.3 2.8 5.1 1.5 Iris-virginica
6.0 3.0 4.8 1.8 Iris-virginica
5.8 2.7 5.1 1.9 Iris-virginica
6.3 2.5 5.0 1.9 Iris-virginica
5.9 3.0 5.1 1.8 Iris-virginica
62
------------------------------------
5.1 3.5 1.4 0.2 Iris-setosa
4.9 3.0 1.4 0.2 Iris-setosa
4.7 3.2 1.3 0.2 Iris-setosa
4.6 3.1 1.5 0.2 Iris-setosa
5.0 3.6 1.4 0.2 Iris-setosa
5.4 3.9 1.7 0.4 Iris-setosa
4.6 3.4 1.4 0.3 Iris-setosa
5.0 3.4 1.5 0.2 Iris-setosa
4.4 2.9 1.4 0.2 Iris-setosa
4.9 3.1 1.5 0.1 Iris-setosa
5.4 3.7 1.5 0.2 Iris-setosa
4.8 3.4 1.6 0.2 Iris-setosa
4.8 3.0 1.4 0.1 Iris-setosa
4.3 3.0 1.1 0.1 Iris-setosa
5.8 4.0 1.2 0.2 Iris-setosa
5.7 4.4 1.5 0.4 Iris-setosa
5.4 3.9 1.3 0.4 Iris-setosa
5.1 3.5 1.4 0.3 Iris-setosa
5.7 3.8 1.7 0.3 Iris-setosa
5.1 3.8 1.5 0.3 Iris-setosa
5.4 3.4 1.7 0.2 Iris-setosa
5.1 3.7 1.5 0.4 Iris-setosa
4.6 3.6 1.0 0.2 Iris-setosa
5.1 3.3 1.7 0.5 Iris-setosa
4.8 3.4 1.9 0.2 Iris-setosa
5.0 3.0 1.6 0.2 Iris-setosa
5.0 3.4 1.6 0.4 Iris-setosa
5.2 3.5 1.5 0.2 Iris-setosa
5.2 3.4 1.4 0.2 Iris-setosa
4.7 3.2 1.6 0.2 Iris-setosa
4.8 3.1 1.6 0.2 Iris-setosa
5.4 3.4 1.5 0.4 Iris-setosa
5.2 4.1 1.5 0.1 Iris-setosa
5.5 4.2 1.4 0.2 Iris-setosa
4.9 3.1 1.5 0.1 Iris-setosa
5.0 3.2 1.2 0.2 Iris-setosa
5.5 3.5 1.3 0.2 Iris-setosa
4.9 3.1 1.5 0.1 Iris-setosa
4.4 3.0 1.3 0.2 Iris-setosa
5.1 3.4 1.5 0.2 Iris-setosa
5.0 3.5 1.3 0.3 Iris-setosa
4.5 2.3 1.3 0.3 Iris-setosa
4.4 3.2 1.3 0.2 Iris-setosa
5.0 3.5 1.6 0.6 Iris-setosa
5.1 3.8 1.9 0.4 Iris-setosa
4.8 3.0 1.4 0.3 Iris-setosa
5.1 3.8 1.6 0.2 Iris-setosa
4.6 3.2 1.4 0.2 Iris-setosa
5.3 3.7 1.5 0.2 Iris-setosa
5.0 3.3 1.4 0.2 Iris-setosa
50
------------------------------------
6.9 3.1 4.9 1.5 Iris-versicolor
6.7 3.0 5.0 1.7 Iris-versicolor
6.3 3.3 6.0 2.5 Iris-virginica
7.1 3.0 5.9 2.1 Iris-virginica
6.3 2.9 5.6 1.8 Iris-virginica
6.5 3.0 5.8 2.2 Iris-virginica
7.6 3.0 6.6 2.1 Iris-virginica
7.3 2.9 6.3 1.8 Iris-virginica
6.7 2.5 5.8 1.8 Iris-virginica
7.2 3.6 6.1 2.5 Iris-virginica
6.5 3.2 5.1 2.0 Iris-virginica
6.4 2.7 5.3 1.9 Iris-virginica
6.8 3.0 5.5 2.1 Iris-virginica
6.4 3.2 5.3 2.3 Iris-virginica
6.5 3.0 5.5 1.8 Iris-virginica
7.7 3.8 6.7 2.2 Iris-virginica
7.7 2.6 6.9 2.3 Iris-virginica
6.9 3.2 5.7 2.3 Iris-virginica
7.7 2.8 6.7 2.0 Iris-virginica
6.7 3.3 5.7 2.1 Iris-virginica
7.2 3.2 6.0 1.8 Iris-virginica
6.4 2.8 5.6 2.1 Iris-virginica
7.2 3.0 5.8 1.6 Iris-virginica
7.4 2.8 6.1 1.9 Iris-virginica
7.9 3.8 6.4 2.0 Iris-virginica
6.4 2.8 5.6 2.2 Iris-virginica
6.1 2.6 5.6 1.4 Iris-virginica
7.7 3.0 6.1 2.3 Iris-virginica
6.3 3.4 5.6 2.4 Iris-virginica
6.4 3.1 5.5 1.8 Iris-virginica
6.9 3.1 5.4 2.1 Iris-virginica
6.7 3.1 5.6 2.4 Iris-virginica
6.9 3.1 5.1 2.3 Iris-virginica
6.8 3.2 5.9 2.3 Iris-virginica
6.7 3.3 5.7 2.5 Iris-virginica
6.7 3.0 5.2 2.3 Iris-virginica
6.5 3.0 5.2 2.0 Iris-virginica
6.2 3.4 5.4 2.3 Iris-virginica
38
------------------------------------
迭代了16次
View Code