DBSCAN——python实现

时间:2023-12-13 20:41:08
# -*- coding: utf-8 -*-
from matplotlib.pyplot import *
from collections import defaultdict
import random
import json
"""
计算两点欧式距离的函数
"""
def dist(p1,p2):
return ((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) ** (0.5) all_points = []
index = 1000
#use python build-in library to load the json file
flickr_data = json.load(file("Paris_points.json"))
for i in range(index):
Coord = [flickr_data['latitudes'][i],flickr_data['longitudes'][i]]
all_points.append(Coord) """
设置E和minPts的值
"""
E = 0.001
minPts = 7 """
随机产生100个直角坐标,测试用,测试时用E = 8, minPts = 8
"""
#all_points = []
# for i in range(100):
# randCoord = [random.randint(1,50),random.randint(1,50)]
# if not randCoord in all_points:
# all_points.append(randCoord) """
找出核心点
"""
other_points = []
core_points = []
plotted_points = []
for point in all_points:
point.append(0) #assign initial level 0,即定义核心点的类型,每个核心点作为一个中心
total = 0
for otherPoint in all_points:
distance = dist(otherPoint,point)
if distance <= E:
total += 1 if total > minPts:
core_points.append(point)
plotted_points.append(point)
else:
other_points.append(point) """
找到边界点
"""
border_points = []
for core in core_points:
for other in other_points:
if dist(core,other) <= E:
border_points.append(other)
plotted_points.append(other) """
完成分类的算法,给核心点都贴上标签
"""
cluster_label = 0 for point in core_points:
if point[2] == 0:
cluster_label += 1
point[2] = cluster_label for point2 in plotted_points:
distance = dist(point2, point)
if point2[2] == 0 and distance <= E:
#print point,point2
point2[2] = point[2] """
当所有的点都分配到相应的标签后,我们把同一簇的划分到一起
"""
cluster_list = defaultdict(lambda:[[],[]])
for point in plotted_points:
cluster_list[point[2]][0].append(point[0])
cluster_list[point[2]][1].append(point[1]) markers = ['+','*','.','d','^','v','>','<','p']
#markers = ['b.','g.','r.','c.','m.','y.','k.'] """
画出所有点的图
"""
figure(1)
allx = []
ally = []
for plot_point in all_points:
allx.append(plot_point[0])
ally.append(plot_point[1])
plot(allx, ally,"r.")
title("total points=" + str(len(all_points)) + " E =" + str(E) + " Min Points=" + str(minPts)) """
画出核心点的图
"""
figure(2)
i = 0
print cluster_list
for value in cluster_list:
cluster = cluster_list[value]
plot(cluster[0],cluster[1],markers[i])
i = i % 8 + 1
#i = i % 6 + 1
title(str(len(cluster_list)) + " clusters created with E = "+ str(E) + " Min Points=" + str(minPts)) """
画出噪音点的图
"""
figure(3)
noise_points = []
for point in all_points:
if not point in core_points and not point in border_points:
noise_points.append(point)
noisex = []
noisey = []
for point in noise_points:
noisex.append(point[0])
noisey.append(point[1])
plot(noisex,noisey,"x") title("noise Points = "+ str(len(noise_points)) + " E ="+str(E)+" Min Points="+str(minPts))
#axis((0,60,0,60))
show()