Python机器学习应用 | 【第一周】无监督学习 - weixin_42906066的博客 - ****博客
https://blog.****.net/weixin_42906066/article/details/81867175
前置课程没有听,科学计算(numpy)和 matplotlib 画图(shuf)等资料在硬盘里,先看一下会更方便跟上进度。
=========
- import numpy as np
- import sklearn.cluster as skc
- from sklearn import metrics
- import matplotlib.pyplot as plt
# Cluster the session start values of the records in TestData.txt with DBSCAN
# and report the noise ratio, the number of clusters, the silhouette
# coefficient and the members of every cluster.
#
# Each input line is split on ','. Field 2 is used as the device key ("mac"),
# field 6 is parsed as an integer ("onlinetime"), and field 4 is expected to
# look like "<date> <HH>:<MM>:..." so that its leading time component can be
# parsed as an integer ("starttime").
# NOTE(review): the exact column meanings/units are assumed from the variable
# names -- confirm against the data file's schema.

mac2id = {}        # mac -> index of that mac's entry in `onlinetimes`
onlinetimes = []   # one (starttime, onlinetime) tuple per distinct mac

# `with` guarantees the file is closed even if a malformed line raises.
with open('TestData.txt', encoding='utf-8') as f:
    for line in f:
        fields = line.split(',')  # split once instead of once per field
        mac = fields[2]
        onlinetime = int(fields[6])
        starttime = int(fields[4].split(' ')[1].split(':')[0])
        if mac not in mac2id:
            mac2id[mac] = len(onlinetimes)
            onlinetimes.append((starttime, onlinetime))
        else:
            # Keep only the latest record for a mac that was already seen.
            # Store a plain tuple (not a list wrapping a tuple) so every entry
            # has the same shape and np.array() below builds a regular
            # (n, 2) array instead of a ragged one.
            onlinetimes[mac2id[mac]] = (starttime, onlinetime)

# Column 0 is starttime, column 1 is onlinetime.
real_X = np.array(onlinetimes).reshape((-1, 2))
# Cluster on the start value only; the 0:1 slice keeps X two-dimensional
# (n, 1), which is the layout the estimator's fit() is given.
X = real_X[:, 0:1]

db = skc.DBSCAN(eps=0.01, min_samples=20).fit(X)
labels = db.labels_

print('Labels:')
print(labels)
# Samples labelled -1 are treated as noise (not assigned to any cluster).
raito = len(labels[labels[:] == -1]) / len(labels)
print('Noise raito:', format(raito, '.2%'))

# The noise label, when present, is not counted as a cluster.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
print('Estimated number of clusters: %d' % n_clusters_)
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))

for i in range(n_clusters_):
    print('Cluster ', i, ':')
    print(list(X[labels == i].flatten()))

# Histogram of the clustered values using 24 bins.
plt.hist(X, 24)