The idea behind this implementation is simple: the model is fit with gradient descent (here with an added weight penalty). Test results are shown at the end.
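Concretely, writing X for the sample matrix, y for the label vector, \alpha for the learning rate, and \lambda for the penalty coefficient (punish in the code), each iteration of the code below performs the update

\theta \leftarrow \theta - \alpha\, X^{\top}\big(\sigma(X\theta) - y\big) - \lambda\,\theta, \qquad \sigma(z) = \frac{1}{1 + e^{-z}}

Note that the penalty term is applied directly rather than scaled by \alpha; this matches the code as written.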
First, the example code:
# coding=utf-8
from math import exp

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs  # samples_generator was removed from newer sklearn


def sigmoid(num):
    '''
    :param num: the scalar to transform
    :return: sigmoid of the input
    '''
    if type(num) == int or type(num) == float:
        return 1.0 / (1 + exp(-num))
    else:
        raise ValueError('only int or float data can compute sigmoid')


class logistic():
    def __init__(self, x, y):
        if type(x) == type(y) == list:
            self.x = np.array(x)
            self.y = np.array(y)
        elif type(x) == type(y) == np.ndarray:
            self.x = x
            self.y = y
        else:
            raise ValueError('input data error')

    def sigmoid(self, x):
        '''
        :param x: input vector
        :return: the vector obtained by applying sigmoid elementwise
        '''
        s = np.frompyfunc(lambda x: sigmoid(x), 1, 1)
        return s(x)

    def train_with_punish(self, alpha, errors, punish=0.0001):
        '''
        :param alpha: learning rate
        :param errors: error threshold below which iteration stops
        :param punish: penalty coefficient
        :return:
        '''
        self.punish = punish
        dimension = self.x.shape[1]
        self.theta = np.random.random(dimension)
        compute_error = 100000000
        times = 0
        while compute_error > errors:
            res = np.dot(self.x, self.theta)
            delta = self.sigmoid(res) - self.y
            self.theta = self.theta - alpha * np.dot(self.x.T, delta) - punish * self.theta  # gradient descent step with a penalty term
            # sum the absolute residuals; summing signed residuals could cancel
            # out and stop the loop prematurely
            compute_error = np.sum(np.abs(delta))
            times += 1

    def predict(self, x):
        '''
        :param x: a new, unlabeled feature vector
        :return: the class predicted from the learned parameters
        '''
        x = np.array(x)
        if self.sigmoid(np.dot(x, self.theta)) > 0.5:
            return 1
        else:
            return 0


def test1():
    '''
    Runs a test and plots the result.
    :return:
    '''
    x, y = make_blobs(n_samples=200, centers=2, n_features=2, random_state=0, center_box=(10, 20))
    x1 = []
    y1 = []
    x2 = []
    y2 = []
    for i in range(len(y)):
        if y[i] == 0:
            x1.append(x[i][0])
            y1.append(x[i][1])
        elif y[i] == 1:
            x2.append(x[i][0])
            y2.append(x[i][1])
    # the above splits the generated samples into the two classes
    p = logistic(x, y)
    p.train_with_punish(alpha=0.00001, errors=0.005, punish=0.01)  # step size 0.00001, error threshold 0.005, penalty coefficient 0.01
    x_test = np.arange(10, 20, 0.01)
    y_test = (-1 * p.theta[0] / p.theta[1]) * x_test  # decision boundary: theta[0]*x + theta[1]*y = 0
    plt.plot(x_test, y_test, c='g', label='logistic_line')
    plt.scatter(x1, y1, c='r', label='negative')
    plt.scatter(x2, y2, c='b', label='positive')
    plt.legend(loc=2)
    plt.title('punish value = ' + p.punish.__str__())
    plt.show()


if __name__ == '__main__':
    test1()
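The predict method above is never exercised by test1. As a minimal usage sketch (the query point is made up for illustration; x, y and the logistic class are the ones defined above):

p = logistic(x, y)
p.train_with_punish(alpha=0.00001, errors=0.005, punish=0.01)
print(p.predict([12.0, 16.0]))  # hypothetical point; returns 1 if sigmoid(theta . x) > 0.5, else 0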
The running result is shown in the figure below.
Summary
That is all for this article. I hope its content is of some help to your study or work. If you have questions, feel free to leave a comment, and thank you for supporting 服务器之家.
Original link: http://www.cnblogs.com/chuxiuhong/p/6022566.html