用一条直线对数据进行拟合的过程称为回归。逻辑回归分类的思想是:根据现有数据对分类边界线建立回归公式。
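公式表示为:$z = w_0 x_0 + w_1 x_1 + \cdots + w_n x_n = w^{T}x$,再通过 Sigmoid 函数 $\sigma(z) = \dfrac{1}{1 + e^{-z}}$ 将 $z$ 映射到 $(0, 1)$ 区间;输出大于 0.5 判为 1 类,否则判为 0 类。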
一、梯度上升法
每次迭代所有的数据都参与计算。
for 循环次数:
训练
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
import numpy as np
import matplotlib.pyplot as plt
def loadData(fileName='testSet.txt'):
    """Read a whitespace-separated sample file into feature rows and labels.

    Every non-blank line must contain at least three fields: two feature
    values followed by the class label.  Blank lines are skipped instead of
    raising IndexError.

    Args:
        fileName: path of the data file.  Defaults to 'testSet.txt' so that
            existing zero-argument calls keep working.

    Returns:
        A tuple (dataMat, labelVec).  dataMat is a list of rows
        [1.0, x1, x2] where the leading 1.0 is the constant bias input and
        x1/x2 are the raw field strings; labelVec is the list of raw label
        strings.  Callers convert both to numbers themselves.
    """
    dataMat = []
    labelVec = []
    with open(fileName) as f:
        for line in f:
            # Split once per line; split() with no argument also discards
            # leading/trailing whitespace, so no separate strip() is needed.
            fields = line.split()
            if not fields:
                continue
            dataMat.append([1.0, fields[0], fields[1]])
            labelVec.append(fields[2])
    return dataMat, labelVec
def Sigmoid(inX):
    """Return the logistic function 1 / (1 + exp(-inX)), element-wise.

    The value is computed as exp(-log(1 + exp(-inX))) through np.logaddexp,
    which is mathematically identical but never evaluates exp() on a large
    positive number, so strongly negative inputs no longer trigger an
    overflow warning.

    Args:
        inX: a scalar or a NumPy array/matrix of scores.

    Returns:
        Values in [0, 1] with the same shape as inX.
    """
    return np.exp(-np.logaddexp(0, -inX))
def trainLR(dataMat, labelVec, alpha=0.001, numIter=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Every iteration uses all samples.  The update
    w <- w - alpha * X^T (Sigmoid(X w) - y) is the gradient-ascent step on
    the log-likelihood, written with the sign folded into the error term.

    Args:
        dataMat: 2-D array-like of shape (m, n); each row is one sample.
        labelVec: array-like holding the m class labels (0 or 1).
        alpha: learning rate (step size); defaults to the original 0.001.
        numIter: number of full passes; defaults to the original 500.

    Returns:
        An (n, 1) np.matrix of weights.  The matrix type is kept for
        backward compatibility with callers that use matrix methods.
    """
    # Plain ndarrays are used internally because np.mat was removed in
    # NumPy 2.0; np.dot gives the same matrix products.
    dataMatrix = np.asarray(dataMat, dtype=np.float64)
    labelMatrix = np.asarray(labelVec, dtype=np.float64).reshape(-1, 1)
    n = dataMatrix.shape[1]
    w = np.ones((n, 1))
    for _ in range(numIter):
        predict = Sigmoid(np.dot(dataMatrix, w))
        error = predict - labelMatrix
        w = w - alpha * np.dot(dataMatrix.T, error)
    return np.asmatrix(w)
def plotBestFit(wei, data, label):
    """Plot the samples and the fitted decision boundary.

    The boundary is the line w0 + w1*x1 + w2*x2 = 0, drawn for x1 in
    [-3, 3) as x2 = -w0/w2 - w1/w2 * x1, so w2 must be non-zero.

    Args:
        wei: the three weights (w0, w1, w2) as an ndarray or np.matrix of
            any shape holding three elements.
        data: 2-D array-like whose columns 1 and 2 are the plotted features
            (column 0 is the constant bias input).
        label: sequence of class labels; samples equal to 1 are drawn in
            red, all others in green.
    """
    # np.asarray accepts both ndarray and np.matrix, replacing the original
    # check on the type's name string.
    weights = np.asarray(wei, dtype=np.float64).ravel()
    points = np.asarray(data, dtype=np.float64)
    isPositive = np.asarray(label) == 1

    fig = plt.figure(0)
    ax = fig.add_subplot(111)

    xxx = np.arange(-3, 3, 0.1)
    yyy = -weights[0] / weights[2] - weights[1] / weights[2] * xxx
    ax.plot(xxx, yyy)

    # Boolean masks still yield valid (empty) columns when a class has no
    # samples, where indexing an empty list-built array raised IndexError.
    ax.scatter(points[isPositive, 1], points[isPositive, 2], c='red')
    ax.scatter(points[~isPositive, 1], points[~isPositive, 2], c='green')
    plt.show()
if __name__ == "__main__":
    # Demo for batch gradient ascent: load the samples, convert the raw
    # string fields to numbers, fit the weights and plot the boundary.
    data, label = loadData()
    data = np.array(data).astype(np.float64)
    label = [int(item) for item in label]
    weight = trainLR(data, label)
    plotBestFit(weight, data, label)
|
二、随机梯度上升法
1.学习速率(步长 alpha)随迭代次数的增加而逐渐减小,可以缓解回归系数的高频波动。
2.随机选取样本来更新回归参数,可以减少周期性的波动。
for 循环次数:
for 样本数量:
更新学习速率
随机选取样本
训练
在样本集中删除该样本
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
import numpy as np
import matplotlib.pyplot as plt
def loadData(fileName='testSet.txt'):
    """Read a whitespace-separated sample file into feature rows and labels.

    Every non-blank line must contain at least three fields: two feature
    values followed by the class label.  Blank lines are skipped instead of
    raising IndexError.

    Args:
        fileName: path of the data file.  Defaults to 'testSet.txt' so that
            existing zero-argument calls keep working.

    Returns:
        A tuple (dataMat, labelVec).  dataMat is a list of rows
        [1.0, x1, x2] where the leading 1.0 is the constant bias input and
        x1/x2 are the raw field strings; labelVec is the list of raw label
        strings.  Callers convert both to numbers themselves.
    """
    dataMat = []
    labelVec = []
    with open(fileName) as f:
        for line in f:
            # Split once per line; split() with no argument also discards
            # leading/trailing whitespace, so no separate strip() is needed.
            fields = line.split()
            if not fields:
                continue
            dataMat.append([1.0, fields[0], fields[1]])
            labelVec.append(fields[2])
    return dataMat, labelVec
def Sigmoid(inX):
    """Return the logistic function 1 / (1 + exp(-inX)), element-wise.

    The value is computed as exp(-log(1 + exp(-inX))) through np.logaddexp,
    which is mathematically identical but never evaluates exp() on a large
    positive number, so strongly negative inputs no longer trigger an
    overflow warning.

    Args:
        inX: a scalar or a NumPy array/matrix of scores.

    Returns:
        Values in [0, 1] with the same shape as inX.
    """
    return np.exp(-np.logaddexp(0, -inX))
def plotBestFit(wei, data, label):
    """Plot the samples and the fitted decision boundary.

    The boundary is the line w0 + w1*x1 + w2*x2 = 0, drawn for x1 in
    [-3, 3) as x2 = -w0/w2 - w1/w2 * x1, so w2 must be non-zero.

    Args:
        wei: the three weights (w0, w1, w2) as an ndarray or np.matrix of
            any shape holding three elements.
        data: 2-D array-like whose columns 1 and 2 are the plotted features
            (column 0 is the constant bias input).
        label: sequence of class labels; samples equal to 1 are drawn in
            red, all others in green.
    """
    # np.asarray accepts both ndarray and np.matrix, replacing the original
    # check on the type's name string.
    weights = np.asarray(wei, dtype=np.float64).ravel()
    points = np.asarray(data, dtype=np.float64)
    isPositive = np.asarray(label) == 1

    fig = plt.figure(0)
    ax = fig.add_subplot(111)

    xxx = np.arange(-3, 3, 0.1)
    yyy = -weights[0] / weights[2] - weights[1] / weights[2] * xxx
    ax.plot(xxx, yyy)

    # Boolean masks still yield valid (empty) columns when a class has no
    # samples, where indexing an empty list-built array raised IndexError.
    ax.scatter(points[isPositive, 1], points[isPositive, 2], c='red')
    ax.scatter(points[~isPositive, 1], points[~isPositive, 2], c='green')
    plt.show()
def stocGradAscent(dataMat, labelVec, trainLoop):
    """Fit logistic-regression weights with stochastic gradient ascent.

    In each pass every sample is used exactly once, in random order, and
    the weights are updated after each single sample.  The learning rate
    shrinks as the pass index j and the step index i grow, but never drops
    below 0.01.

    Args:
        dataMat: 2-D array-like of shape (m, n); each row is one sample.
        labelVec: sequence holding the m numeric class labels (0 or 1).
        trainLoop: number of passes over the whole data set.

    Returns:
        An (n, 1) ndarray of weights.
    """
    samples = np.asarray(dataMat, dtype=np.float64)
    m, n = samples.shape
    w = np.ones((n, 1))
    for j in range(trainLoop):
        # A real list is required: the original called np.delete on a range
        # and discarded the returned copy, so no sample was ever removed and
        # the draw was silently made with replacement.
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 4.0 / (i + j + 1) + 0.01
            randIndex = int(np.random.uniform(0, len(dataIndex)))
            # pop() both selects the sample and removes it from this pass.
            sampleIndex = dataIndex.pop(randIndex)
            sample = samples[sampleIndex]
            predict = Sigmoid(np.dot(sample, w))
            error = predict - labelVec[sampleIndex]
            w = w - alpha * error * sample.reshape(n, 1)
    return w
if __name__ == "__main__":
    # Demo for stochastic gradient ascent: load the samples, convert the raw
    # string fields to numbers, run 300 passes and plot the boundary.
    data, label = loadData()
    data = np.array(data).astype(np.float64)
    label = [int(item) for item in label]
    weight = stocGradAscent(data, label, 300)
    plotBestFit(weight, data, label)
|
三、编程技巧
1.字符串提取
strip() 去除字符串首尾的空白字符('\n'、'\r'、'\t'、' '),split() 再按空白字符将其切分为列表。
1
|
string.strip().split()
|
2.判断类型
1
|
if type (secondTree[value]).__name__ = = 'dict' :
|
3.乘法
numpy两个矩阵类型的向量相乘,结果还是一个矩阵
1
2
3
4
|
c = a * b
c
Out[ 66 ]: matrix([[ 6.830482 ]])
|
两个数组(ndarray)类型的向量用 * 相乘时是按元素相乘并进行广播:形状为 (3,) 的数组与形状为 (3, 1) 的数组相乘,结果为一个 3×3 的二维数组
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
b
Out[ 80 ]:
array([[ 1. ],
[ 1. ],
[ 1. ]])
a
Out[ 81 ]: array([ 1 , 2 , 3 ])
a * b
Out[ 82 ]:
array([[ 1. , 2. , 3. ],
[ 1. , 2. , 3. ],
[ 1. , 2. , 3. ]])
b * a
Out[ 83 ]:
array([[ 1. , 2. , 3. ],
[ 1. , 2. , 3. ],
[ 1. , 2. , 3. ]])
|
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:http://blog.csdn.net/weixin_37895339/article/details/78441688