Logistic Regression
Applicable problem type: binary classification.
Why logistic regression exists: linear regression can predict continuous values, but it cannot solve classification problems, where we must decide from the prediction whether a sample belongs to the positive or the negative class. Logistic regression therefore takes the linear regression output and maps it into (0, 1) through the Sigmoid function.
Linear regression's decision function: the product of the data matrix and θ, where the data matrix has shape (num_samples × num_features) and θ has shape (num_features × 1).
Passing that result through the Sigmoid function gives the logistic regression decision function.
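A minimal sketch of these two steps, using a made-up feature matrix and coefficient vector just to show the shapes:

import numpy as np

def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))

# made-up data: 4 samples, 2 features
X = np.array([[1.0, 2.0],
              [2.0, 1.0],
              [3.0, 4.0],
              [4.0, 3.0]])          # shape (num_samples, num_features)
theta = np.array([[0.5],
                  [-0.25]])         # shape (num_features, 1)

z = X.dot(theta)                    # linear regression decision values, shape (num_samples, 1)
h = sigmoid(z)                      # logistic regression decision values, each in (0, 1)
print(h.ravel())
print((h >= 0.5).astype(int).ravel())   # classify using 1/2 as the threshold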
Why the Sigmoid function is used:
It maps any value in (-∞, +∞) into (0, 1), so the output can be interpreted as a probability.
1/2 can then serve as the decision boundary.
It has good mathematical properties and its derivative is easy to compute (checked numerically below).
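A quick numerical check of that derivative property, σ'(z) = σ(z)(1 − σ(z)), using a central difference (the value of z is arbitrary):

import numpy as np

def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))

z = 0.7
eps = 1e-6
analytic = sigmoid(z) * (1 - sigmoid(z))                      # closed-form derivative
numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)   # central-difference estimate
print(analytic, numeric)                                      # the two values agree closely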
The loss function of logistic regression
Linear regression uses the squared loss. If the squared loss were reused as the loss of logistic regression, its mathematical properties would be poor: the resulting objective has many local minima, so gradient descent struggles to find the optimum.
Logistic regression therefore uses the log loss instead.
Interpretation: for a positive sample, we want the predicted probability p of it being positive to be as large as possible, i.e. the decision-function value as large as possible, so log p should be as large as possible (the decision-function value of logistic regression is exactly the probability that the sample is positive); for a negative sample, we want the predicted probability of it being negative to be as large as possible, i.e. (1 − p) as large as possible, so log(1 − p) should be as large as possible.
Why take the logarithm: the training set contains many samples, and the likelihood multiplies their probabilities together; each probability lies between 0 and 1, so the product shrinks towards zero and eventually underflows the available floating-point precision, whereas the log transform turns the product into a sum that stays within range.
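A tiny illustration of the numerical point (the probabilities are made up):

import numpy as np

p = np.full(1000, 0.1)       # 1000 made-up per-sample probabilities
print(np.prod(p))            # the direct product underflows to 0.0
print(np.sum(np.log(p)))     # the sum of logs stays finite (about -2302.6)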
Loss function (vectorized form): y (num_samples × 1) is the label vector and hθ(x) (num_samples × 1) is the vector of decision-function (Sigmoid) values, so the bracketed term y^T·log(hθ(x)) + (1 − y)^T·log(1 − hθ(x)) is a 1 × 1 scalar; the loss is J(θ) = −(1/m)·[y^T·log(hθ(x)) + (1 − y)^T·log(1 − hθ(x))].
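A minimal vectorized implementation of this loss; X, y and theta below are illustrative values, with X already containing an intercept column:

import numpy as np

def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))

def log_loss(theta, X, y):
    m = y.size
    h = sigmoid(X.dot(theta))                       # decision values, shape (num_samples, 1)
    # y^T.log(h) + (1 - y)^T.log(1 - h) is a 1x1 scalar
    return float(-(1 / m) * (y.T.dot(np.log(h)) + (1 - y).T.dot(np.log(1 - h))))

X = np.array([[1.0, 0.5],
              [1.0, -1.5],
              [1.0, 2.0]])          # made-up data with an intercept column
y = np.array([[1], [0], [1]])
theta = np.zeros((2, 1))
print(log_loss(theta, X, y))        # log(2) ≈ 0.6931 for an all-zero theta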
Binary classification logistic regression: straight-line (linear decision boundary) implementation in code
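The straight-line case works like the curved-boundary implementation in the next section, just without polynomial features: fit θ by gradient descent on the log loss, then draw the line θ0 + θ1·x1 + θ2·x2 = 0, on which the Sigmoid output equals 1/2. The sketch below assumes a linearly separable file data1.txt with two feature columns and a 0/1 label column; the file name, column layout, helper names and hyperparameters are illustrative assumptions, not taken from the source.

import numpy as np
from matplotlib import pyplot as plt


def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))


def gradient_descent(X, y, alpha=0.01, iterations=5000):
    """Plain (unregularized) gradient descent on the logistic regression log loss."""
    m, n = X.shape
    theta = np.zeros((n, 1))
    for _ in range(iterations):
        h = sigmoid(X.dot(theta))            # predicted probabilities, shape (m, 1)
        grad = (1 / m) * X.T.dot(h - y)      # gradient of the log loss, shape (n, 1)
        theta = theta - alpha * grad
    return theta


def plot_decision_line(X, y, theta):
    neg = y.ravel() == 0
    pos = y.ravel() == 1
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    ax.scatter(X[neg][:, 1], X[neg][:, 2], marker="o", s=100, label="negative")
    ax.scatter(X[pos][:, 1], X[pos][:, 2], marker="+", s=100, label="positive")
    # sigmoid(theta0 + theta1*x1 + theta2*x2) = 0.5 on the straight line
    # theta0 + theta1*x1 + theta2*x2 = 0, i.e. x2 = -(theta0 + theta1*x1) / theta2
    x1 = np.linspace(X[:, 1].min(), X[:, 1].max(), 50)
    x2 = -(theta[0, 0] + theta[1, 0] * x1) / theta[2, 0]
    ax.plot(x1, x2, linewidth=3)
    ax.legend(fontsize=14)
    plt.show()


if __name__ == "__main__":
    # assumed file: two feature columns and a 0/1 label column, comma separated
    data = np.loadtxt("./data1.txt", delimiter=",")
    X = np.c_[np.ones(data.shape[0]), data[:, 0:2]]   # prepend the intercept column
    y = data[:, 2].reshape(-1, 1)
    theta = gradient_descent(X, y, alpha=0.01, iterations=5000)
    plot_decision_line(X, y, theta)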
Binary classification logistic regression: curved decision boundary (polynomial features) implementation in code
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import minimize
from sklearn.preprocessing import PolynomialFeatures


class MyLogisticRegression:
    def __init__(self):
        # SimHei lets matplotlib render Chinese characters in figures
        plt.rcParams["font.sans-serif"] = ["SimHei"]
        # dataset containing both features and labels
        self.data = np.loadtxt("./data2.txt", delimiter=",")
        self.data_mat = self.data[:, 0:2]
        self.label_mat = self.data[:, 2]
        self.thetas = np.zeros(self.data_mat.shape[1])
        # generate polynomial features up to degree 6
        self.poly = PolynomialFeatures(6)
        self.p_data_mat = self.poly.fit_transform(self.data_mat)

    def cost_func_reg(self, theta, reg):
        """
        Regularized log loss.
        :param theta: logistic regression coefficients
        :param reg: regularization strength (lambda)
        :return: cost value
        """
        m = self.label_mat.size
        label_mat = self.label_mat.reshape(-1, 1)
        h = self.sigmoid(self.p_data_mat.dot(theta))
        J = -1 * (1 / m) * (np.log(h).T.dot(label_mat) + np.log(1 - h).T.dot(1 - label_mat)) \
            + (reg / (2 * m)) * np.sum(np.square(theta[1:]))
        if np.isnan(J[0]):
            return np.inf
        return J[0]

    def gradient_reg(self, theta, reg):
        m = self.label_mat.size
        h = self.sigmoid(self.p_data_mat.dot(theta.reshape(-1, 1)))
        label_mat = self.label_mat.reshape(-1, 1)
        # the intercept term is not regularized, hence the leading 0
        grad = (1 / m) * self.p_data_mat.T.dot(h - label_mat) \
            + (reg / m) * np.r_[[[0]], theta[1:].reshape(-1, 1)]
        # scipy.optimize.minimize expects a flat 1-D gradient from the jac callback
        return grad.ravel()

    def gradient_descent_reg(self, alpha=0.01, reg=0, iterations=200):
        """
        Gradient descent for regularized logistic regression.
        :param alpha: learning rate
        :param reg: regularization strength (lambda)
        :param iterations: maximum number of iterations
        :return: fitted coefficients and the list of coefficients per iteration
        """
        m, n = self.p_data_mat.shape
        theta = np.zeros((n, 1))
        theta_set = []
        for i in range(iterations):
            grad = self.gradient_reg(theta, reg)
            theta = theta - alpha * grad.reshape(-1, 1)
            theta_set.append(theta)
        return theta, theta_set

    def plot_data_reg(self, x_label=None, y_label=None, neg_text="negative", pos_text="positive", thetas=None):
        neg = self.label_mat == 0
        pos = self.label_mat == 1
        fig1 = plt.figure(figsize=(12, 8))
        ax1 = fig1.add_subplot(111)
        ax1.scatter(self.p_data_mat[neg][:, 1], self.p_data_mat[neg][:, 2], marker="o", s=100, label=neg_text)
        ax1.scatter(self.p_data_mat[pos][:, 1], self.p_data_mat[pos][:, 2], marker="+", s=100, label=pos_text)
        ax1.set_xlabel(x_label, fontsize=14)
        # draw the decision boundary (curve): the 0.5 contour of the sigmoid output
        if isinstance(thetas, np.ndarray):
            x1_min, x1_max = self.p_data_mat[:, 1].min(), self.p_data_mat[:, 1].max()
            x2_min, x2_max = self.p_data_mat[:, 2].min(), self.p_data_mat[:, 2].max()
            xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
            h = self.sigmoid(self.poly.fit_transform(np.c_[xx1.ravel(), xx2.ravel()]).dot(thetas))
            h = h.reshape(xx1.shape)
            ax1.contour(xx1, xx2, h, [0.5], linewidths=3)
        ax1.legend(fontsize=14)
        plt.show()

    @staticmethod
    def sigmoid(z):
        return 1.0 / (1 + np.exp(-z))


if __name__ == '__main__':
    my_logistic_regression = MyLogisticRegression()
    # my_logistic_regression.plot_data(x_label="linearly inseparable dataset")

    # fit with the hand-written gradient descent
    thetas, theta_set = my_logistic_regression.gradient_descent_reg(alpha=0.5, reg=0, iterations=500)
    my_logistic_regression.plot_data_reg(thetas=thetas, x_label="$\\lambda$ = {}".format(0))

    # fit with scipy.optimize.minimize; theta is passed to the callbacks as a 1-D array,
    # so the initial guess is flat and gradient_reg returns a flat gradient
    thetas = np.zeros(my_logistic_regression.p_data_mat.shape[1])
    result = minimize(my_logistic_regression.cost_func_reg, thetas,
                      args=(0,),
                      method=None,
                      jac=my_logistic_regression.gradient_reg)
    my_logistic_regression.plot_data_reg(thetas=result.x, x_label="$\\lambda$ = {}".format(0))
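Note that PolynomialFeatures(6) keeps its default include_bias=True, so column 0 of p_data_mat is the all-ones intercept column and columns 1 and 2 are the two original features; that is why the scatter plots and the meshgrid ranges index columns 1 and 2.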
The above is the detailed content on implementing logistic regression in Python; for more material on the topic, see the other related articles on 服务器之家.
Original article: https://www.cnblogs.com/aitiknowledge/p/12668794.html