Impressive, I have to admit.
Reading the code that CS229 provides still feels like a bit of a slog, so I'm writing it down here first.
X = load('logistic_x.txt');      % each row is one example [x1 x2]
Y = load('logistic_y.txt');      % class labels in {-1, +1}
X = [ones(size(X,1),1) X];       % prepend an intercept column
[theta,ll] = log_regression(X,Y,20);
m = size(X,1);
figure;hold on;
plot(X(Y<0,2),X(Y<0, 3),'rx','linewidth',2);
plot(X(Y>0,2),X(Y>0, 3),'go','linewidth',2);
x1 = min(X(:,2)):.01:max(X(:,2));
x2 = -(theta(1)/theta(3))-(theta(2)/theta(3))*x1;
plot(x1,x2,'linewidth',2);
xlabel('x1');
ylabel('x2');
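A quick note on the boundary line plotted above: the decision boundary is where theta' * x = 0, i.e. theta(1) + theta(2)*x1 + theta(3)*x2 = 0. Solving for x2 gives x2 = -(theta(1)/theta(3)) - (theta(2)/theta(3))*x1, which is exactly the expression in the code.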
The function below is the one given in the ps1 answer:
function [theta,ll] = log_regression(X,Y,max_iters)
    mm = size(X,1);
    nn = size(X,2);
    theta = zeros(nn,1);
    ll = zeros(max_iters,1);
    for ii = 1:max_iters
        margins = Y .* (X * theta);
        %this step computes the average empirical loss of the logistic model
        ll(ii) = (1/mm) * sum(log(1+exp(-margins)));
        %margins is an mm-by-1 vector here (99x1 for this data set)
        %probs(i) = 1/(1+exp(y_i * x_i' * theta)) = 1 - g(y_i * x_i' * theta),
        %i.e. the probability the current model assigns to the wrong label
        probs = 1 ./ (1+exp(margins));
        %gradient: X' * (probs .* Y) is the same as summing
        %probs(i)*y_i*x_i over i = 1..mm, a very neat vectorization
        grad = -(1/mm) * (X' * (probs .* Y));
        %Hessian of the average loss
        H = (1/mm) * (X' * diag(probs .* (1-probs)) * X);
        %Newton's method update
        theta = theta - H \ grad;
    end
end
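To convince myself that the vectorized gradient line really does what the comment claims, here is a minimal sketch (not part of the ps1 answer) that compares it against an explicit loop over the mm examples, assuming X, Y and a fitted theta are already in the workspace:

probs = 1 ./ (1 + exp(Y .* (X * theta)));
grad_vec  = -(1/size(X,1)) * (X' * (probs .* Y));
grad_loop = zeros(size(X,2), 1);
for i = 1:size(X,1)
    % accumulate -(1/mm) * probs(i) * y_i * x_i one example at a time
    grad_loop = grad_loop - (1/size(X,1)) * probs(i) * Y(i) * X(i,:)';
end
disp(max(abs(grad_vec - grad_loop)));   % should print a value near 0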
ll is the average log loss recorded at each iteration, and H is the Hessian of that loss; theta = theta - H \ grad is the Newton's method update.
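Since ll records the loss at every Newton iteration, a quick plot (a small sketch, assuming the ll returned by the call above) shows how fast the method converges; the average log loss should flatten out after only a few iterations:

figure;
plot(1:length(ll), ll, 'b-', 'linewidth', 2);
xlabel('iteration');
ylabel('average log loss');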