Overview
This post implements softmax regression from scratch, using only MXNet's NDArray and autograd rather than a framework's built-in model components, and runs experiments on the Fashion-MNIST dataset. The background it relies on is covered in my earlier posts:
- Basic NDArray operations in MXNet: 【深度学习】MXNet基本数据结构NDArray常用操作
- Computing gradients with MXNet: 【深度学习】MXNet自动求解函数梯度
- How softmax works: 【深度学习】Softmax回归(一)概念和原理
- Loading Fashion-MNIST with MXNet: 【深度学习】Fashion-MNIST数据集简介
Implementation steps
Import the required packages
import gluonbook as gb
from mxnet import autograd, nd
For an introduction to the gluonbook package, see the end of my post 【深度学习】Fashion-MNIST数据集简介.
Read the data
# Load the Fashion-MNIST dataset in mini-batches
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
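Each pass over train_iter yields one mini-batch of images and labels. A quick peek at a single batch (with gluonbook's default ToTensor transform I would expect X to have shape (256, 1, 28, 28) and y shape (256,)):

# Inspect the shape of one mini-batch
for X, y in train_iter:
    print(X.shape, y.shape)
    break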
Initialize the parameters
# Each input is a 28*28=784-dimensional vector; the output has 10 classes
num_inputs = 784
num_outputs = 10
# Create the weight matrix and bias of the softmax regression model, and allocate memory for their gradients
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
W.attach_grad()
b = nd.zeros(num_outputs)
b.attach_grad()
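In matrix form, for a mini-batch $\mathbf{X}$ of $n$ flattened images, the model we are about to build computes

$$\hat{\mathbf{Y}} = \mathrm{softmax}(\mathbf{X}\mathbf{W} + \mathbf{b}), \qquad \mathbf{X} \in \mathbb{R}^{n \times 784},\; \mathbf{W} \in \mathbb{R}^{784 \times 10},\; \mathbf{b} \in \mathbb{R}^{1 \times 10},$$

where $\mathbf{b}$ is broadcast across the $n$ rows and each row of $\hat{\mathbf{Y}}$ holds the predicted probabilities of the 10 classes.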
Implement the softmax operation
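For a matrix $\mathbf{X}$ whose rows are samples, softmax exponentiates every entry and normalizes each row into a probability distribution:

$$\mathrm{softmax}(\mathbf{X})_{ij} = \frac{\exp(X_{ij})}{\sum_{k} \exp(X_{ik})}$$

The code below implements exactly this, relying on broadcasting for the row-wise division.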
# The softmax operation: each row of X is one sample, each column one output
def softmax(X):
    # Exponentiate every element
    X_exp = X.exp()
    # Sum the exponentiated matrix over each row
    partition = X_exp.sum(axis=1, keepdims=True)
    # Broadcasting divides each row by its sum, so each row sums to 1
    return X_exp/partition
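As a quick sanity check (following the book; the random input values are arbitrary), every row of the output should lie in (0, 1) and sum to 1:

X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
# Each row of X_prob is a valid probability distribution
print(X_prob, X_prob.sum(axis=1))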
Define the softmax model
# The softmax regression model
def net(X):
    # reshape flattens each 28*28 image into a 784-dimensional input vector
    return softmax(nd.dot(X.reshape((-1, num_inputs)), W) + b)
Define the loss function
# Cross-entropy loss: nd.pick selects each sample's predicted probability of its true class
def cross_entropy(y_prediction, y):
    return - nd.pick(y_prediction, y).log()
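Here nd.pick(y_prediction, y) uses each label in y as an index into the corresponding row of y_prediction, returning every sample's predicted probability for its true class; the loss is then the negative log of that probability. A toy example with made-up numbers:

y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = nd.array([0, 2], dtype='int32')
# Picks y_hat[0][0] and y_hat[1][2], giving [0.1, 0.5]
print(nd.pick(y_hat, y))
# Cross-entropy: [-log(0.1), -log(0.5)]
print(cross_entropy(y_hat, y))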
Compute accuracy
# Compare predictions against labels to compute the accuracy of a batch
def accuracy(y_prediction, y):
    return (y_prediction.argmax(axis=1) == y.astype('float32')).mean().asscalar()

# Average accuracy over all batches of a data iterator
def evaluate_accuracy(data_iter, net):
    acc = 0
    for X, y in data_iter:
        acc += accuracy(net(X), y)
    return acc/len(data_iter)
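Continuing the toy example above: argmax over the rows of y_hat predicts classes [2, 2], while the labels are [0, 2], so only the second sample is correct and the accuracy should be 0.5:

# Predicted classes [2, 2] vs. labels [0, 2] -> accuracy 0.5
print(accuracy(y_hat, y))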
Train the model
# The training loop
def train(net, train_iter, test_iter, loss, epochs, batch_size, params=None, lr=None, trainer=None):
    for epoch in range(epochs):
        # Cumulative loss over the epoch
        train_loss_sum = 0
        # Cumulative training accuracy over the epoch
        train_acc_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_pre = net(X)
                los = loss(y_pre, y)
            # Backpropagate to compute the gradients
            los.backward()
            if trainer is None:
                # Mini-batch stochastic gradient descent
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            train_loss_sum += los.mean().asscalar()
            train_acc_sum += accuracy(y_pre, y)
        print('epoch %d, loss %.4f, train acc %.3f' % (epoch + 1, train_loss_sum / len(train_iter), train_acc_sum / len(train_iter)))
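With all the pieces in place, training is launched as follows (these are the hyperparameters used in the complete code below: 10 epochs of plain mini-batch SGD with learning rate 0.1):

epochs, lr = 10, 0.1
train(net, train_iter, test_iter, cross_entropy, epochs, batch_size, [W, b], lr)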
Test the model
# Evaluate accuracy on the test set
test_acc = evaluate_accuracy(test_iter, net)
print("Training done, test accuracy is: ", test_acc)
Complete code and results
# coding=utf-8
# author: BebDong
# 2018/12/18
# Softmax regression implemented from scratch
import gluonbook as gb
from mxnet import autograd, nd
# The softmax operation: each row of X is one sample, each column one output
def softmax(X):
    # Exponentiate every element
    X_exp = X.exp()
    # Sum the exponentiated matrix over each row
    partition = X_exp.sum(axis=1, keepdims=True)
    # Broadcasting divides each row by its sum, so each row sums to 1
    return X_exp/partition
# The softmax regression model
def net(X):
    # reshape flattens each 28*28 image into a 784-dimensional input vector
    return softmax(nd.dot(X.reshape((-1, num_inputs)), W) + b)
# Cross-entropy loss: nd.pick selects each sample's predicted probability of its true class
def cross_entropy(y_prediction, y):
    return - nd.pick(y_prediction, y).log()
# Compare predictions against labels to compute the accuracy of a batch
def accuracy(y_prediction, y):
    return (y_prediction.argmax(axis=1) == y.astype('float32')).mean().asscalar()

# Average accuracy over all batches of a data iterator
def evaluate_accuracy(data_iter, net):
    acc = 0
    for X, y in data_iter:
        acc += accuracy(net(X), y)
    return acc/len(data_iter)
# The training loop
def train(net, train_iter, test_iter, loss, epochs, batch_size, params=None, lr=None, trainer=None):
    for epoch in range(epochs):
        # Cumulative loss over the epoch
        train_loss_sum = 0
        # Cumulative training accuracy over the epoch
        train_acc_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_pre = net(X)
                los = loss(y_pre, y)
            # Backpropagate to compute the gradients
            los.backward()
            if trainer is None:
                # Mini-batch stochastic gradient descent
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            train_loss_sum += los.mean().asscalar()
            train_acc_sum += accuracy(y_pre, y)
        print('epoch %d, loss %.4f, train acc %.3f' % (epoch + 1, train_loss_sum / len(train_iter), train_acc_sum / len(train_iter)))
# Load the Fashion-MNIST dataset in mini-batches
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
# Each input is a 28*28=784-dimensional vector; the output has 10 classes
num_inputs = 784
num_outputs = 10
# Create the weight matrix and bias of the softmax regression model, and allocate memory for their gradients
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
W.attach_grad()
b = nd.zeros(num_outputs)
b.attach_grad()
# Train the model
epochs, lr = 10, 0.1
train(net, train_iter, test_iter, cross_entropy, epochs, batch_size, [W, b], lr)
# Evaluate accuracy on the test set
test_acc = evaluate_accuracy(test_iter, net)
print("Training done, test accuracy is: ", test_acc)
The results of my run are as follows:
Notes
This post is a study note for Dive into Deep Learning (《动手学深度学习》); the original chapter is at http://zh.diveintodeeplearning.org/chapter_deep-learning-basics/softmax-regression-scratch.html
The gluonbook package used in the code is the utility package that accompanies the book: it collects all the methods described in the book that will be reused later. If you did not set up your environment with the book's configuration file, you can click here to download the gluonbook package on its own and move it to the appropriate location.
Finally, if you have any questions about the code in this post, feel free to reach out and discuss!