[Single-Layer Neural Network] Linear Regression with MXNet (From-Scratch Implementation)

Date: 2025-02-04 07:00:15
from IPython import display            # only needed for the commented-out plotting code
from matplotlib import pyplot as plt   # only needed for the commented-out plotting code
from mxnet import autograd, nd
import random

''' Generate the training set '''
input_num = 2         # number of input features
examples_num = 1000   # number of generated samples

# True model parameters
real_W = [10.9, -8.7]
real_bias = 6.5

# Features drawn from a normal distribution (std = 1, mean defaults to 0)
features = nd.random.normal(scale=1, shape=(examples_num, input_num))
# Labels generated from the features and the true parameters
labels = real_W[0] * features[:, 0] + real_W[1] * features[:, 1] + real_bias
# Add Gaussian noise to the labels to simulate real data
labels_noise = labels + nd.random.normal(scale=0.1, shape=labels.shape)

# Scatter plots of labels vs. features (vector graphics)
# def use_svg_display():
#     display.set_matplotlib_formats('svg')
# def set_figure_size(figsize=(3.5, 2.5)):
#     use_svg_display()
#     plt.rcParams['figure.figsize'] = figsize
# set_figure_size()
# plt.scatter(features[:, 0].asnumpy(), labels_noise.asnumpy(), 1)
# plt.scatter(features[:, 1].asnumpy(), labels_noise.asnumpy(), 1)
# plt.show()

# Iterator that defines how minibatches are drawn from the dataset
def data_iter(batch_size, features, labels):
    num = len(features)
    indices = list(range(num))   # index array
    random.shuffle(indices)      # shuffle the indices
    # Walking the shuffled indices gives random sampling while covering every sample
    for i in range(0, num, batch_size):
        # Take up to batch_size indices starting at i and convert them to an NDArray
        j = nd.array(indices[i: min(i + batch_size, num)])
        # take() uses the index array to gather the required rows from features and labels
        yield features.take(j), labels.take(j)

""" Preparation for training """
# Declare the trainable parameters with Gaussian random / zero initial values
w = nd.random.normal(scale=0.01, shape=(input_num,))
b = nd.zeros(shape=(1,))
# b = nd.zeros(1)   # alternative form, equivalent to the line above
w.attach_grad()   # allocate gradient buffers for the parameters to be updated
b.attach_grad()

# Define the model
def linreg(X, w, b):
    return nd.dot(X, w) + b

# Define the loss function (squared loss)
def squared_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

# Define the optimizer (minibatch stochastic gradient descent)
def sgd(params, learning_rate, batch_size):
    for param in params:
        # new param = old param - learning rate * minibatch gradient / batch size
        param[:] = param - learning_rate * param.grad / batch_size

# Hyperparameters and training setup
lr = 0.03             # learning rate
num_iterations = 5    # number of passes over the data
net = linreg          # model
loss = squared_loss   # loss function
batch_size = 10       # minibatch size

''' Start training '''
for iteration in range(num_iterations):
    # Train on the noisy labels
    for x, y in data_iter(batch_size, features, labels_noise):
        with autograd.record():
            l = loss(net(x, w, b), y)   # per-example loss on the current minibatch
        l.backward()                    # compute gradients (the minibatch loss is summed)
        sgd([w, b], lr, batch_size)     # update the parameters using the gradients
    train_l = loss(net(features, w, b), labels_noise)
    print("iteration %d, loss %f" % (iteration + 1, train_l.mean().asnumpy()))

# Compare the true parameters with the learned ones
print("real_w " + str(real_W) + "\n train_w " + str(w))
print("real_b " + str(real_bias) + "\n train_b " + str(b))
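For comparison, the same model can also be trained with MXNet's high-level Gluon API instead of the from-scratch code above. The following is a minimal sketch of the standard Gluon workflow (DataLoader, nn.Dense, L2Loss, Trainer); it assumes the features and labels_noise arrays generated above and reuses the same hyperparameters.

from mxnet import autograd, gluon, init
from mxnet.gluon import data as gdata, loss as gloss, nn

batch_size = 10
dataset = gdata.ArrayDataset(features, labels_noise)
data_loader = gdata.DataLoader(dataset, batch_size, shuffle=True)

net = nn.Sequential()
net.add(nn.Dense(1))                      # single output unit = linear regression
net.initialize(init.Normal(sigma=0.01))   # small Gaussian initialization

loss = gloss.L2Loss()                     # squared loss, same as squared_loss above
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

for epoch in range(5):
    for X, y in data_loader:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)          # minibatch SGD update
    epoch_l = loss(net(features), labels_noise)
    print("epoch %d, loss %f" % (epoch + 1, epoch_l.mean().asnumpy()))

print("train_w " + str(net[0].weight.data()))
print("train_b " + str(net[0].bias.data()))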