使用pytorch编写神经网络(有注释)

时间:2024-11-13 10:46:14
"""Raman-spectrum fish classification.

Loads 1600-point spectra for two fish classes from disk, reduces them to 4
principal components with scikit-learn PCA, and trains a small MLP regressor
(4 -> 240 -> 8 -> 1, Tanh) whose output is rounded to the class label (1 or 2).

NOTE(review): flat script with a machine-specific data path (E:\...); it needs
sklearn, torch and the on-disk data set to actually run.
"""
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim
import sklearn
from sklearn.decomposition import PCA

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # workaround (found online) for the duplicate-OpenMP-runtime crash

# ---------------- Read the data ----------------

def load(data_path):
    """Read one spectrum file and return its second column as a 1-D float array."""
    return np.loadtxt(data_path, dtype=float, usecols=[1], unpack=True)

sum_matrix = np.zeros((1600, 1))  # each spectrum file has 1600 rows
# Raw string: the original plain literal contained invalid escape sequences
# ("\学", "\数", ...); the value is unchanged, only the warning is gone.
path = r"E:\学校的一些资料\文档\大二暑假\数据\RamanSystem-master\RamanSystem-master\光谱(1)"  # data folder
for file in os.listdir(path):
    # keep only the two fish classes ("三文鱼" = salmon, "鲷鱼" = sea bream)
    if file == "三文鱼" or file == "鲷鱼":
        position = os.path.join(path, file)  # class sub-folder
        for file2 in os.listdir(position):
            position2 = os.path.join(position, file2)
            # append each spectrum as a new column
            sum_matrix = np.c_[sum_matrix, load(position2)]
sum_matrix = np.delete(sum_matrix, 0, 1)  # drop the all-zero seed column (second arg 1 = column axis)
sum_matrix = sum_matrix.T  # rows = samples, columns = spectral points
print("最初始的数据shape:\n", sum_matrix.shape)

# ---------------- PCA dimensionality reduction (scikit-learn) ----------------
dimension = 4  # dimensionality after reduction
category = 2   # number of class labels
pca = PCA(dimension)  # keep `dimension` principal components
pca.fit(sum_matrix)
# pca.components_ holds the eigenvectors, pca.explained_variance_ratio_ the
# per-component contribution, if they need inspecting.
low_d = torch.tensor(pca.transform(sum_matrix), dtype=torch.float32)
print("low_d.shape: \n", low_d.shape)

# ---------------- Build the labels ----------------
type_num = 50  # samples per class
low_d_labels = torch.zeros(low_d.shape[0], dtype=torch.float32)
for i in range(category):
    # class i gets label i + 1, so labels run 1..category
    low_d_labels[i * type_num:(i + 1) * type_num] = i + 1
print("low_d_labels.shape: \n", low_d_labels.shape)

# ---------------- Arrange data: training entries first, held-out samples last --
test_num = 10  # held-out samples per class
x_train = low_d[0:type_num - test_num]
y_train = low_d_labels[0:type_num - test_num]
for i in range(1, category):
    index = i * type_num
    x_train = torch.cat([x_train, low_d[index:index + type_num - test_num]], 0)
    y_train = torch.cat([y_train, low_d_labels[index:index + type_num - test_num]], 0)
# append the last `test_num` samples of every class at the end (validation slice)
for i in range(category):
    index = i * type_num
    x_train = torch.cat([x_train, low_d[index + type_num - test_num:index + type_num]], 0)
    y_train = torch.cat([y_train, low_d_labels[index + type_num - test_num:index + type_num]], 0)
print("x_train.shape: \n", x_train.shape)
print("y_train.shape: \n", y_train.shape)

# ---------------- Network: 4 -> 240 -> 8 -> 1 MLP with Tanh activations -------
# (original comment claimed 6 and 2 neurons; the code actually uses 240 and 8)
hidden_features = [240, 8]
layers = [nn.Linear(dimension, hidden_features[0])]
for idx, hidden_feature in enumerate(hidden_features):
    layers.append(nn.Tanh())
    # after the last hidden layer, map down to the single regression output
    next_hidden_feature = hidden_features[idx + 1] if idx + 1 < len(hidden_features) else 1
    layers.append(nn.Linear(hidden_feature, next_hidden_feature))
net = nn.Sequential(*layers)  # feed-forward network
print('神经网络为: \n', format(net))

# ---------------- Training ----------------
optimizer = torch.optim.Adam(net.parameters())
criterion = nn.MSELoss()
features = x_train
labels = y_train
print(": \n", features.shape)
print(": \n", labels.shape)
train_entry_num = 80  # number of training samples
train_num = 20        # number of validation samples
n_iter = 100000       # maximum iterations
for step in range(n_iter):
    outputs = net(features)
    preds = outputs.squeeze()  # drop the trailing size-1 dimension
    # loss on the training slice drives the gradient step; the validation
    # slice is only monitored
    loss_train = criterion(preds[:train_entry_num], labels[:train_entry_num])
    loss_validate = criterion(preds[train_entry_num:train_entry_num + train_num],
                              labels[train_entry_num:train_entry_num + train_num])
    if step % 10000 == 0:
        print('#{} 训练集MSE = {:g}, 验证集MSE = {:g}'.format(step, loss_train, loss_validate))
        print("验证集的预测值:\n", preds[train_entry_num:train_entry_num + train_num])
        print("验证集的标准值:\n", labels[train_entry_num:train_entry_num + train_num])
    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()

# ---------------- Accuracy on the held-out samples ----------------
# BUG FIX: the original indexed preds[i]/labels[i] for i in range(train_num),
# i.e. the first 20 TRAINING samples, while reporting the result as test
# accuracy. Index the validation slice instead.
count = 0
for i in range(train_num):
    temp = preds[train_entry_num + i]
    # round the regression output to the nearest class label (1 or 2)
    if 0.5 <= temp < 1.5:
        temp = 1
    elif 1.5 <= temp < 2.5:
        temp = 2
    if temp == labels[train_entry_num + i]:
        count += 1
print("预测准确数:\n", count)
print("预测准确率:\n", count / train_num)
print('训练集MSE = {:g}, 验证集MSE = {:g}'.format(loss_train, loss_validate))