Writing a neural network with PyTorch (with comments)
import torch
import numpy as np
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" #网上搜的解决防止报错的方案
"""
读取数据
"""
#读取数据函数
def load(data_path):
my_matrix = np.loadtxt(data_path ,dtype = float, usecols = [1], unpack = True)
return my_matrix
sum_matirx = np.zeros((1600,1)) #该数据有1600行
path = "E:\学校的一些资料\文档\大二暑假\数据\RamanSystem-master\RamanSystem-master\光谱(1)"#文件夹目录
files= os.listdir(path) #得到文件夹下的所有文件名称
txts = []
for file in files: #遍历文件夹
#print("file: ",file)
if file == "三文鱼" or file == "鲷鱼":
position = path+'\\'+ file #构造绝对路径,"\\",其中一个'\'为转义符
files2 = os.listdir(position)
for file2 in files2:
#print("file2: ",file2)
position2 = position + '\\' + file2
sum_matirx = np.c_[sum_matirx, load(position2)]
sum_matirx = np.delete(sum_matirx,0,1)#删掉第一列,第一个数字代表删除的行或列号,第二个数字1代表删列,0代表删行
sum_matirx = sum_matirx.T
print("最初始的数据shape:\n",sum_matirx.shape)
#print("原始数据:\n",sum_matirx)
"""
PCA降维(引用库中的)
"""
import sklearn
from sklearn.decomposition import PCA#加载PCA算法包
dimension = int(4)#确定降维后的维度
category = int(2)#分类的标签数
pca = PCA(dimension) #保留所有成分 #选取累计贡献率大于80%的主成分(dimension个主成分)
pca.fit(sum_matirx)
#print(pca.components_) #返回模型的各个特征向量
#print(pca.explained_variance_ratio_) #返回各个成分各自的方差百分比(也称贡献率)
low_d = pca.transform(sum_matirx)
low_d = torch.tensor(low_d, dtype=torch.float32)
#print("low_d: \n",low_d)
print("low_d.shape: \n", low_d.shape)
"""
设置标签
"""
type_num = 50 #每组样本数
low_d_labels = torch.zeros(low_d.shape[0], dtype=torch.float32)
for i in range(category):
for j in range(type_num):
low_d_labels[i*type_num + j] = (int)(i+1)
#print("low_d_labels: \n", low_d_labels)
print("low_d_labels.shape: \n", low_d_labels.shape)
"""
数据处理分类
low_d 降维后的特征值
low_d_labels 对应的标签(1~8)
"""
test_num = 10 #测试样例数
x_train = low_d[0:type_num-test_num]
y_train = low_d_labels[0:type_num-test_num]
for i in range(1,category):
index = i*type_num
x_train = torch.cat([x_train,low_d[index:index + type_num-test_num]], 0) #第二个参数是指在哪个维度进行拼接
y_train = torch.cat([y_train,low_d_labels[index:index + type_num-test_num]], 0)
#把测试样本拼接到后面40个
for i in range(0, category):
index = i*type_num
x_train = torch.cat([x_train,low_d[index+type_num-test_num:index + type_num]], 0)
y_train = torch.cat([y_train,low_d_labels[index+type_num-test_num:index + type_num]], 0)
print("x_train.shape: \n",x_train.shape)
print("y_train.shape: \n",y_train.shape)
"""
书上代码
"""
#确定网络结构
#三层神经网络,前2个隐藏层分别设置为6个和2个神经元,并使用逻辑函数激活,最后1层输出层有1个神经元
import torch.nn as nn
hidden_features = [240, 8] #前2个隐藏层分别设置为6个和2个神经元
layers = [nn.Linear(4, hidden_features[0]), ]
for idx, hidden_feature in enumerate(hidden_features):
layers.append(nn.Tanh())
next_hidden_feature = hidden_features[idx + 1] \
if idx + 1 < len(hidden_features) else 1 #这个1是指组后的输出层的维度
layers.append(nn.Linear(hidden_feature, next_hidden_feature))
net = nn.Sequential(*layers) #前馈神经网络
print('神经网络为: \n',format(net))
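#For reference (a sketch, not used below): the same architecture written as an explicit
#nn.Module subclass instead of nn.Sequential.
#class MLP(nn.Module):
#    def __init__(self):
#        super().__init__()
#        self.fc1 = nn.Linear(4, 240)
#        self.fc2 = nn.Linear(240, 8)
#        self.out = nn.Linear(8, 1)
#    def forward(self, x):
#        x = torch.tanh(self.fc1(x))
#        x = torch.tanh(self.fc2(x))
#        return self.out(x)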
#training
import torch.optim
optimizer = torch.optim.Adam(net.parameters())
criterion = nn.MSELoss()
#features and labels (already torch tensors)
features = x_train
labels = y_train
print("features.shape: \n", features.shape)
print("labels.shape: \n", labels.shape)
train_entry_num = 80  #number of training samples (= category * (type_num - test_num))
train_num = 20        #number of validation samples (= category * test_num)
n_iter = 100000  #maximum number of iterations
for step in range(n_iter):
    outputs = net(features)
    preds = outputs.squeeze()  #drop the size-1 dimension so preds has the same shape as labels
    #print("preds.shape: \n", preds.shape)
    loss_train = criterion(preds[:train_entry_num], labels[:train_entry_num])
    loss_validate = criterion(preds[train_entry_num:train_entry_num + train_num], labels[train_entry_num:train_entry_num + train_num])
    if step % 10000 == 0:
        print('#{} training MSE = {:g}, validation MSE = {:g}'.format(step, loss_train, loss_validate))
        #print("validation set size:\n", preds[train_entry_num:train_entry_num + train_num].shape)
        print("validation predictions:\n", preds[train_entry_num:train_entry_num + train_num])
        print("validation targets:\n", labels[train_entry_num:train_entry_num + train_num])
    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()
#print("outputs: \n", outputs)
count = 0
for i in range (train_num):
temp = preds[i]
if temp >= 0.5 and temp < 1.5:
temp = 1
if temp >= 1.5 and temp < 2.5:
temp = 2
if temp == labels[i]:
count = count + 1
print("预测准确数:\n",count)
print("预测准确率:\n",count / train_num)
print ('训练集MSE = {:g}, 验证集MSE = {:g}'.format(loss_train, loss_validate))
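#Vectorized version of the same accuracy computation (a sketch, not used above): round the
#validation predictions to the nearest label and clamp them into the valid range 1..category.
#val_preds = preds[train_entry_num:train_entry_num + train_num].round().clamp(1, category)
#val_labels = labels[train_entry_num:train_entry_num + train_num]
#print("accuracy:", (val_preds == val_labels).float().mean().item())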