!cd 'data/data107306' && unzip -q img.zip
# Import the required libraries
from sklearn.utils import shuffle
import os
import pandas as pd
import numpy as np
from PIL import Image
import paddle
import paddle.nn as nn
from paddle.io import Dataset
import paddle.vision.transforms as T
import paddle.nn.functional as F
from paddle.metric import Accuracy
import warnings
warnings.filterwarnings("ignore")
# Load the annotation table and shuffle its rows.
train_images = shuffle(pd.read_csv('data/data107306/img/df_all.csv'))

# Split into training and validation subsets: the first 90% of the shuffled
# rows are used for training, the remaining rows for validation.
all_size = len(train_images)
train_size = int(all_size * 0.9)
train_image_list = train_images.iloc[:train_size]
val_image_list = train_images.iloc[train_size:]

# Training subset: values of the 'image' column and an int64 tensor built
# from the 'label' column.
label_list = train_image_list['label'].values
train_image_path_list = train_image_list['image'].values
train_label_list = paddle.to_tensor(label_list, dtype='int64')

# Validation subset, built the same way.
val_label_list1 = val_image_list['label'].values
val_image_path_list = val_image_list['image'].values
val_label_list = paddle.to_tensor(val_label_list1, dtype='int64')
# Preprocessing pipeline applied to every image returned by the dataset.
data_transforms = T.Compose([
    # Resize every image to 448 x 448.
    T.Resize((448, 448)),
    # HWC -> CHW
    T.Transpose(),
    # With mean 0 and std 255 this divides every channel by 255.
    # NOTE(review): to_rgb=True is passed through unchanged; confirm its
    # effect in the installed Paddle version, since images are already
    # opened as RGB by the dataset.
    T.Normalize(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True),
])
# Build the Dataset
class MyDataset(paddle.io.Dataset):
    """Map-style dataset pairing image files with their labels.

    Step 1: subclass ``paddle.io.Dataset``.

    Both the training and the validation lists are passed in; ``mode``
    selects which pair is actually served by this instance.
    """

    def __init__(self, train_img_list, val_img_list, train_label_list,
                 val_label_list, mode='train',
                 img_dir='data/data107306/img/imgV/'):
        """Step 2: store the inputs and build the list of samples to serve.

        Args:
            train_img_list: iterable of image file names for training.
            val_img_list: iterable of image file names for validation.
            train_label_list: iterable of labels aligned with
                ``train_img_list``.
            val_label_list: iterable of labels aligned with ``val_img_list``.
            mode: ``'train'`` serves the training lists; any other value
                serves the validation lists.
            img_dir: directory that the file names are relative to. The
                default is the location the original code hard-coded.
        """
        super(MyDataset, self).__init__()
        # Never filled; kept only so existing attribute access keeps working.
        self.valimg = []
        self.vallabel = []
        self.train_images = train_img_list
        self.test_images = val_img_list
        self.train_label = train_label_list
        self.test_label = val_label_list
        self.img_dir = img_dir

        if mode == 'train':
            names, labels = self.train_images, self.train_label
        else:
            names, labels = self.test_images, self.test_label

        self.img = []
        self.label = []
        # zip() stops at the shorter sequence, so every stored image has a
        # label.
        for name, lab in zip(names, labels):
            self.img.append(os.path.join(img_dir, name))
            self.label.append(lab)

    def load_img(self, image_path):
        """Read ``image_path`` and return it as a float32 RGB array."""
        # The context manager closes the underlying file as soon as the
        # pixel data has been copied into the array.
        with Image.open(image_path) as img:
            return np.array(img.convert('RGB')).astype('float32')

    def __getitem__(self, index):
        """Step 3: return the ``(transformed image, label)`` pair at ``index``."""
        image = self.load_img(self.img[index])
        label = self.label[index]
        # data_transforms is the module-level preprocessing pipeline.
        return data_transforms(image), label

    def __len__(self):
        """Step 4: return the number of samples served by this instance."""
        return len(self.img)
# Training loader
train_dataset = MyDataset(
    train_img_list=train_image_path_list,
    val_img_list=val_image_path_list,
    train_label_list=train_label_list,
    val_label_list=val_label_list,
    mode='train',
)
train_loader = paddle.io.DataLoader(
    train_dataset,
    places=paddle.CPUPlace(),
    batch_size=4,
    shuffle=True,
    num_workers=0,
)

# Validation loader: same inputs, but any mode other than 'train' makes the
# dataset serve the validation lists.
val_dataset = MyDataset(
    train_img_list=train_image_path_list,
    val_img_list=val_image_path_list,
    train_label_list=train_label_list,
    val_label_list=val_label_list,
    mode='test',
)
val_loader = paddle.io.DataLoader(
    val_dataset,
    places=paddle.CPUPlace(),
    batch_size=4,
    shuffle=True,
    num_workers=0,
)
from res2net import Res2Net50_vd_26w_4s
# Build the model.
# NOTE(review): class_dim=4 is presumably the number of output classes;
# confirm against res2net.py.
model_re2 = Res2Net50_vd_26w_4s(class_dim=4)
import paddle.nn.functional as F
import paddle
# Load the pretrained checkpoint from the working directory and copy its
# parameters into the model.
modelre2_state_dict = paddle.load("Res2Net50_vd_26w_4s_pretrained.pdparams")
model_re2.set_state_dict(modelre2_state_dict, use_structured_name=True)
# Put the model in training mode.
model_re2.train()
# Number of passes over the training loader.
epochs = 2
# Adam optimizer over all model parameters with a fixed learning rate of 3e-4.
optim1 = paddle.optimizer.Adam(learning_rate=3e-4, parameters=model_re2.parameters())
class topk_crossEntrophy(nn.Layer):
    """Cross-entropy averaged over only the largest per-sample losses.

    For each batch the loss of every sample is computed, the ``top_k``
    fraction with the highest loss is kept, and the mean of those values is
    returned.
    """

    def __init__(self, top_k=0.7):
        """Create the loss.

        Args:
            top_k: fraction of the batch to keep, in ``(0, 1]``. ``1`` keeps
                every sample, i.e. plain mean cross-entropy.

        Raises:
            ValueError: if ``top_k`` is outside ``(0, 1]``.
        """
        super(topk_crossEntrophy, self).__init__()
        if not 0 < top_k <= 1:
            raise ValueError(
                "top_k must be in the interval (0, 1], got {}".format(top_k))
        # reduction='none' yields one loss value per sample, which is what
        # the top-k selection in forward() needs.
        self.loss = nn.NLLLoss(reduction='none')
        self.top_k = top_k
        self.softmax = nn.LogSoftmax()

    def forward(self, inputs, target):
        """Return the mean of the ``top_k`` fraction of largest sample losses.

        Args:
            inputs: unnormalised class scores, expected as ``[N, C]``.
            target: integer class ids, either ``[N]`` or ``[N, 1]``.

        Returns:
            The mean loss over the selected samples.
        """
        log_probs = self.softmax(inputs)
        # Flatten so that labels shaped [N, 1] are accepted as well as [N].
        labels = paddle.reshape(target, [-1])
        per_sample = self.loss(log_probs, labels)

        if self.top_k == 1:
            return paddle.mean(per_sample)

        # Keep at least one sample: int() truncates to 0 for tiny batches
        # (for example a final batch of size 1 with top_k=0.7).
        keep = max(1, int(self.top_k * per_sample.shape[0]))
        hardest, _ = paddle.topk(per_sample, keep)
        return paddle.mean(hardest)


topk_loss = topk_crossEntrophy()
from numpy import *
# Train with the Adam optimizer and evaluate on the validation loader after
# every epoch.
for epoch in range(epochs):
    # Evaluation below switches the model to eval mode, so restore training
    # mode at the start of every epoch.
    model_re2.train()
    for batch_id, data in enumerate(train_loader()):
        x_data = data[0]
        y_data = data[1]
        predicts1 = model_re2(x_data)
        # Compute the loss. y_data already holds integer class ids, so it is
        # passed to the loss directly: paddle.topk(y_data, 1)[1] would return
        # positions along the last axis rather than the label values.
        loss1 = topk_loss(predicts1, y_data)
        acc1 = paddle.metric.accuracy(predicts1, y_data)
        loss1.backward()
        print("epoch: {}, batch_id: {}, loss1 is: {}, acc1 is: {}".format(epoch, batch_id, loss1.numpy(), acc1.numpy()))
        optim1.step()
        optim1.clear_grad()

    # Per-batch validation metrics for this epoch.
    loss1_eval = []
    acc1_eval = []
    # Evaluate in eval mode and without building the autograd graph, so that
    # validation neither runs train-mode layer behaviour nor tracks gradients.
    model_re2.eval()
    with paddle.no_grad():
        for batch_id, data in enumerate(val_loader()):
            x_data = data[0]
            y_data = data[1]
            predicts1 = model_re2(x_data)
            loss1 = topk_loss(predicts1, y_data)
            loss1_eval.append(loss1.numpy())
            # Compute accuracy
            acc1 = paddle.metric.accuracy(predicts1, y_data)
            acc1_eval.append(acc1.numpy())
            if batch_id % 100 == 0:
                print('************Eval Begin!!***************')
                print("epoch: {}, batch_id: {}, loss1 is: {}, acc1 is: {}".format(epoch, batch_id, loss1.numpy(), acc1.numpy()))
                print('************Eval End!!***************')
    # Report the epoch-level averages of the collected validation metrics.
    if loss1_eval:
        print("epoch: {}, eval mean loss1 is: {}, eval mean acc1 is: {}".format(epoch, np.mean(loss1_eval), np.mean(acc1_eval)))