1.将图片的路径和标签写入csv文件并实现读取
# 创建一个csv文件,每行保存一张图片的信息,格式为:图片路径,标签,例如 pokemon\\mew\\0001.jpg,0
def load_csv(self, filename):
    """Return the image paths and integer labels listed in ``root/filename``.

    If the CSV file does not exist yet, it is created first: every ``*.png``
    and ``*.jpg`` file found directly inside ``self.root/<class name>`` (for
    each key of ``self.name2label``) is collected, the list is shuffled once,
    and one ``path,label`` row is written per image. The file is then read
    back, so later calls reuse the stored order.

    Args:
        filename: Name of the CSV file, relative to ``self.root``.

    Returns:
        A tuple ``(images, labels)`` of two equally long lists: image path
        strings and their integer class labels.
    """
    csv_path = os.path.join(self.root, filename)

    if not os.path.exists(csv_path):
        # Collect every image path; the class is encoded by the name of the
        # directory that contains the image (e.g. pokemon/mew/0001.jpg).
        images = []
        for name in self.name2label:
            images += glob.glob(os.path.join(self.root, name, '*.png'))
            images += glob.glob(os.path.join(self.root, name, '*.jpg'))
        random.shuffle(images)

        # newline='' is required by the csv module; without it every row is
        # followed by an empty line on Windows.
        with open(csv_path, 'w', newline='') as f:
            writer = csv.writer(f)
            for img in images:
                class_name = os.path.basename(os.path.dirname(img))
                writer.writerow([img, self.name2label[class_name]])

    # Read the (possibly just created) CSV file back.
    images, labels = [], []
    with open(csv_path, 'r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Tolerate blank lines, e.g. in files written without
                # newline='' by an earlier version of this code.
                continue
            img, label = row
            images.append(img)
            labels.append(int(label))

    assert len(images) == len(labels)  # both lists must stay aligned
    return images, labels
2.加载自定义数据集
"""
自定义数据集
image_resize
data augmentation(数据增强):Rotate,crop
normalize:mean,std
ToTensor """
import torch
import os,glob
import random,csv
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms
from PIL import Image
import visdom


class Pokemon(Dataset):
    """Custom image dataset in which every sub-directory of ``root`` is a class.

    The (path, label) pairs are stored in ``root/images.csv`` (created on
    first use) and then split by position into train (first 60%),
    validation (next 20%) and test (last 20%) subsets.
    """

    # Per-channel statistics used by Normalize in __getitem__ and undone by
    # denormalize(); kept in one place so the two cannot drift apart.
    _MEAN = [0.485, 0.456, 0.406]
    _STD = [0.229, 0.224, 0.225]

    def __init__(self, root, resize, mode):
        """Index the dataset and keep the subset selected by ``mode``.

        Args:
            root: Directory that contains one sub-directory per class.
            resize: Edge length (in pixels) of the square output images.
            mode: ``'train'`` or ``'val'``; any other value selects the
                test subset.
        """
        super().__init__()
        self.root = root
        self.resize = resize

        # name2label: {directory name: class index}. The listing is sorted
        # so that the mapping does not depend on the order in which the
        # file system returns the entries.
        self.name2label = {}
        for name in sorted(os.listdir(root)):
            # os.listdir returns files as well as directories.
            if not os.path.isdir(os.path.join(root, name)):
                continue
            self.name2label[name] = len(self.name2label)

        # Each CSV row has the form: image path, label.
        self.images, self.labels = self.load_csv('images.csv')

        # Compute the split boundaries once, before either list is
        # truncated, so images and labels are always cut identically.
        total = len(self.images)
        train_end = int(total * 0.6)
        val_end = int(total * 0.8)
        if mode == 'train':
            subset = slice(None, train_end)
        elif mode == 'val':
            subset = slice(train_end, val_end)
        else:
            subset = slice(val_end, None)
        self.images = self.images[subset]
        self.labels = self.labels[subset]

    def load_csv(self, filename):
        """Return the image paths and integer labels listed in ``root/filename``.

        If the CSV file does not exist yet, it is created first: every
        ``*.png`` and ``*.jpg`` file found directly inside each class
        directory is collected, the list is shuffled once, and one
        ``path,label`` row is written per image. The file is then read back,
        so later calls reuse the stored order.

        Args:
            filename: Name of the CSV file, relative to ``self.root``.

        Returns:
            A tuple ``(images, labels)`` of two equally long lists: image
            path strings and their integer class labels.
        """
        csv_path = os.path.join(self.root, filename)

        if not os.path.exists(csv_path):
            # The class of an image is encoded by the name of the directory
            # that contains it (e.g. pokemon/mew/0001.jpg -> "mew").
            images = []
            for name in self.name2label:
                images += glob.glob(os.path.join(self.root, name, '*.png'))
                images += glob.glob(os.path.join(self.root, name, '*.jpg'))
            random.shuffle(images)

            # newline='' is required by the csv module; without it every row
            # is followed by an empty line on Windows.
            with open(csv_path, 'w', newline='') as f:
                writer = csv.writer(f)
                for img in images:
                    class_name = os.path.basename(os.path.dirname(img))
                    writer.writerow([img, self.name2label[class_name]])

        # Read the (possibly just created) CSV file back.
        images, labels = [], []
        with open(csv_path, 'r', newline='') as f:
            for row in csv.reader(f):
                if not row:
                    # Tolerate blank lines in previously written files.
                    continue
                img, label = row
                images.append(img)
                labels.append(int(label))

        assert len(images) == len(labels)  # both lists must stay aligned
        return images, labels

    def __len__(self):
        """Return the number of samples in the selected subset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, augment and normalize the sample at position ``idx``.

        Args:
            idx: Index into ``self.images`` / ``self.labels``.

        Returns:
            A tuple ``(img, label)``: the transformed image tensor and the
            class index as a tensor.
        """
        # self.images[idx] is a path string; it is turned into image data
        # by the first step of the pipeline below.
        img, label = self.images[idx], self.labels[idx]

        enlarged = int(self.resize * 1.25)
        tf = transforms.Compose([
            lambda x: Image.open(x).convert('RGB'),  # path string -> image
            # Resize takes the target size as ONE argument; a second
            # positional argument would be read as the interpolation mode.
            transforms.Resize((enlarged, enlarged)),
            transforms.RandomRotation(15),  # rotation range in degrees
            transforms.CenterCrop(self.resize),  # keep the central square
            transforms.ToTensor(),
            transforms.Normalize(mean=self._MEAN, std=self._STD),
        ])

        img = tf(img)
        label = torch.tensor(label)
        return img, label

    def denormalize(self, x_hat):
        """Undo the Normalize step so that the image can be displayed.

        Args:
            x_hat: Normalized tensor with the channel axis third from the
                end, e.g. ``[c, h, w]`` or ``[b, c, h, w]``.

        Returns:
            ``x_hat * std + mean``, the inverse of ``(x - mean) / std``.
        """
        # Reshape [3] -> [3, 1, 1] so the statistics broadcast over h and w,
        # and create them on the same device as the input.
        mean = torch.tensor(self._MEAN, device=x_hat.device).unsqueeze(1).unsqueeze(1)
        std = torch.tensor(self._STD, device=x_hat.device).unsqueeze(1).unsqueeze(1)
        return x_hat * std + mean


def main():
    """Show one sample and then batches of the training set in visdom."""
    vis = visdom.Visdom()

    # If the images are stored as one directory per class, torchvision can
    # load them directly (needs `import torchvision`); the directories are
    # numbered starting from 0:
    #
    #   tf = transforms.Compose([
    #       transforms.Resize((64, 64)),
    #       transforms.ToTensor()
    #   ])
    #   db = torchvision.datasets.ImageFolder('./pokemon', transform=tf)
    #   loader = DataLoader(db, batch_size=32, shuffle=True)
    #   print(db.class_to_idx)  # show the class-name -> label mapping

    db = Pokemon('./pokemon', 224, 'train')

    # Fetch and visualize a single sample.
    x, y = db[0]
    print('sample:', x.shape, y.shape)
    vis.image(db.denormalize(x), win='sample_x', opts=dict(title='sample_x'))

    # Load and visualize the data batch by batch.
    loader = DataLoader(db, batch_size=32, shuffle=True, num_workers=8)
    for x, y in loader:
        vis.images(db.denormalize(x), nrow=8, win='batch', opts=dict(title='batch'))
        vis.text(str(y.numpy()), win='label', opts=dict(title='batch-y'))


if __name__ == '__main__':
    main()
小结:
在加载自定义数据集时,一般步骤
1.定义一个类继承Dataset
2.在类中读取数据集(图片的路径),重写len函数,和getitem函数
在len函数中返回数据集的长度
在getitem函数中,处理一张图片,单个图片路径转换成图片数据(包括transform转换),返回该图片数据和标签
3.将处理好的数据集(getitem返回的均为张量)放入DataLoader中,进行分批
loader = DataLoader(db,batch_size = 32,shuffle=True,num_workers=8 )
4.训练时通过enumerate遍历loader,每次取出一个batch(大小为batch_size)的数据