[卷积神经网络]使用YOLOv11训练自己的模型

YOLOv11的源码：Ultralytics官方仓库 https://github.com/ultralytics/ultralytics

一、数据集准备

首先，准备好自己的数据集，包含图像文件和标注文件。因为我的数据集是VOC格式，所以需要先转为YOLO格式，可以使用下面的脚本进行转换。

import os
import shutil
import xml.etree.ElementTree as ET


# Class-name -> class-id mapping. Remember this order: the `names` list in the
# YOLO dataset config must list the classes in exactly the same order.
class_mapping = {
    'cls1': 0,
    'cls2': 1,
    # ...
}


def _clamp(value, low, high):
    """Return *value* limited to the closed interval [low, high]."""
    return max(low, min(value, high))


def convert_voc_to_yolo(voc_annotation_file, yolo_label_file):
    """Convert one Pascal VOC XML annotation into a YOLO label file.

    Each ``<object>`` whose class name is a key of ``class_mapping`` becomes
    one output line ``"<cls_id> <x_center> <y_center> <w> <h>"`` with all four
    box values normalized by the image size taken from ``<size>``.

    Args:
        voc_annotation_file: Path of the VOC ``.xml`` annotation to read.
        yolo_label_file: Path of the ``.txt`` label file to write. The file is
            always (re)created, even when no object is kept.

    Raises:
        ValueError: If ``<size>`` is missing or its width/height is not a
            positive number (normalization would be impossible).
    """
    root = ET.parse(voc_annotation_file).getroot()

    size = root.find('size')
    if size is None:
        raise ValueError(f"{voc_annotation_file}: missing <size> element")
    width = float(size.findtext('width') or 0)
    height = float(size.findtext('height') or 0)
    if width <= 0 or height <= 0:
        raise ValueError(
            f"{voc_annotation_file}: invalid image size {width}x{height}"
        )

    label_lines = []
    for obj in root.findall('object'):
        # Annotation tools sometimes leave stray whitespace around the name.
        cls = (obj.findtext('name') or '').strip()
        if cls not in class_mapping:
            continue
        xmlbox = obj.find('bndbox')
        if xmlbox is None:
            continue

        # Clamp to the image so the normalized values stay within [0, 1],
        # and sort each pair so a swapped min/max still yields a valid box.
        xmin, xmax = sorted(
            _clamp(float(xmlbox.findtext(tag)), 0.0, width)
            for tag in ('xmin', 'xmax')
        )
        ymin, ymax = sorted(
            _clamp(float(xmlbox.findtext(tag)), 0.0, height)
            for tag in ('ymin', 'ymax')
        )
        if xmax <= xmin or ymax <= ymin:
            # Zero-area box (possibly after clamping): nothing to learn from.
            continue

        x_center = (xmin + xmax) / 2.0 / width
        y_center = (ymin + ymax) / 2.0 / height
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height
        label_lines.append(
            f"{class_mapping[cls]} {x_center} {y_center} {w} {h}\n"
        )

    # Write only after the whole annotation parsed, so a malformed XML file
    # never leaves a half-written label behind.
    with open(yolo_label_file, 'w', encoding='utf-8') as f:
        f.writelines(label_lines)

if __name__ == '__main__':
    # Batch driver: converts every VOC annotation under Annotations/ into a
    # YOLO label file and copies the matching .jpg next to it.

    # VOC dataset location (change to your own dataset path)
    voc_data_path = 'VocData'
    voc_annotations_path = os.path.join(voc_data_path, 'Annotations')
    voc_images_path = os.path.join(voc_data_path, 'JPEGImages')

    # Output location of the YOLO-format dataset (change to your own path)
    yolo_data_path = 'datasets/VOC'
    yolo_images_path = os.path.join(yolo_data_path, 'images')
    yolo_labels_path = os.path.join(yolo_data_path, 'labels')

    # Create the output directories
    os.makedirs(yolo_images_path, exist_ok=True)
    os.makedirs(yolo_labels_path, exist_ok=True)

    print('开始转换')

    # List the directory once (the progress total then counts XML files only);
    # sorted so the processing order is the same on every run.
    xml_files = sorted(
        name for name in os.listdir(voc_annotations_path)
        if name.lower().endswith('.xml')
    )
    total = len(xml_files)

    for i, voc_annotation in enumerate(xml_files, start=1):
        voc_annotation_file = os.path.join(voc_annotations_path, voc_annotation)
        image_id = os.path.splitext(voc_annotation)[0]
        voc_image_file = os.path.join(voc_images_path, f"{image_id}.jpg")
        yolo_label_file = os.path.join(yolo_labels_path, f"{image_id}.txt")
        yolo_image_file = os.path.join(yolo_images_path, f"{image_id}.jpg")

        if os.path.exists(voc_image_file):
            convert_voc_to_yolo(voc_annotation_file, yolo_label_file)
            shutil.copy(voc_image_file, yolo_image_file)
        else:
            # A label without its image is useless for training and would make
            # a later image copy fail, so skip the pair and say so.
            print(f'警告：未找到图像文件 {voc_image_file}，已跳过')
        print('{}/{}'.format(i, total))

    print("转换完成！")

在完成数据集格式的转换后还要对其进行划分，使用下面的代码将数据集划分为train,val,test

import os, shutil
from sklearn.model_selection import train_test_split


# Split the converted YOLO dataset into train / val / test and copy each
# image + label pair into <output_folder>/{images,labels}/<subset>.

val_test_size = 0.2    # share of the whole dataset held out for val + test
test_size = 0.5        # share of that held-out part that becomes the test set
postfix = 'jpg'        # image file extension (without the dot)
imgpath = 'datasets/VOC/images'    # source image directory
txtpath = 'datasets/VOC/labels'    # source label directory

# Forward slash on purpose: '\T' is an invalid escape sequence and would name
# a single directory literally "datasets\TestA" outside Windows.
output_folder = 'datasets/TestA'    # output directory

# Sorted so that the fixed random_state really yields the same split on every
# machine (os.listdir order is arbitrary); only real .txt label files count.
label_files = sorted(
    name for name in os.listdir(txtpath) if name.endswith('.txt')
)

train, val_test = train_test_split(
    label_files, test_size=val_test_size, shuffle=True, random_state=0
)
# train_test_split returns (remainder, test part): `test_size` sizes `test`.
val, test = train_test_split(
    val_test, test_size=test_size, shuffle=True, random_state=0
)

subsets = {'train': train, 'val': val, 'test': test}
total = len(train) + len(val) + len(test)
idx = 0

for subset_name, names in subsets.items():
    img_out_dir = os.path.join(output_folder, 'images', subset_name)
    txt_out_dir = os.path.join(output_folder, 'labels', subset_name)
    os.makedirs(img_out_dir, exist_ok=True)
    os.makedirs(txt_out_dir, exist_ok=True)

    for label_name in names:
        # The image shares the label's base name, with the image extension.
        image_name = '{}.{}'.format(os.path.splitext(label_name)[0], postfix)

        shutil.copy(os.path.join(imgpath, image_name),
                    os.path.join(img_out_dir, image_name))
        shutil.copy(os.path.join(txtpath, label_name),
                    os.path.join(txt_out_dir, label_name))
        idx += 1
        print('{}/{}'.format(idx, total))

print('结束分割')

二、配置文件

接下来，需要为YOLO配置如下文件，以确保其能正常工作。

1.配置数据集

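数据集的映射关系如下（images与labels两个目录的结构必须一一对应，每张图像对应一个同名的txt标注文件）：

数据集根目录
├── images
│   ├── train
│   ├── val
│   └── test
└── labels
    ├── train
    ├── val
    └── test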

为其配置一个yaml文件（本文命名为VOC.yaml，放在datasets/VOC目录下，后面的训练脚本会引用它），包含数据集的路径和类别信息。

# Dataset configuration YAML

# Use absolute paths here: the image directories of the train / val / test splits.
# NOTE(review): the split script in this article writes to datasets\TestA, not
# datasets\VOC; point these at wherever the split was actually written.
train: \yolov11\datasets\VOC\images\train  
val: \yolov11\datasets\VOC\images\val  
test: \yolov11\datasets\VOC\images\test  

# Number of classes
nc: 6

# Class names; keep the same order as the class mapping used during conversion
names: ['Boerner','Leconte','acuminatus','armandi','coleoptera','Linnaeus']

2.配置模型

新建一个yaml文件（本文命名为yolo11.yaml），你可以放在任何地方，这里为了方便管理放在了datasets目录下。这个文件是官方提供的，只需按注释修改类别数和模型规模，不需要做过多更改：

# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 6 # number of classes; keep it consistent with the dataset config
scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
  # [depth, width, max_channels]
  # Key setting: put the scale you want to train FIRST in this mapping.
  # NOTE(review): presumably the first entry is used when the file name carries
  # no scale letter (e.g. 'yolo11.yaml'); confirm against the Ultralytics docs.
  n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
  x: [1.00, 1.50, 512] # summary: 631 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 409 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
  l: [1.00, 1.00, 512] # summary: 631 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs

# YOLO11n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 2, C3k2, [256, False, 0.25]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 2, C3k2, [512, False, 0.25]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 2, C3k2, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 2, C3k2, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 2, C2PSA, [1024]] # 10

# YOLO11n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 2, C3k2, [512, False]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 2, C3k2, [256, False]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 2, C3k2, [512, False]] # 19 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 2, C3k2, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5)

三、开始训练

编写一个训练脚本，然后直接运行它。

from ultralytics import YOLO

if __name__ == '__main__':
    # Build the model from its YAML definition.
    # Alternative (left disabled here): load pretrained weights with
    # YOLO('datasets/yolo11s.pt').
    detector = YOLO(model='datasets/yolo11.yaml')

    # Training settings, forwarded unchanged as keyword arguments.
    train_options = dict(
        data='datasets/VOC/VOC.yaml',  # dataset configuration YAML
        epochs=100,                    # number of training epochs
        imgsz=640,                     # input image size
        device='0',                    # device selector (presumably GPU 0)
        batch=16,                      # batch size
        name='Result',                 # run name (presumably the results sub-directory)
    )
    train_results = detector.train(**train_options)

训练完成后，训练结果将被放在yolov11\runs\detect\Result目录下（子目录名由训练脚本中的name参数决定）。

秒客网