Pytorch版本yolov3源码阅读

Pytorch版本yolov3源码阅读

1. 阅读test.py

1.1 参数解读

# Command-line options of test.py (the evaluation script).
# Every option is spelled with a single leading dash, e.g. `-batch_size 16`.
parser = argparse.ArgumentParser()

parser.add_argument('-batch_size', type=int, default=32, help='size of each image batch')

parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='path to model config file')

parser.add_argument('-data_config_path', type=str, default='cfg/coco.data', help='path to data config file')

parser.add_argument('-weights_path', type=str, default='checkpoints/yolov3.pt', help='path to weights file')

parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file')

parser.add_argument('-iou_thres', type=float, default=0.5, help='iou threshold required to qualify as detected')

parser.add_argument('-conf_thres', type=float, default=0.5, help='object confidence threshold')

parser.add_argument('-nms_thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')

parser.add_argument('-n_cpu', type=int, default=0, help='number of cpu threads to use during batch generation')

parser.add_argument('-img_size', type=int, default=608, help='size of each image dimension')

# Parse the command line and echo the resolved options.
opt = parser.parse_args()

print(opt)

batch_size: 每个batch大小，跟darknet不太一样，没有subdivision
cfg：网络配置文件
data_config_path: coco.data文件，存储相关信息
weights_path: 权重文件路径
class_path: 类别文件，注意类别的顺序，coco.names
iou_thres: iou阈值
conf_thres: 目标置信度阈值
nms_thres: 非极大抑制阈值
n_cpu: 使用多少个线程来创建batch
img_size: 设置初始图片大小

1.2 data文件解析

def parse_data_config(path):
    """Parse a ``key = value`` data configuration file into a dict.

    Blank lines and lines starting with ``#`` are skipped. Keys and values
    are stripped of surrounding whitespace and kept as strings. The defaults
    ``gpus='0,1'`` and ``num_workers='10'`` are present unless the file
    overrides them.

    Args:
        path: Path of the data configuration file (e.g. ``cfg/coco.data``).

    Returns:
        dict mapping option names to their string values.

    Raises:
        ValueError: If a non-comment, non-blank line contains no ``=``.
    """
    options = {'gpus': '0,1', 'num_workers': '10'}
    with open(path, 'r') as fp:
        for line in fp:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            # Split on the first '=' only, so a value (e.g. a path) may itself
            # contain '=' without breaking the two-name unpacking.
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options

将data文件中内容存储到options这个dict中，获取的时候就可以对这个对象通过key进行提取value。

1.3 cfg文件解析

def parse_model_config(path):
    """Parse a yolo-v3 layer configuration file into module definitions.

    Every ``[section]`` header starts a new dict whose ``'type'`` entry is the
    section name; the ``key=value`` lines that follow are stored on that dict
    as stripped strings. ``convolutional`` blocks get ``batch_normalize``
    preset to ``0`` so the key is always present.

    Args:
        path: Path of the ``.cfg`` file.

    Returns:
        list of dicts, one per section, in file order.

    Raises:
        ValueError: If a non-header line contains no ``=``.
        IndexError: If a ``key=value`` line appears before the first section.
    """
    module_defs = []
    # The context manager closes the file; the handle used to be leaked.
    with open(path, 'r') as cfg_file:
        raw_lines = cfg_file.read().split('\n')

    for raw_line in raw_lines:
        # Strip BEFORE filtering so that indented comments and
        # whitespace-only lines are skipped instead of reaching the split.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith('['):  # This marks the start of a new block
            block = {'type': line[1:-1].rstrip()}
            if block['type'] == 'convolutional':
                block['batch_normalize'] = 0
            module_defs.append(block)
        else:
            # Only the first '=' separates key from value.
            key, value = line.split('=', 1)
            module_defs[-1][key.rstrip()] = value.strip()
    return module_defs

返回的module_defs存储的是所有的网络参数信息，一个list中套了很多个dict.

1.4 根据cfg文件创建模块

def create_modules(module_defs):
    """Construct the module list of layer blocks described by ``module_defs``.

    The first entry of ``module_defs`` (the network hyper-parameter block) is
    popped off the list IN PLACE, so on return ``module_defs`` holds exactly
    one definition per entry of the returned module list.

    Args:
        module_defs: Block definitions as produced by ``parse_model_config``.

    Returns:
        Tuple ``(hyperparams, module_list)``: the popped hyper-parameter dict
        and an ``nn.ModuleList`` with one ``nn.Sequential`` per block.
    """
    # The first block holds the network hyper-parameters, not a layer.
    hyperparams = module_defs.pop(0)
    # output_filters[0] is the input channel count; output_filters[k + 1] is
    # the channel count produced by layer k.
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()

    for i, module_def in enumerate(module_defs):
        # Sub-modules are registered in insertion order under explicit names.
        modules = nn.Sequential()
        layer_type = module_def['type']

        if layer_type == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            conv = nn.Conv2d(in_channels=output_filters[-1],
                             out_channels=filters,
                             kernel_size=kernel_size,
                             stride=int(module_def['stride']),
                             padding=pad,
                             bias=not bn)  # no conv bias when batch norm follows
            modules.add_module('conv_%d' % i, conv)
            if bn:
                modules.add_module('batch_norm_%d' % i, nn.BatchNorm2d(filters))
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif layer_type == 'upsample':
            upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')
            modules.add_module('upsample_%d' % i, upsample)

        elif layer_type == 'route':
            # e.g. "layers = -1, 61". Darknet.forward concatenates the
            # referenced outputs along the channel axis, so the resulting
            # channel count is the SUM of the referenced channel counts.
            layers = [int(x) for x in module_def['layers'].split(',')]
            # Negative indices are relative to the current layer and index
            # output_filters directly. Non-negative indices are absolute layer
            # numbers and need +1, because slot 0 holds the input channels.
            filters = sum(output_filters[idx + 1 if idx >= 0 else idx] for idx in layers)
            modules.add_module('route_%d' % i, EmptyLayer())

        elif layer_type == 'shortcut':
            # e.g. "from=-3"; the element-wise addition itself happens in
            # Darknet.forward, so only a placeholder is registered here.
            filters = output_filters[int(module_def['from'])]
            modules.add_module('shortcut_%d' % i, EmptyLayer())

        elif layer_type == 'yolo':
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
            # Flat "w0,h0,w1,h1,..." list -> (w, h) pairs, then keep the
            # pairs selected by the mask.
            flat = [float(x) for x in module_def['anchors'].split(',')]
            pairs = [(flat[j], flat[j + 1]) for j in range(0, len(flat), 2)]
            anchors = [pairs[j] for j in anchor_idxs]
            num_classes = int(module_def['classes'])
            img_height = int(hyperparams['height'])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height, anchor_idxs)
            modules.add_module('yolo_%d' % i, yolo_layer)

        # Register the block and its output channel count. For block types
        # that do not set `filters`, the previous value is carried over.
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list

这里开始就涉及到pytorch部分的内容了：

module_list = nn.ModuleList(): 创建一个list，其中存放的是module
nn.Sequential(): 一个时序容器。Modules 会以他们传入的顺序被添加到容器中。当然，也可以传入一个OrderedDict。
add_module(name,module):将一个 child module 添加到当前 model。被添加的module可以通过 name属性来获取。

1.5 YOLOLayer

class YOLOLayer(nn.Module):
    """Detection head for a single YOLOv3 output scale.

    When ``targets`` are passed to ``forward`` the layer returns the training
    loss together with its bookkeeping values; otherwise it returns the
    decoded predictions.
    """

    def __init__(self, anchors, nC, img_dim, anchor_idxs):
        """Store the anchors and precompute the cell-offset grids.

        Args:
            anchors: ``(width, height)`` pairs of the anchors used by this layer.
            nC: Number of classes.
            img_dim: Network input size (from the cfg hyper-parameters, NOT
                from the command-line parser).
            anchor_idxs: Anchor indices from the cfg ``mask`` entry; the first
                index selects the stride of this layer.
        """
        super(YOLOLayer, self).__init__()
        anchors = [(a_w, a_h) for a_w, a_h in anchors]  # (pixels)
        nA = len(anchors)
        self.anchors = anchors
        self.nA = nA  # number of anchors (3)
        self.nC = nC  # number of classes (80)
        self.bbox_attrs = 5 + nC  # x, y, w, h, objectness + one score per class
        self.img_dim = img_dim

        # The first mask index identifies the scale: with nA == 3 a mask
        # starting at 6 gives stride 32, at 3 stride 16, anything else 8.
        if anchor_idxs[0] == (nA * 2):  # 6
            stride = 32
        elif anchor_idxs[0] == nA:  # 3
            stride = 16
        else:
            stride = 8

        # Build anchor grids: per-cell x / y offsets, shaped [1, 1, nG, nG].
        nG = int(self.img_dim / stride)
        self.grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).float()
        self.grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).float()
        # Anchors expressed in grid cells instead of pixels.
        self.scaled_anchors = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors])
        self.anchor_w = self.scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        self.anchor_h = self.scaled_anchors[:, 1:2].view((1, nA, 1, 1))

    def forward(self, p, targets=None, requestPrecision=False):
        """Decode raw predictions, or compute the loss when targets are given.

        Args:
            p: Raw output of the preceding layer, viewed below as
                ``(bs, nA * (5 + nC), nG, nG)``.
            targets: Ground-truth labels forwarded to ``build_targets``;
                ``None`` selects inference mode.
            requestPrecision: When true, corner-format boxes are filled in
                before ``build_targets`` is called.

        Returns:
            Training: the tuple ``(loss, loss.item(), lx, ly, lw, lh, lconf,
            lcls, nT, TP, FP, FPe, FN, TC)`` where the ``l*`` entries are
            Python floats. Inference: a tensor of shape
            ``(bs, nA * nG * nG, 5 + nC)`` holding ``x, y, w, h`` scaled by the
            stride, the sigmoid objectness and the raw class scores.
        """
        FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
        bs = p.shape[0]  # batch size
        nG = p.shape[2]  # number of grid points
        stride = self.img_dim / nG

        # Lazily move the cached grids / anchors to the GPU on first CUDA use.
        if p.is_cuda and not self.grid_x.is_cuda:
            self.grid_x, self.grid_y = self.grid_x.cuda(), self.grid_y.cuda()
            self.anchor_w, self.anchor_h = self.anchor_w.cuda(), self.anchor_h.cuda()

        # e.g. (12, 255, 13, 13) --> (12, 3, 13, 13, 85)
        # i.e. (bs, anchors, grid, grid, xywh + conf + classes)
        p = p.view(bs, self.nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction

        # Get outputs
        x = torch.sigmoid(p[..., 0])  # Center x
        y = torch.sigmoid(p[..., 1])  # Center y

        # Width and height (yolo method); `.data` keeps the decoded sizes out
        # of the autograd graph.
        w = p[..., 2]  # Width
        h = p[..., 3]  # Height
        width = torch.exp(w.data) * self.anchor_w
        height = torch.exp(h.data) * self.anchor_h

        # Width and height (power method)
        # w = torch.sigmoid(p[..., 2])  # Width
        # h = torch.sigmoid(p[..., 3])  # Height
        # width = ((w.data * 2) ** 2) * self.anchor_w
        # height = ((h.data * 2) ** 2) * self.anchor_h

        # Add offset and scale with anchors (in grid space, i.e. 0-13).
        # NOTE: FT(...) allocates WITHOUT initialising; the entries are only
        # meaningful after one of the branches below has filled them in.
        pred_boxes = FT(bs, self.nA, nG, nG, 4)
        pred_conf = p[..., 4]  # Conf
        pred_cls = p[..., 5:]  # Class

        # Training
        if targets is not None:
            # NOTE(review): `size_average` is the legacy spelling of
            # reduction='mean' -- confirm against the targeted PyTorch version.
            MSELoss = nn.MSELoss(size_average=True)
            BCEWithLogitsLoss = nn.BCEWithLogitsLoss(size_average=True)
            CrossEntropyLoss = nn.CrossEntropyLoss()

            if requestPrecision:
                # Corner-format boxes (x1, y1, x2, y2) in grid units.
                gx = self.grid_x[:, :, :nG, :nG]
                gy = self.grid_y[:, :, :nG, :nG]
                pred_boxes[..., 0] = x.data + gx - width / 2
                pred_boxes[..., 1] = y.data + gy - height / 2
                pred_boxes[..., 2] = x.data + gx + width / 2
                pred_boxes[..., 3] = y.data + gy + height / 2

            # Parentheses replace the former backslash continuations, which
            # were followed by blank lines and therefore did not parse.
            tx, ty, tw, th, mask, tcls, TP, FP, FN, TC = build_targets(
                pred_boxes, pred_conf, pred_cls, targets, self.scaled_anchors,
                self.nA, self.nC, nG, requestPrecision)

            tcls = tcls[mask]
            if x.is_cuda:
                tx, ty, tw, th, mask, tcls = tx.cuda(), ty.cuda(), tw.cuda(), th.cuda(), mask.cuda(), tcls.cuda()

            # Mask outputs to ignore non-existing objects (but keep confidence predictions)
            nT = sum([len(x) for x in targets])  # number of targets
            nM = mask.sum().float()  # number of anchors (assigned to targets)
            nB = len(targets)  # batch size
            k = nM / nB  # weight applied to every loss term
            if nM > 0:
                lx = k * MSELoss(x[mask], tx[mask])
                ly = k * MSELoss(y[mask], ty[mask])
                lw = k * MSELoss(w[mask], tw[mask])
                lh = k * MSELoss(h[mask], th[mask])

                # Objectness is scored over ALL anchors, with the assignment
                # mask as the target.
                # lconf = k * BCEWithLogitsLoss(pred_conf[mask], mask[mask].float())
                lconf = k * BCEWithLogitsLoss(pred_conf, mask.float())

                lcls = k * CrossEntropyLoss(pred_cls[mask], torch.argmax(tcls, 1))
                # lcls = k * BCEWithLogitsLoss(pred_cls[mask], tcls.float())
            else:
                lx, ly, lw, lh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0]), FT([0])

            # Add confidence loss for background anchors (noobj)
            # lconf += k * BCEWithLogitsLoss(pred_conf[~mask], mask[~mask].float())

            # Sum loss components
            loss = lx + ly + lw + lh + lconf + lcls

            # Sum False Positives from unassigned anchors: anchors without a
            # target whose objectness exceeds 0.9, counted per predicted class.
            i = torch.sigmoid(pred_conf[~mask]) > 0.9
            if i.sum() > 0:
                FP_classes = torch.argmax(pred_cls[~mask][i], 1)
                FPe = torch.bincount(FP_classes, minlength=self.nC).float().cpu()  # extra FPs
            else:
                FPe = torch.zeros(self.nC)

            return (loss, loss.item(), lx.item(), ly.item(), lw.item(), lh.item(),
                    lconf.item(), lcls.item(), nT, TP, FP, FPe, FN, TC)

        else:
            # Centre-format boxes (x, y, w, h) in grid units.
            pred_boxes[..., 0] = x.data + self.grid_x
            pred_boxes[..., 1] = y.data + self.grid_y
            pred_boxes[..., 2] = width
            pred_boxes[..., 3] = height

            # If not in training phase return predictions; multiplying by
            # `stride` converts the boxes from grid units to input pixels.
            output = torch.cat((pred_boxes.view(bs, -1, 4) * stride,
                                torch.sigmoid(pred_conf.view(bs, -1, 1)),
                                pred_cls.view(bs, -1, self.nC)), -1)
            return output.data

暂且放到这里，之后再做解析

1.6 初始化模型

# Build the network from the cfg file; img_size replaces the `height` entry
# of the cfg hyper-parameter block inside Darknet.__init__.
model = Darknet(opt.cfg, opt.img_size)

转到定义：

class Darknet(nn.Module):

    """YOLOv3 object detection model.

    The layers are built from a darknet cfg file. ``forward`` returns the
    summed loss of all YOLO layers when targets are given, and the
    concatenated detections of all YOLO layers otherwise.
    """

    def __init__(self, config_path, img_size=416):

        # config_path: path of the cfg file; img_size: network input size.
        super(Darknet, self).__init__()

        self.module_defs = parse_model_config(config_path)

        # Override the cfg `height` so the YOLO layers are built for img_size.
        self.module_defs[0]['height'] = img_size

        # create_modules pops the hyper-parameter block off self.module_defs
        # in place, so afterwards module_defs and module_list line up 1:1.
        self.hyperparams, self.module_list = create_modules(self.module_defs)

        self.img_size = img_size

        # Names for the values a YOLO layer returns after the loss tensor.
        self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT', 'TP', 'FP', 'FPe', 'FN', 'TC']

    def forward(self, x, targets=None, requestPrecision=False):

        # Training mode is selected purely by the presence of targets.
        is_training = targets is not None

        output = []

        # Per-call accumulator of the loss components over all YOLO layers.
        self.losses = defaultdict(float)

        # Output of every layer so far, indexed by layer number (route / shortcut read it).
        layer_outputs = []

        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):

            if module_def['type'] in ['convolutional', 'upsample']:

                x = module(x)

            elif module_def['type'] == 'route':

                # Concatenate the referenced layer outputs along dim 1 (channels).
                layer_i = [int(x) for x in module_def['layers'].split(',')]

                x = torch.cat([layer_outputs[i] for i in layer_i], 1)

            elif module_def['type'] == 'shortcut':

                # Residual connection: previous output + referenced layer output.
                layer_i = int(module_def['from'])

                x = layer_outputs[-1] + layer_outputs[layer_i]

            elif module_def['type'] == 'yolo':

                # Train phase: get loss
                # module[0] is the YOLOLayer inside the Sequential; it is called
                # directly because Sequential would not forward the extra arguments.

                if is_training:

                    x, *losses = module[0](x, targets, requestPrecision)

                    for name, loss in zip(self.loss_names, losses):

                        self.losses[name] += loss

                # Test phase: Get detections

                else:

                    x = module(x)

                output.append(x)

            layer_outputs.append(x)

        if is_training:

            # nT and TC were accumulated once per YOLO layer; dividing by 3
            # assumes exactly three YOLO layers -- TODO confirm for other cfgs.
            self.losses['nT'] /= 3

            self.losses['TC'] /= 3

            # One column per class (FPe has one entry per class).
            metrics = torch.zeros(4, len(self.losses['FPe']))  # TP, FP, FN, target_count

            # Unique values in TC except the smallest one
            # (presumably 0 marks "no target" -- verify against build_targets).
            ui = np.unique(self.losses['TC'])[1:]

            for i in ui:

                j = self.losses['TC'] == float(i)

                metrics[0, i] = (self.losses['TP'][j] > 0).sum().float()  # TP

                metrics[1, i] = (self.losses['FP'][j] > 0).sum().float()  # FP

                # FN only when the accumulated count equals 3 (see the /3 assumption above).
                metrics[2, i] = (self.losses['FN'][j] == 3).sum().float()  # FN

            # Row 3 = column-wise sum of the rows filled so far.
            metrics[3] = metrics.sum(0)

            # Add the extra false positives reported for unassigned anchors.
            metrics[1] += self.losses['FPe']

            self.losses['TP'] = metrics[0].sum()

            self.losses['FP'] = metrics[1].sum()

            self.losses['FN'] = metrics[2].sum()

            self.losses['TC'] = 0

            self.losses['metrics'] = metrics

        # Training: sum of the per-layer loss tensors.
        # Inference: detections of all YOLO layers concatenated along dim 1.
        return sum(output) if is_training else torch.cat(output, 1)

梳理一下属性值，以便更好理解：

module_defs: list类型，每个元素是一个dict，存储cfg文件中一个网络层的配置
hyperparams: 超参数，整个网络需要的参数被存储到该属性中
module_list：整个网络所有的模型加载到pytorch中的nn.ModuleList()
loss_names: 有必要理解一下这里的loss中参数的含义
- loss
- x,y,w,h
- conf
- cls
- nT
- TP,FP,FPe,FN,TC

loss参数含义还不是很明白，留坑，待填坑

1.7 加载权重

都知道，pytorch版的yolov3权重文件是.pt结尾的，darknet版本的yolov3权重文件是.weights结尾的。

从下面的代码可以看出，这个版本既可以加载.pt文件，也可以直接加载darknet的.weights文件。

# Load weights; the loader is chosen from the file extension.
# NOTE(review): a path with any other extension matches neither branch, so the
# model would silently keep its freshly initialised weights.

if opt.weights_path.endswith('.weights'):  # darknet format

    load_weights(model, opt.weights_path)

elif opt.weights_path.endswith('.pt'):  # pytorch format

    # Tensors are mapped to the CPU while loading.
    checkpoint = torch.load(opt.weights_path, map_location='cpu')

    # The checkpoint is a dict; the network weights live under 'model'.
    model.load_state_dict(checkpoint['model'])

    # Drop the reference so the loaded checkpoint can be garbage-collected.
    del checkpoint

1.8 计算mAP

# Evaluation loop: run the model on every batch, apply NMS, then score each
# image's detections against its annotations and keep a running mean AP.
print('Compute mAP...')

# NOTE(review): `correct` is rebound to a fresh list for every sample below,
# so this initial 0 is never read in this excerpt.
correct = 0

targets = None

# Only `mAPs` is used in this excerpt; the other lists are not referenced again here.
outputs, mAPs, TP, confidence, pred_class, target_class = [], [], [], [], [], []

for batch_i, (imgs, targets) in enumerate(dataloader):

    imgs = imgs.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():

        output = model(imgs)

        output = non_max_suppression(output, conf_thres=opt.conf_thres, nms_thres=opt.nms_thres)

    # Compute average precision for each sample

    for sample_i in range(len(targets)):

        # 1 / 0 flag per detection, in the sorted detection order used below.
        correct = []

        # Get labels for sample where width is not zero (dummies)

        annotations = targets[sample_i]

        # Extract detections

        detections = output[sample_i]

        if detections is None:

            # If there are no detections but there are annotations mask as zero AP

            if annotations.size(0) != 0:

                mAPs.append(0)

            continue

        # Get detections sorted by decreasing confidence scores

        detections = detections[np.argsort(-detections[:, 4])]

        # If no annotations add number of detections as incorrect
        # (what actually happens: a single zero AP is recorded for the image)

        if annotations.size(0) == 0:

            target_cls = []

            #correct.extend([0 for _ in range(len(detections))])

            mAPs.append(0)

            continue

        else:

            # Column 0 of an annotation row is the class, columns 1-4 the box.
            target_cls = annotations[:, 0]

            # Extract target boxes as (x1, y1, x2, y2)

            target_boxes = xywh2xyxy(annotations[:, 1:5])

            # presumably the annotation boxes are normalised to [0, 1] and are
            # scaled to pixels here -- TODO confirm against the dataset loader
            target_boxes *= opt.img_size

            # Indices of the targets that have already been matched.
            detected = []

            # Each detection row: box coordinates followed by three trailing values.
            for *pred_bbox, conf, obj_conf, obj_pred in detections:

                pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)

                # Compute iou with target boxes

                iou = bbox_iou(pred_bbox, target_boxes)

                # Extract index of largest overlap

                best_i = np.argmax(iou)

                # If overlap exceeds threshold and classification is correct mark as correct
                # (each target can be matched by at most one detection)

                if iou[best_i] > opt.iou_thres and obj_pred == annotations[best_i, 0] and best_i not in detected:

                    correct.append(1)

                    detected.append(best_i)

                else:

                    correct.append(0)

        # Compute Average Precision (AP) per class

        AP = ap_per_class(tp=correct, conf=detections[:, 4], pred_cls=detections[:, 6], target_cls=target_cls)

        # Compute mean AP for this image

        mAP = AP.mean()

        # Append image mAP to list

        mAPs.append(mAP)

        # Print image mAP and running mean mAP
        # (the printed total is len(dataloader) * batch_size)

        print('+ Sample [%d/%d] AP: %.4f (%.4f)' % (len(mAPs), len(dataloader) * opt.batch_size, mAP, np.mean(mAPs)))

print('Mean Average Precision: %.4f' % np.mean(mAPs))

留坑，待填

2. 阅读train.py

2.1 参数解读

def _str2bool(value):
    """Convert a command-line string such as 'True' / 'false' / '1' to a bool.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


# Command-line options of train.py.
# Every option is spelled with a single leading dash, e.g. `-epochs 100`.
parser = argparse.ArgumentParser()
parser.add_argument('-epochs', type=int, default=68, help='number of epochs')
parser.add_argument('-batch_size', type=int, default=12, help='size of each image batch')
parser.add_argument('-data_config_path', type=str, default='cfg/coco.data', help='data config file path')
parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')
# Without a converter the value stays a string, so `-resume False` would be
# the truthy string 'False'; _str2bool turns it into a real boolean.
parser.add_argument('-resume', type=_str2bool, default=False, help='resume training flag')

# Parse the command line and echo the resolved options.
opt = parser.parse_args()
print(opt)

epochs 设置循环的参数
batch_size: 设置batch
data_config_path: data文件位置
cfg: 记录cfg文件的位置
img_size: 设置图片大小
resume: 是否恢复训练（True or False）

2.2 随机初始化

# Seed every random number generator in use: Python, NumPy and PyTorch.
random.seed(0)

np.random.seed(0)

torch.manual_seed(0)

if cuda:

    # Seed the current GPU and then all GPUs.
    torch.cuda.manual_seed(0)

    torch.cuda.manual_seed_all(0)

    # NOTE(review): cudnn.benchmark lets cuDNN auto-select convolution
    # algorithms, which can make runs differ despite the fixed seeds above --
    # confirm that speed is preferred over exact reproducibility here.
    torch.backends.cudnn.benchmark = True

2.3 设置优化器

# SGD with Nesterov momentum, applied only to parameters that require gradients.
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3,momentum=.9, weight_decay=5e-4, nesterov=True)

使用SGD优化器，learning_rate=0.001,momentum=0.9,weight_decay=5e-4,使用nesterov动量

2.4 更新优化器

根据当前epoch来确定使用哪一个lr:

        # Update scheduler (automatic)
        # scheduler.step()

        # Update scheduler (manual): step schedule keyed on the epoch number --
        # 1e-3 before epoch 54, 1e-4 before epoch 61, 1e-5 afterwards.
        lr = 1e-3 if epoch < 54 else (1e-4 if epoch < 61 else 1e-5)

        # Push the chosen learning rate into every parameter group.
        for g in optimizer.param_groups:
            g['lr'] = lr

学习率既可以通过scheduler自动更新，也可以像这里一样根据epoch手工设置。

2.5 loss指标

mean_precision:

            # Precision = TP / (TP + FP) per column of `metrics`, averaged over
            # the columns that have at least one TP or FP.
            predicted = metrics[0] + metrics[1]
            precision = metrics[0] / (predicted + 1e-16)  # epsilon avoids 0 / 0
            k = predicted > 0
            mean_precision = precision[k].mean() if k.sum() > 0 else 0

mean_recall:

            # Recall = TP / (TP + FN) per column of `metrics`, averaged over
            # the columns that have at least one TP or FN.
            actual = metrics[0] + metrics[2]
            recall = metrics[0] / (actual + 1e-16)  # epsilon avoids 0 / 0
            k = actual > 0
            mean_recall = recall[k].mean() if k.sum() > 0 else 0

然后将所有指标写到results.txt文件中

2.6 checkpoint相关

checkpoint参数：epoch, best_loss,model,optimizer

latest.pt: 最新的权重文件

best.pt: 当前最好的权重文件

        # Copies go through shutil instead of shelling out to `cp`: it works
        # on every platform and raises if the copy fails. Imported locally
        # because this excerpt cannot see the file's import block.
        import shutil

        # Save latest checkpoint: training position, best loss so far, and the
        # model / optimizer state needed to resume.
        checkpoint = {'epoch': epoch,
                      'best_loss': best_loss,
                      'model': model.state_dict(),
                      'optimizer': optimizer.state_dict()}
        torch.save(checkpoint, 'checkpoints/latest.pt')

        # Save best checkpoint (this epoch's loss is the best loss so far)
        if best_loss == loss_per_target:
            shutil.copyfile('checkpoints/latest.pt', 'checkpoints/best.pt')

        # Save backup checkpoint every 5 epochs; logical `and` replaces the
        # former bitwise `&`.
        if epoch > 0 and epoch % 5 == 0:
            shutil.copyfile('checkpoints/latest.pt', 'checkpoints/backup' + str(epoch) + '.pt')

3. 阅读detect.py

3.1 参数解读

def _str2bool(value):
    """Convert a command-line string such as 'True' / 'false' / '1' to a bool.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


# Command-line options of detect.py.
# Every option is spelled with a single leading dash, e.g. `-img_size 608`.
parser.add_argument('-image_folder', type=str, default='data/samples', help='path to images')
parser.add_argument('-output_folder', type=str, default='output', help='path to outputs')
# `type=bool` would turn ANY non-empty string (including 'False') into True,
# so the two flags use an explicit string-to-bool converter instead.
parser.add_argument('-plot_flag', type=_str2bool, default=True)
parser.add_argument('-txt_out', type=_str2bool, default=False)
parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file')
parser.add_argument('-conf_thres', type=float, default=0.50, help='object confidence threshold')
parser.add_argument('-nms_thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
parser.add_argument('-batch_size', type=int, default=1, help='size of the batches')
parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')

# Parse the command line and echo the resolved options.
opt = parser.parse_args()
print(opt)

image_folder: data/samples, 待检测的图片的文件夹
output_folder: output, 结果输出文件夹
plot_flag: True or False, 添加bbox, 保存图片
txt_out: True or False, 是否将检测结果保存为txt文件
cfg: cfg文件路径
class_path: 类别名称文件位置
conf_thres, nms_thres: 目标检测置信度，非极大抑制阈值
batch_size: 一般设置为1，选用默认的即可
img_size: 设置加载图片时候的图片大小

3.2 预测框的获取

        # Get detections

        # Inference only: no gradients are needed.
        with torch.no_grad():

            # Add a batch dimension of 1 and move the image to the device.
            chip = torch.from_numpy(img).unsqueeze(0).to(device)

            pred = model(chip)

            # Keep only the rows whose value in column 4 exceeds conf_thres;
            # the boolean mask flattens the batch dimension away.
            pred = pred[pred[:, :, 4] > opt.conf_thres]

            if len(pred) > 0:

                # Restore the batch dimension expected by non_max_suppression.
                detections = non_max_suppression(pred.unsqueeze(0), opt.conf_thres, opt.nms_thres)

                # Both lists are extended together, so they stay aligned;
                # images without any surviving prediction are skipped entirely.
                img_detections.extend(detections)

                imgs.extend(img_paths)

获取预测框，非极大值抑制。

3.3 核心-迭代图片画出预测框

# Iterate through images and save plot of detections

    for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):

        print("image %g: '%s'" % (img_i, path))

        if opt.plot_flag:

            img = cv2.imread(path)

        # The amount of padding that was added

        pad_x = max(img.shape[0] - img.shape[1], 0) * (opt.img_size / max(img.shape))

        pad_y = max(img.shape[1] - img.shape[0], 0) * (opt.img_size / max(img.shape))

        # Image height and width after padding is removed

        unpad_h = opt.img_size - pad_y

        unpad_w = opt.img_size - pad_x

        # Draw bounding boxes and labels of detections

        if detections is not None:

            unique_classes = detections[:, -1].cpu().unique()

            bbox_colors = random.sample(color_list, len(unique_classes))

            # write results to .txt file

            results_img_path = os.path.join(opt.output_folder, path.split('/')[-1])

            results_txt_path = results_img_path + '.txt'

            if os.path.isfile(results_txt_path):

                os.remove(results_txt_path)

            for i in unique_classes:

                n = (detections[:, -1].cpu() == i).sum()

                print('%g %ss' % (n, classes[int(i)]))

            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:

                # Rescale coordinates to original dimensions

                box_h = ((y2 - y1) / unpad_h) * img.shape[0]

                box_w = ((x2 - x1) / unpad_w) * img.shape[1]

                y1 = (((y1 - pad_y // 2) / unpad_h) * img.shape[0]).round().item()

                x1 = (((x1 - pad_x // 2) / unpad_w) * img.shape[1]).round().item()

                x2 = (x1 + box_w).round().item()

                y2 = (y1 + box_h).round().item()

                x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)

                # write to file

                if opt.txt_out:

                    with open(results_txt_path, 'a') as file:

                        file.write(('%g %g %g %g %g %g \n') % (x1, y1, x2, y2, cls_pred, cls_conf * conf))

                if opt.plot_flag:

                    # Add the bbox to the plot

                    label = '%s %.2f' % (classes[int(cls_pred)], conf)

                    color = bbox_colors[int(np.where(unique_classes == int(cls_pred))[0])]

                    plot_one_box([x1, y1, x2, y2], img, label=label, color=color)

        if opt.plot_flag:

            # Save generated image with detections

            cv2.imwrite(results_img_path.replace('.bmp', '.jpg').replace('.tif', '.jpg'), img)

秒客网

Pytorch版本yolov3源码阅读

Pytorch版本yolov3源码阅读

1. 阅读test.py

1.1 参数解读

1.2 data文件解析

1.3 cfg文件解析

1.4 根据cfg文件创建模块

1.5 YOLOLayer

1.6 初始化模型

1.7 加载权重

1.8 计算mAP

2. 阅读train.py

2.1 参数解读

2.2 随机初始化

2.3 设置优化器

2.4 更新优化器

2.5 loss指标

2.6 checkpoint相关

3. 阅读detect.py

3.1 参数解读

3.2 预测框的获取

3.3 核心-迭代图片画出预测框

相关文章