python 将yaml标签文件转化为xml格式的标签文件

最近在跑SSD和Faster R-CNN的深度学习代码，下载了一些数据集，但这些数据集的标签文件不是xml格式，而是yaml格式。虽然网上有在线转换工具，但这种做法对我来说效率显然很低。为了提高效率，我自己写了相关的转换代码，现在分享给大家。

感谢Bosch Small Traffic Lights Dataset 提供的部分代码。

以下是label_images.py程序：

#!/usr/bin/env python
# The module docstring below doubles as the usage text printed by the
# command-line entry point, so it must start at column 0 to be valid Python.
"""
Example usage:
    python label_images.py input.yaml [output_folder]
"""
import sys
import os
import cv2
from read_label_file import get_all_labels
from WriteFileXml import writeInfoToXml

def ir(some_value):
    """Round ``some_value`` with ``round`` and return the result as an ``int``.

    Handy when a float coordinate has to be used as an array index.
    """
    rounded = round(some_value)
    return int(rounded)

def show_label_images(input_yaml, output_folder=None):
    """Write one XML label file per image entry found in a YAML label file.

    Args:
        input_yaml: Path of the YAML label file, passed to ``get_all_labels``.
        output_folder: Folder that receives the XML files. It is created when
            missing. ``None`` (the default) means the current directory.

    Raises:
        IOError: If an image listed in the YAML file cannot be read.
    """
    # NOTE(review): presumably a sequence of dicts with at least a 'path'
    # key and a 'boxes' list -- confirm against read_label_file.
    images = get_all_labels(input_yaml)

    # os.path.join(None, ...) raises TypeError, so fall back to the current
    # directory when no output folder was requested.
    if output_folder is None:
        output_folder = os.curdir
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for image_dict in images:
        image_path = image_dict['path']
        image = cv2.imread(image_path)

        # cv2.imread signals failure by returning None, so the check has to
        # happen before any attribute of the image is touched.
        if image is None:
            raise IOError('Could not open image path', image_path)
        img_size = image.shape

        image_name = os.path.basename(image_path)  # file name of the image
        XmlName = image_name.split('.', 1)[0] + '.xml'  # file name of the label
        # Convert this image's YAML entry to an XML label file.
        writeInfoToXml(XmlName, image_name, image_dict, img_size, output_folder)


if __name__ == '__main__':
    # Command-line entry point: the first argument is the YAML label file,
    # the optional second argument is the folder that receives the XML files.
    cli_args = sys.argv[1:]
    if not cli_args:
        print(__doc__)
        sys.exit(-1)
    label_file = cli_args[0]
    if len(cli_args) > 1:
        output_folder = cli_args[1]
    else:
        output_folder = None
    show_label_images(label_file, output_folder)

下面是WriteFileXml.py程序，该程序用于生成xml格式的标签文件，格式仿照PASCAL VOC的标签格式。

# -*- coding: utf-8 -*-
# @Time    : 18-5-23 上午6:58
# @Author  : lei liu
# @Blog    ：https://blog.csdn.net/T1243_3
# coding=utf-8

from xml.dom.minidom import Document
import os

# Write the label information in image_dict to a local XML file; the XmlName parameter is the XML file name.
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def writeInfoToXml(XmlName,imgname,image_dict,img_size,output_folder):
    """Write the labels of one image as a PASCAL-VOC-style XML file.

    Args:
        XmlName: File name of the XML file to create.
        imgname: Image file name stored in the ``<filename>`` element.
        image_dict: Mapping with a ``'boxes'`` list; every box is a mapping
            with the keys ``'label'``, ``'x_min'``, ``'y_min'``, ``'x_max'``
            and ``'y_max'``.
        img_size: Image shape indexed as ``(height, width[, depth])``. When
            the depth entry is missing, a depth of 1 is written.
        output_folder: Folder the XML file is written to. ``None`` means the
            current directory.
    """
    doc = Document()

    # Root node <annotation>.
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'VOC2007')
    _append_text_element(doc, annotation, 'filename', imgname)

    # <size> holds <width>, <height> and <depth>.
    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', str(img_size[1]))
    _append_text_element(doc, size, 'height', str(img_size[0]))
    # A 2-D shape (single-channel image) has no third entry.
    depth = img_size[2] if len(img_size) > 2 else 1
    _append_text_element(doc, size, 'depth', str(depth))

    # One <object> per box: its <name> followed by the <bndbox> corners.
    corner_tags = (
        ('xmin', 'x_min'),
        ('ymin', 'y_min'),
        ('xmax', 'x_max'),
        ('ymax', 'y_max'),
    )
    for box in image_dict['boxes']:
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text_element(doc, obj, 'name', str(box['label']))

        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        for tag, key in corner_tags:
            _append_text_element(doc, bndbox, tag, str(box[key]))

    if output_folder is None:
        output_folder = os.curdir

    # toprettyxml() returns bytes when an encoding is given, so the file has
    # to be opened in binary mode.
    with open(os.path.join(output_folder, XmlName), 'wb') as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))

if __name__ == '__main__':
    # Manual smoke test: write one sample label file. writeInfoToXml needs
    # all five arguments and a dict with a 'boxes' list, so build a minimal
    # example with a single box.
    image_dict = {
        'boxes': [
            {
                'label': 'Green',
                'x_min': 940.25,
                'y_min': 242.625,
                'x_max': 951.0,
                'y_max': 277.25,
            },
        ],
    }
    img_size = (720, 1280, 3)  # indexed by writeInfoToXml as (height, width, depth)
    writeInfoToXml('test1.xml', '0001.png', image_dict, img_size,
                   '/home/ubuntu/bstld-master')

最后生成的标签文件如下面xml文件所示：

<?xml version="1.0" encoding="utf-8"?>
<annotation>
	<folder>VOC2007</folder>
	<filename>26420.png</filename>
	<size>
		<width>1280</width>
		<height>720</height>
		<depth>3</depth>
	</size>
	<object>
		<name>Green</name>
		<bndbox>
			<xmin>940.25</xmin>
			<ymin>242.625</ymin>
			<xmax>951.0</xmax>
			<ymax>277.25</ymax>
		</bndbox>
	</object>
	<object>
		<name>Green</name>
		<bndbox>
			<xmin>95.0</xmin>
			<ymin>250.0</ymin>
			<xmax>104.5</xmax>
			<ymax>285.625</ymax>
		</bndbox>
	</object>
</annotation>

秒客网

python 将yaml标签文件转化为xml格式的标签文件

相关文章