传统的标注方式需要使用标注软件人工逐个标注，非常耗费时间和精力。如果我们已经有一个基础的、通用的模型，只是效果一般，现在想要重新标注数据，就可以先用这个现有模型自动生成VOC格式的标签文件。

1、Automatic_labeling.py


"""  "*******************************************************************************************
*文件名称 ：Automatic_labeling.py
*文件功能 ：基于yolo v4的 自动标注软件

版本：1.0
内容：基于yolo v4的 自动标注软件，验证成功
时间：2021.10.27
作者：狄云
********************************************************************************************"""


import tensorflow as tf
# Create a TF1-compat session whose GPU options set allow_growth=True
# (presumably so TensorFlow grows GPU memory on demand instead of reserving it all up front - confirm against the TF docs).
config = tf.compat.v1.ConfigProto(gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
sess = tf.compat.v1.Session(config=config)
# 1. Import the required packages and libraries
import cv2 as cv
from PIL import Image
import numpy as np
import os
import sys
from yolo import YOLO  # earlier variant: from yolo import YOLO, detect_video


# Build the detector once at import time; the main loop reuses this instance for every image.
yolo = YOLO()



class OBJECT:
    """One labelled detection: an integer bounding box plus its class name."""

    def __init__(self, bb, classname):
        """Store the box corners and the label.

        Args:
            bb: indexable of at least four numbers, read in the order
                xmin, ymin, xmax, ymax.
            classname: label stored unchanged on the instance.
        """
        # round() with no second argument rounds to the nearest integer;
        # int() then guarantees a plain Python int for the XML output.
        self.xmin, self.ymin, self.xmax, self.ymax = (
            int(round(bb[idx])) for idx in range(4)
        )
        self.classname = classname

# Pascal VOC annotation templates: header, one <object> entry, and footer.
# The {PLACEHOLDER} fields are filled in by create_voc_xml().
xml_body_1 = """<annotation>
        <folder>{FOLDER}</folder>
        <filename>{FILENAME}</filename>
        <path>{PATH}</path>
        <source>
                <database>Unknown</database>
        </source>
        <size>
                <width>{WIDTH}</width>
                <height>{HEIGHT}</height>
                <depth>3</depth>
        </size>
        <segmented>0</segmented>
"""
xml_object = """ <object>
                <name>{CLASS}</name>
                <pose>Unspecified</pose>
                <truncated>0</truncated>
                <difficult>0</difficult>
                <bndbox>
                        <xmin>{XMIN}</xmin>
                        <ymin>{YMIN}</ymin>
                        <xmax>{XMAX}</xmax>
                        <ymax>{YMAX}</ymax>
                </bndbox>
        </object>
"""
xml_body_2 = """</annotation>
"""


def create_voc_xml(xml_file, img_file,imgW, imgH, objects , display=False):
    """Write a Pascal VOC style annotation file for one image.

    Args:
        xml_file: path of the XML file to create (overwritten if it exists).
        img_file: path of the annotated image; its base name goes into
            <filename>, its parent directory name into <folder>, and the
            full string into <path>.
        imgW: image width written to <size>/<width>.
        imgH: image height written to <size>/<height>.
        objects: iterable of items exposing ``classname``, ``xmin``,
            ``ymin``, ``xmax`` and ``ymax`` attributes; one <object>
            element is written per item.
        display: when true, print the name of the file that was written.
    """
    # Local import so the block stays self-contained without touching the
    # file-level import list.
    from xml.sax.saxutils import escape

    # Text fields are escaped so that '&', '<' and '>' in a file name or
    # class name cannot produce malformed XML.
    header = xml_body_1.format(
        FOLDER=escape(os.path.basename(os.path.dirname(img_file))),
        FILENAME=escape(os.path.basename(img_file)),
        PATH=escape(img_file),
        WIDTH=imgW,
        HEIGHT=imgH,
    )
    entries = [
        xml_object.format(
            CLASS=escape(str(obj.classname)),
            XMIN=obj.xmin,
            YMIN=obj.ymin,
            XMAX=obj.xmax,
            YMAX=obj.ymax,
        )
        for obj in objects
    ]

    # The templates carry no XML declaration, so parsers assume UTF-8;
    # write UTF-8 explicitly instead of relying on the locale encoding.
    with open(xml_file, "w", encoding="utf-8") as f:
        f.write(header)
        f.writelines(entries)
        f.write(xml_body_2)

    if display:
        print("New xml", xml_file)



# def SAL_print_log(print_str):
#
#     print('[ERROR]路径不正确： %s' % Picture_file_path)
#     sys._getframe().f_lineno


if __name__ == "__main__":
    # Batch auto-labelling: run the detector on every image in
    # Picture_file_path and write one VOC XML per image that has detections.
    debug = 1

    # Input image directory and output directory for the generated XML files.
    Picture_file_path = r'E:/1_Training_picture/11_car/car_train'
    Xmls_file_path    = r'E:/1_Training_picture/11_car/car_xml'
    # Abort early when the input directory does not exist.
    if not os.path.exists(Picture_file_path):
        print('[ERROR]路径不正确： %s' % Picture_file_path)
        sys.exit(1)
    # Make sure the output directory exists before the first XML is written.
    os.makedirs(Xmls_file_path, exist_ok=True)

    total_picture = os.listdir(Picture_file_path)  # names of all entries in the image directory
    num = len(total_picture)  # number of directory entries (candidate images)
    print('共有 %d 张图片进行自动化标注......' % num)

    for picture in total_picture:
        picture_name=Picture_file_path + '/'+ picture
        print('正在读取: %s' % picture_name)
        src = cv.imread(picture_name)
        # cv.imread returns None for entries it cannot decode (sub-folders,
        # non-image files, corrupt images); skip them instead of crashing.
        if src is None:
            print('[ERROR]无法读取图片，已跳过： %s' % picture_name)
            continue
        dst=src.copy()
        height, width, bytesPerComponent = src.shape  # rows, columns, channels
        print( height, width, bytesPerComponent)
        cv.namedWindow("3", 0)
        cv.imshow("3", src)
        cv.waitKey(10)
        print('[DEBUG]运行到：%s 行'  % sys._getframe().f_lineno)


        # OpenCV loads BGR; convert to RGB and wrap as a PIL image for the detector.
        frame = cv.cvtColor(src, cv.COLOR_BGR2RGB)
        frame = Image.fromarray(np.uint8(frame))
        img0, out_boxes, out_classes = yolo.detect_image(frame)
        img0= np.array(img0)
        # Back to BGR so OpenCV displays the annotated image correctly.
        result = cv.cvtColor(img0, cv.COLOR_RGB2BGR)
        print('[DEBUG]运行到：%s 行'  % sys._getframe().f_lineno)

        objects = []
        for box, predicted_class in zip(out_boxes, out_classes):
            print('[DEBUG]运行到：%s 行' %sys._getframe().f_lineno)

            # Boxes are unpacked as (top, left, bottom, right); pad each side by 5 px.
            top, left, bottom, right = box
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Round half up to integers and clamp to the image bounds.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(height, np.floor(bottom + 0.5).astype('int32'))
            right = min(width, np.floor(right + 0.5).astype('int32'))

            # VOC order is xmin, ymin, xmax, ymax.
            finall_boundingBoxes=[left, top, right, bottom]
            objects.append(OBJECT(finall_boundingBoxes, predicted_class))
            if debug:
                print("finall_boundingBoxes_1=", finall_boundingBoxes)
                cv.rectangle(dst, (int(left), int(top)), (int(right), int(bottom)), (0, 255, 0), 5)

        if debug:
            cv.namedWindow("result", 0)
            cv.imshow('result', result)
            cv.waitKey(1)
            cv.namedWindow("dst", 0)
            cv.imshow('dst', dst)
            cv.waitKey(1)

        # Only images with at least one detection get an annotation file.
        if objects:
            print('[DEBUG]运行到：%s 行'  % sys._getframe().f_lineno)

            print("picture=", picture)
            # XML file shares the image's base name, with an .xml extension.
            xmls_name=Xmls_file_path+"/" +  str(os.path.splitext(picture)[0]) +".xml"
            print("xmls_name=", xmls_name)

            print('[DEBUG]运行到：%s 行'  % sys._getframe().f_lineno)
            create_voc_xml(xmls_name, picture_name, width, height, objects, display=False)
            print('[DEBUG]运行到：%s 行'  % sys._getframe().f_lineno)

2、yolo.py


import tensorflow as tf
# Create a TF1-compat session whose GPU options set allow_growth=True
# (presumably so GPU memory is allocated on demand - confirm against the TF docs).
config = tf.compat.v1.ConfigProto(gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
sess = tf.compat.v1.Session(config=config)



import os
import numpy as np
import copy
import colorsys
from timeit import default_timer as timer
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
from nets.yolo4 import yolo_body,yolo_eval
from utils.utils import letterbox_image
#--------------------------------------------#
#   使用自己训练好的模型预测需要修改2个参数
#   model_path和classes_path都需要修改！
#--------------------------------------------#
class YOLO(object):
    """Wrap a Keras YOLOv4 model and expose single-image detection.

    Configuration comes from ``_defaults``: every key is copied onto the
    instance in ``__init__`` and can be overridden there by keyword
    argument. ``detect_image`` keeps only detections named ``'car'``.

    To use a self-trained model, change at least ``model_path`` and
    ``classes_path``.
    """

    _defaults = {
        "model_path": 'model_data/yolo_car_person_20201126.h5',  # model / weights file to load
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "Chinese_classes_path": 'model_data/my_Chinese_classes.txt',
        "score" : 0.5,
        "iou" : 0.3,
        # Use 416x416 when GPU memory is small,
        # 608x608 when GPU memory is large.
        "model_image_size" : (416, 416)
    }
    # Class-level lists (shared by all instances); no method in this class
    # currently appends to them.
    target_type_list = []
    target_type_Chinese_list = []
    boxes_num=0  # number of boxes produced by the last detect_image() call

    @classmethod
    def get_defaults(cls, n):
        """Return the default value for setting ``n``.

        For an unknown name this returns a message string rather than raising.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise the detector
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load class names and anchors, then build the detection graph.

        Args:
            **kwargs: optional overrides for the keys of ``_defaults``
                (for example ``score=0.3``); they are set as instance
                attributes on top of the defaults.
        """
        self.__dict__.update(self._defaults)
        # Apply caller overrides; previously kwargs were accepted but ignored.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.Chinse_class_names = self.get_class_Chinese_name()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.boxes, self.scores, self.classes = self.generate()

    #---------------------------------------------------#
    #   Read all class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the class names in ``classes_path``, one stripped line each."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def get_class_Chinese_name(self):
        """Return the Chinese labels in ``Chinese_classes_path`` (read as UTF-8)."""
        classes_path = os.path.expanduser(self.Chinese_classes_path)
        with open(classes_path, 'r', encoding='UTF-8') as f:
            Chinese_class_names = f.readlines()
        Chinese_class_names = [c.strip() for c in Chinese_class_names]
        return Chinese_class_names

    #---------------------------------------------------#
    #   Read all anchor (prior) boxes
    #---------------------------------------------------#
    def _get_anchors(self):
        """Parse the first line of ``anchors_path`` into an (N, 2) float array.

        The line is a comma-separated list of numbers, reshaped into pairs.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape(-1, 2)

    #---------------------------------------------------#
    #   Load the model and build the output tensors
    #---------------------------------------------------#
    def generate(self):
        """Load the network and return the ``(boxes, scores, classes)`` tensors.

        Also fills ``self.yolo_model``, ``self.colors`` (one RGB tuple per
        class) and ``self.input_image_shape`` (placeholder of shape (2,)).

        Raises:
            AssertionError: if ``model_path`` does not end in ``.h5`` or a
                fully loaded model does not match the anchor/class counts.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Number of anchors and classes
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)

        # If the file contains the full model (architecture included), load it
        # directly; otherwise build the architecture and load weights only.
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit still propagate.
            self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
            # Use the expanded path, consistent with the load_model call above.
            self.yolo_model.load_weights(model_path)
        else:
            assert self.yolo_model.layers[-1].output_shape[-1] == \
                num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # One distinct box colour per class, evenly spaced in hue.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

        # Shuffle the colours with a fixed seed (same order on every run),
        # then re-seed the global RNG.
        np.random.seed(10101)
        np.random.shuffle(self.colors)
        np.random.seed(None)

        self.input_image_shape = K.placeholder(shape=(2, ))

        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                num_classes, self.input_image_shape,
                score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

    #---------------------------------------------------#
    #   Detect objects in one image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and draw the ``'car'`` detections on it.

        Args:
            image: PIL image; it is drawn on in place.

        Returns:
            Tuple ``(image, return_boxes, return_classes)``: the annotated
            input image, the unpadded ``(top, left, bottom, right)`` boxes
            of the kept detections, and their class names.
        """
        start = timer()
        self.boxes_num=0

        # Letterbox the image to the network input size and scale to [0, 1].
        new_image_size = self.model_image_size
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        # Run the network. image.size is (width, height), so the shape fed
        # here is [height, width].
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        # Record the number of boxes found (before the class filter below).
        self.boxes_num=len(out_boxes)
        print(self.boxes_num)

        # Font size and outline thickness scale with the image size.
        font = ImageFont.truetype(font='font/simhei.ttf',
                    size=int(np.floor(3e-2 * image.size[1] + 0.5)))
        thickness = (image.size[0] + image.size[1]) // 300

        return_boxes=[]
        return_classes=[]
        for i, c in enumerate(out_classes):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            # Keep only vehicle detections; everything else is dropped.
            if predicted_class != 'car':
                continue
            # The caller receives the raw, unpadded box.
            return_boxes.append(box)
            return_classes.append(predicted_class)

            # Pad by 5 px per side, round half up and clamp to the image
            # bounds; these values are used for drawing only.
            top, left, bottom, right = box
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print("top=", top,left,bottom,right,image.size[0],image.size[1])

            # Draw the box and its "<class> <score>" label (English name).
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            # NOTE(review): ImageDraw.textsize is no longer available in recent
            # Pillow releases - confirm the pinned Pillow version.
            label_size = draw.textsize(label, font)
            print(label.encode('utf-8'))

            # Put the label above the box when there is room, otherwise
            # just inside its top edge.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # A thick outline is drawn as nested 1-px rectangles.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        end = timer()
        print(end - start)
        return image,return_boxes,return_classes

    def close_session(self):
        """Close the backend session held by this detector."""
        self.sess.close()

我使用的yolo v4的github公开源码：

https://github.com/bubbliiiing/yolov4-keras

该代码基于tensorflow+keras，将源码下载下来后，用我写的这两个文件替换（或添加到）源码中即可使用。

效果：

然后再手动检查一下，就可以快速得到数据集了。

醋醋百科网

Good Luck To You!

基于yolo v4的自动化标注软件实现——附源码

1、Automatic_labeling.py

2、yolo.py

：效果