传统的标注方式需要使用标注软件由人工逐个进行标注,非常耗费时间和精力。如果我们已经有一个基础的、通用的模型,只是效果一般,而现在想要重新标注数据,就可以先用这个现有的模型自动生成VOC格式的标签文件。
1、Automatic_labeling.py
""" "*******************************************************************************************
*文件名称 :Automatic_labeling.py
*文件功能 :基于yolo v4的 自动标注软件
版本:1.0
内容:基于yolo v4的 自动标注软件,验证成功
时间:2021.10.27
作者:狄云
********************************************************************************************"""
import tensorflow as tf
# Create a TF1-compat session whose GPU options set allow_growth=True
# (per the TensorFlow docs this makes GPU memory grow on demand instead of
# being reserved up front). NOTE(review): `sess` is not referenced again in
# this script - confirm whether creating it is only needed for its side effect.
config = tf.compat.v1.ConfigProto(gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
sess = tf.compat.v1.Session(config=config)
# 1. Import the required packages and libraries
import cv2 as cv
from PIL import Image
import numpy as np
import os
import sys
from yolo import YOLO # earlier variant of this import: from yolo import YOLO, detect_video
# Build the detector once at import time; the __main__ loop below calls
# yolo.detect_image() on this module-level instance for every picture.
yolo = YOLO()
class OBJECT:
    """One annotated object: an integer bounding box plus its class name.

    Args:
        bb: Indexable holding at least four numbers, read in the order
            ``[xmin, ymin, xmax, ymax]``.
        classname: Label of the object; stored unchanged.
    """

    def __init__(self, bb, classname):
        # round() without a second argument rounds to the nearest integer;
        # the int() wrapper guarantees a plain Python int even when the
        # coordinate arrives as a NumPy scalar.
        self.xmin = int(round(bb[0]))
        self.ymin = int(round(bb[1]))
        self.xmax = int(round(bb[2]))
        self.ymax = int(round(bb[3]))
        self.classname = classname

    def __repr__(self):
        """Return a debug representation showing the box and the label."""
        return "OBJECT([{}, {}, {}, {}], {!r})".format(
            self.xmin, self.ymin, self.xmax, self.ymax, self.classname)
# Pascal VOC annotation templates: file header, one <object> entry, and footer.
# The {NAME} placeholders are filled in by create_voc_xml() via str.format().
xml_body_1 = """<annotation>
<folder>FOLDER</folder>
<filename>{FILENAME}</filename>
<path>{PATH}</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>{WIDTH}</width>
<height>{HEIGHT}</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
"""
xml_object = """ <object>
<name>{CLASS}</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>{XMIN}</xmin>
<ymin>{YMIN}</ymin>
<xmax>{XMAX}</xmax>
<ymax>{YMAX}</ymax>
</bndbox>
</object>
"""
xml_body_2 = """</annotation>
"""


# Create the VOC annotation file
def create_voc_xml(xml_file, img_file, imgW, imgH, objects, display=False):
    """Write a Pascal VOC style annotation file for one image.

    Args:
        xml_file: Path of the xml file to create (overwritten if present).
        img_file: Path of the annotated image; written to ``<path>`` and its
            base name to ``<filename>``.
        imgW: Image width written to ``<width>``.
        imgH: Image height written to ``<height>``.
        objects: Iterable of items exposing ``classname``, ``xmin``, ``ymin``,
            ``xmax`` and ``ymax`` attributes; one ``<object>`` is written each.
        display: When true, print the name of the file that was written.

    Raises:
        OSError: If ``xml_file`` cannot be opened for writing.
    """
    # Function-scope import so the block does not depend on a new file-level import.
    from xml.sax.saxutils import escape

    # The document carries no encoding declaration, so parsers treat it as
    # UTF-8; write it as UTF-8 explicitly instead of relying on the locale
    # encoding, which corrupts non-ASCII file names on some platforms.
    with open(xml_file, "w", encoding="utf-8") as f:
        # Text inserted into the markup is escaped so that '&', '<' or '>'
        # in a path or class name cannot produce malformed XML.
        f.write(xml_body_1.format(
            FILENAME=escape(os.path.basename(img_file)),
            PATH=escape(str(img_file)),
            WIDTH=imgW,
            HEIGHT=imgH,
        ))
        for obj in objects:
            f.write(xml_object.format(
                CLASS=escape(str(obj.classname)),
                XMIN=obj.xmin,
                YMIN=obj.ymin,
                XMAX=obj.xmax,
                YMAX=obj.ymax,
            ))
        f.write(xml_body_2)
    if display:
        print("New xml", xml_file)
# def SAL_print_log(print_str):
#
# print('[ERROR]路径不正确: %s' % Picture_file_path)
# sys._getframe().f_lineno
if __name__ == "__main__":
    # Script entry point: run the detector over every entry of the picture
    # directory and write one VOC xml file per image that has detections.
    debug = 1
    # Directory holding the images, and directory receiving the xml files
    Picture_file_path = r'E:/1_Training_picture/11_car/car_train'
    Xmls_file_path = r'E:/1_Training_picture/11_car/car_xml'
    # Make sure the input directory exists
    if not os.path.exists(Picture_file_path):
        print('[ERROR]路径不正确: %s' % Picture_file_path)
        sys.exit(1)
    # The output directory used to be assumed; create it up front so the
    # first create_voc_xml() call cannot fail on a missing folder.
    os.makedirs(Xmls_file_path, exist_ok=True)
    total_picture = os.listdir(Picture_file_path)  # names of all entries in the directory
    num = len(total_picture)  # number of directory entries to process
    print('共有 %d 张图片进行自动化标注......' % num)
    for picture in total_picture:
        picture_name = Picture_file_path + '/' + picture
        print('正在读取: %s' % picture_name)
        src = cv.imread(picture_name)
        if src is None:
            # cv.imread signals failure by returning None (sub-directory,
            # non-image file, unreadable path); skip the entry instead of
            # crashing the whole run on src.copy().
            print('[ERROR]无法读取图片,已跳过: %s' % picture_name)
            continue
        dst = src.copy()
        height, width, bytesPerComponent = src.shape  # rows, columns, channels
        print(height, width, bytesPerComponent)
        cv.namedWindow("3", 0)
        cv.imshow("3", src)
        cv.waitKey(10)
        print('[DEBUG]运行到:%s 行' % sys._getframe().f_lineno)
        # OpenCV loads BGR; convert to RGB before handing the frame to PIL
        frame = cv.cvtColor(src, cv.COLOR_BGR2RGB)
        # Wrap the array as a PIL Image, the type passed to detect_image()
        frame = Image.fromarray(np.uint8(frame))
        img0, out_boxes, out_classes = yolo.detect_image(frame)
        img0 = np.array(img0)
        # Back from RGB to BGR so OpenCV displays the annotated image correctly
        result = cv.cvtColor(img0, cv.COLOR_RGB2BGR)
        print('[DEBUG]运行到:%s 行' % sys._getframe().f_lineno)
        objects = []
        for predicted_class, box in zip(out_classes, out_boxes):
            print('[DEBUG]运行到:%s 行' % sys._getframe().f_lineno)
            top, left, bottom, right = box
            # Grow the box by 5 px on every side, then round and clamp it
            # to the image bounds.
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(height, np.floor(bottom + 0.5).astype('int32'))
            right = min(width, np.floor(right + 0.5).astype('int32'))
            xmin = left
            ymin = top
            xmax = right
            ymax = bottom
            finall_boundingBoxes = [xmin, ymin, xmax, ymax]
            objects.append(OBJECT(finall_boundingBoxes, predicted_class))
            if debug:
                print("finall_boundingBoxes_1=", finall_boundingBoxes)
                cv.rectangle(dst, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 5)
        if debug:
            cv.namedWindow("result", 0)
            cv.imshow('result', result)
            cv.waitKey(1)
            cv.namedWindow("dst", 0)
            cv.imshow('dst', dst)
            cv.waitKey(1)
        # Only images with at least one detection get an xml file
        if objects:
            print('[DEBUG]运行到:%s 行' % sys._getframe().f_lineno)
            print("picture=", picture)
            xml = os.path.splitext(picture)
            xmls_name = Xmls_file_path + "/" + str(xml[0]) + ".xml"
            print("xmls_name=", xmls_name)
            print('[DEBUG]运行到:%s 行' % sys._getframe().f_lineno)
            imgH = height
            imgW = width
            create_voc_xml(xmls_name, picture_name, imgW, imgH, objects, display=False)
            print('[DEBUG]运行到:%s 行' % sys._getframe().f_lineno)
        #cv.waitKey(0)
2、yolo.py
import tensorflow as tf
# Create a TF1-compat session whose GPU options set allow_growth=True
# (per the TensorFlow docs this makes GPU memory grow on demand).
# NOTE(review): the YOLO class below obtains its session via K.get_session();
# confirm whether this `sess` is the one Keras ends up using.
config = tf.compat.v1.ConfigProto(gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
sess = tf.compat.v1.Session(config=config)
import os
import numpy as np
import copy
import colorsys
from timeit import default_timer as timer
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
# Project-local modules from the yolov4-keras repository
from nets.yolo4 import yolo_body,yolo_eval
from utils.utils import letterbox_image
#--------------------------------------------#
#   To predict with your own trained model, two settings must be changed:
#   both model_path and classes_path need to be updated!
#--------------------------------------------#
class YOLO(object):
    """YOLO v4 detector wrapper: loads the model and annotates PIL images.

    Settings come from ``_defaults`` and may be overridden per instance with
    keyword arguments, e.g. ``YOLO(score=0.3)``.
    """

    _defaults = {
        "model_path": 'model_data/yolo_car_person_20201126.h5',  # model file to load
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "Chinese_classes_path": 'model_data/my_Chinese_classes.txt',
        "score": 0.5,
        "iou": 0.3,
        # Use 416x416 when GPU memory is limited,
        # 608x608 when more GPU memory is available
        "model_image_size": (416, 416)
    }
    target_type_list = []  # class-level list intended for detected labels (not filled by this class)
    target_type_Chinese_list = []  # class-level list intended for Chinese labels (not filled by this class)
    boxes_num = 0  # number of boxes reported by the most recent detect_image() call

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a message string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise YOLO
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load class names, anchors and the model.

        Args:
            **kwargs: Optional per-instance overrides of the ``_defaults``
                settings. They were previously accepted but silently ignored.
        """
        self.__dict__.update(self._defaults)
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.Chinse_class_names = self.get_class_Chinese_name()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.boxes, self.scores, self.classes = self.generate()

    #---------------------------------------------------#
    #   Read all class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the class names listed one per line in ``classes_path``."""
        classes_path = os.path.expanduser(self.classes_path)
        # Explicit UTF-8, consistent with get_class_Chinese_name(), so the
        # result does not depend on the platform's locale encoding.
        with open(classes_path, encoding='utf-8') as f:
            return [line.strip() for line in f.readlines()]

    # Read the matching Chinese labels
    def get_class_Chinese_name(self):
        """Return the labels listed one per line in ``Chinese_classes_path``."""
        classes_path = os.path.expanduser(self.Chinese_classes_path)
        with open(classes_path, 'r', encoding='UTF-8') as f:
            return [line.strip() for line in f.readlines()]

    #---------------------------------------------------#
    #   Read all anchor boxes
    #---------------------------------------------------#
    def _get_anchors(self):
        """Return the anchors as an ``(N, 2)`` float array.

        Only the first line of ``anchors_path`` is read; it must hold
        comma-separated numbers.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        anchors = [float(x) for x in first_line.split(',')]
        return np.array(anchors).reshape(-1, 2)

    #---------------------------------------------------#
    #   Load the model and build the output tensors
    #---------------------------------------------------#
    def generate(self):
        """Load the network and return the ``(boxes, scores, classes)`` tensors.

        Also sets ``self.yolo_model``, ``self.colors`` and
        ``self.input_image_shape``.

        Raises:
            AssertionError: If ``model_path`` does not end in ``.h5`` or a
                fully loaded model disagrees with the anchor/class counts.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
        # Number of anchors and classes
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        # If the file already contains the architecture, load it directly;
        # otherwise build the network first and load only the weights.
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed by the fallback path.
            self.yolo_model = yolo_body(Input(shape=(None, None, 3)), num_anchors // 3, num_classes)
            # Use the expanded path; the raw setting may start with '~'.
            self.yolo_model.load_weights(model_path)
        else:
            assert self.yolo_model.layers[-1].output_shape[-1] == \
                num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'
        print('{} model, anchors, and classes loaded.'.format(model_path))
        # One distinct colour per class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
        # Shuffle the colours with a fixed seed, then re-randomise the seed
        np.random.seed(10101)
        np.random.shuffle(self.colors)
        np.random.seed(None)
        self.input_image_shape = K.placeholder(shape=(2, ))
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                                           num_classes, self.input_image_shape,
                                           score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and draw the kept boxes onto it.

        Only detections whose class name is ``'car'`` are kept.

        Args:
            image: PIL image; it is drawn on in place.

        Returns:
            Tuple ``(image, return_boxes, return_classes)``. ``return_boxes``
            holds the raw model boxes in ``(top, left, bottom, right)`` order,
            without the 5 px padding and clamping used for drawing;
            ``return_classes`` holds the matching class names.
        """
        start = timer()
        self.boxes_num = 0
        # Resize the image to the network input size
        # NOTE(review): letterbox_image is a project helper not visible here -
        # presumably an aspect-preserving resize with padding; confirm.
        new_image_size = self.model_image_size
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
        # Run the prediction
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        # Number of boxes found, counted before the class filter below
        self.boxes_num = len(out_boxes)
        print(self.boxes_num)
        # Font size and box thickness scale with the image size
        font = ImageFont.truetype(font='font/simhei.ttf',
                                  size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300
        return_boxes = []
        return_classes = []
        for i, c in enumerate(out_classes):
            predicted_class = self.class_names[c]
            #predicted_Chinse_class = self.Chinse_class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            if predicted_class != 'car':  # drop every non-vehicle class
                continue
            return_boxes.append(box)
            return_classes.append(predicted_class)
            top, left, bottom, right = box
            # Grow the box by 5 px on every side, then round and clamp it
            # to the image bounds.
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print("top=", top, left, bottom, right, image.size[0], image.size[1])
            # Draw the box and its label
            label = '{} {:.2f}'.format(predicted_class, score)  # English label
            #label = '{} {:.2f}'.format(predicted_Chinse_class, score)  # Chinese label
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            # Log output is kept as UTF-8 bytes; the text drawn below uses the
            # str directly instead of an encode/decode round-trip.
            print(label.encode('utf-8'))
            # Put the label above the box when it fits, otherwise just inside
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])
            # Nested 1 px rectangles emulate a thick outline; the loop
            # variable no longer shadows the outer detection index.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        end = timer()
        print(end - start)
        return image, return_boxes, return_classes

    def close_session(self):
        """Close the session held by this instance."""
        self.sess.close()
我使用的yolo v4的github公开源码:
https://github.com/bubbliiiing/yolov4-keras
该代码基于tensorflow+keras,将源码下载下来后,用我写的这两个文件替换(或添加到)工程中即可使用。
效果:
然后再手动检查一下,就可以快速得到数据集了。