Billboard and Shop-Sign Object Detection Based on Deep Learning
Anaconda setup
Create a virtual environment:
conda create -n tf2 python=3.7
Activate the virtual environment:
conda activate tf2
Installing TensorFlow
conda install tensorflow-gpu==2.2  # (Anaconda on Windows currently only has 2.1, not 2.2; Linux has 2.2)
pip install tensorflow-gpu==2.2.0 -i https://pypi.douban.com/simple/  # use pip instead where conda lacks 2.2
Type python to enter the interpreter, then run import tensorflow as tf; if no error appears, check the version with tf.__version__.
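A quick version of that sanity check, assuming a GPU build is installed (tf.config.list_physical_devices is available from TF 2.1 on):

import tensorflow as tf

print(tf.__version__)                          # expect 2.2.0
print(tf.config.list_physical_devices('GPU'))  # non-empty list if the GPU is visible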
Installing the TensorFlow Object Detection API
git clone https://github.com/tensorflow/models.git
First install protobuf:
conda install protobuf
On Windows, the Visual Studio C++ build tools must be installed first; then install pycocotools:
pip install pycocotools==2.0.2
cd models/research
# Compile protos.
protoc object_detection/protos/*.proto --python_out=.
# Install TensorFlow Object Detection API.
cp object_detection/packages/tf2/setup.py .
python -m pip install --use-feature=2020-resolver .
Run a quick test:
python object_detection/builders/model_builder_tf2_test.py
Creating a TFRecord dataset
The detailed steps are in models\research\object_detection\g3doc\tf2.md; follow them one by one.
The TensorFlow Object Detection API expects datasets in TFRecord format, so first convert the billboard dataset to VOC format, then convert that to TFRecord.
Create ImageSets/Main under the 门牌检测 (sign detection) directory, then run the script below to split the data:
import os
import random

trainval_percent = 0.9  # adjust as needed
train_percent = 0.7     # adjust as needed
xmlfilepath = "E:/学习/实验室/目标检测/门牌检测/Annotations"  # use your own path
txtsavepath = "E:/学习/实验室/目标检测/门牌检测/ImageSets/Main"
total_xml = os.listdir(xmlfilepath)
total_xml.sort(key=lambda x: int(x[:-4]))  # sort file names numerically

num = len(total_xml)
indices = range(num)  # avoid shadowing the built-in list
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)

ftrainval = open(txtsavepath + '/trainval.txt', 'w')
ftest = open(txtsavepath + '/test.txt', 'w')
ftrain = open(txtsavepath + '/train.txt', 'w')
fval = open(txtsavepath + '/val.txt', 'w')

for i in indices:
    name = total_xml[i][:-4] + '\n'
    if i in trainval:
        ftrainval.write(name)
        if i in train:
            ftrain.write(name)
        else:
            fval.write(name)
    else:
        ftest.write(name)

ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
print('Well finished')
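A quick way to verify the split sizes afterwards (a sketch, run from the 门牌检测 directory; not part of the original script):

for split in ('trainval', 'train', 'val', 'test'):
    with open('ImageSets/Main/%s.txt' % split) as f:
        print(split, len(f.read().split()))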
Create a billboard folder under models\research\object_detection.
For convenience, rename the 门牌检测 folder to voc and copy it into the billboard folder.
Copy the models\research\object_detection\data\pascal_label_map.pbtxt file over, then change it to:
item {
  id: 1
  name: 'attached_sd'
}
item {
  id: 2
  name: 'attached_ad'
}
item {
  id: 3
  name: 'floor_ad'
}
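To confirm the label map parses, you can load it with the API's own utility (an optional check, not a required step):

from object_detection.utils import label_map_util

label_map_dict = label_map_util.get_label_map_dict(
    'object_detection/billboard/pascal_label_map.pbtxt')
print(label_map_dict)  # expect {'attached_sd': 1, 'attached_ad': 2, 'floor_ad': 3}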
Copy models\research\object_detection\dataset_tools\create_pascal_tf_record.py over and modify it slightly:
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw PASCAL dataset to TFRecord for object_detection.
Example usage:
python object_detection/dataset_tools/create_pascal_tf_record.py \
--data_dir=/home/user/VOCdevkit \
--year=VOC2012 \
--output_path=/home/user/pascal.record
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import logging
import os
from lxml import etree
import PIL.Image
import tensorflow.compat.v1 as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
'merged set.')
flags.DEFINE_string('annotations_dir', 'Annotations',
'(Relative) path to annotations directory.')
flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
'Path to label map proto')
flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
'difficult instances')
FLAGS = flags.FLAGS
SETS = ['train', 'val', 'trainval', 'test']
YEARS = ['voc']

def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join('voc', image_subdirectory, data['filename'])
  # img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
  print("img_path: " + img_path)
  full_path = os.path.join(dataset_directory, img_path)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  if 'object' in data:
    for obj in data['object']:
      difficult = bool(int(obj['difficult']))
      if ignore_difficult_instances and difficult:
        continue

      difficult_obj.append(int(difficult))

      xmin.append(float(obj['bndbox']['xmin']) / width)
      ymin.append(float(obj['bndbox']['ymin']) / height)
      xmax.append(float(obj['bndbox']['xmax']) / width)
      ymax.append(float(obj['bndbox']['ymax']) / height)
      classes_text.append(obj['name'].encode('utf8'))
      classes.append(label_map_dict[obj['name']])
      truncated.append(int(obj['truncated']))
      poses.append(obj['pose'].encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  return example


def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))

  data_dir = FLAGS.data_dir
  years = ['voc']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                 FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      writer.write(tf_example.SerializeToString())

  writer.close()


if __name__ == '__main__':
  tf.app.run()
Then, from models\research, run the following commands:
python object_detection\billboard\create_pascal_tf_record.py --label_map_path=object_detection\billboard\pascal_label_map.pbtxt --data_dir=D:\models\research\object_detection\billboard --year=voc --set=train --output_path=object_detection\billboard\billboard_train.record
python object_detection\billboard\create_pascal_tf_record.py --label_map_path=object_detection\billboard\pascal_label_map.pbtxt --data_dir=D:\models\research\object_detection\billboard --year=voc --set=val --output_path=object_detection\billboard\billboard_val.record
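To check that the records were actually written, you can count the entries with the TF1 compat record iterator (a quick sketch):

import tensorflow.compat.v1 as tf

count = sum(1 for _ in tf.python_io.tf_record_iterator(
    'object_detection/billboard/billboard_train.record'))
print(count)  # should match the number of images in train.txt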
Special case: the label in 526.xml was changed to attached_sd, since it should count as a hanging shop sign. There may be other special cases I overlooked; fix them yourself.
Note: fine_tune_checkpoint_type can be "fine_tune" or "detection"; pick whichever your machine can actually run.
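The relevant lines live inside train_config in pipeline.config and look roughly like this (the checkpoint path is a placeholder for wherever the pretrained model was unpacked):

fine_tune_checkpoint: "object_detection/billboard/centernet_hg104/checkpoint/ckpt-0"
fine_tune_checkpoint_type: "detection"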
Training the model
python object_detection/model_main_tf2.py --pipeline_config_path=object_detection/billboard/centernet_hg104/pipeline.config --model_dir=object_detection/billboard/centernet_hg104_result --alsologtostderr
watch -n 1 -d nvidia-smi  # monitor GPU usage in another terminal
Evaluating the model
(This is meant to run in a second window while training is going on, but if GPU memory is tight, better not to.)
python object_detection/model_main_tf2.py --pipeline_config_path=object_detection/billboard/centernet_hg104/pipeline.config --model_dir=object_detection/billboard/centernet_hg104_result --checkpoint_dir=object_detection/billboard/centernet_hg104_result --alsologtostderr
Monitoring training
After starting TensorBoard, open the URL it prints (http://localhost:6006 by default):
tensorboard --logdir=object_detection/billboard/centernet_hg104_result
Exporting the model to the output folder
python object_detection\exporter_main_v2.py --input_type image_tensor --pipeline_config_path object_detection\billboard\ssd_mobilenet_v2_fpnlite\pipeline.config --trained_checkpoint_dir object_detection\billboard\ssd_mobilenet_v2_fpnlite_result --output_directory object_detection\billboard\output
Using the model for prediction; put the images to predict in the test folder:
python object_detection\billboard\predicate.py
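predicate.py itself is not reproduced here; a minimal sketch of what such a script can look like with the exported SavedModel (the directory names follow the export step above, everything else is an assumption):

import glob

import numpy as np
import tensorflow as tf
from PIL import Image

detect_fn = tf.saved_model.load('object_detection/billboard/output/saved_model')
for path in glob.glob('object_detection/billboard/test/*.jpg'):
    image = np.array(Image.open(path))
    input_tensor = tf.convert_to_tensor(image)[tf.newaxis, ...]  # add batch dim
    detections = detect_fn(input_tensor)
    # detection_boxes / detection_scores / detection_classes are batched tensors
    print(path, detections['detection_scores'][0][:5].numpy())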
Crack Extraction from Highway Tunnel Images Based on Deep Learning
The framework is Mask R-CNN.
Model download link
The crack dataset as provided is incomplete, so the rest has to be annotated by hand (tedious).
Use labelMe for annotation; install it into the Anaconda environment first:
# (the mirror URL below is the Tsinghua mirror in China, which makes downloads much faster)
pip install PyQt5 -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install pyqt5-tools -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install lxml -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install labelMe -i https://pypi.tuna.tsinghua.edu.cn/simple/
Then type labelme to launch it, choose Edit -> Create LineStrip, trace lines over the mask images, and remember to save (there is also an auto-save option).
Then split the JSON files into two parts, one for train and one for val; 7:3 is recommended, and you can also hold a few images out for test (a sketch of one way to do the split follows the directory listing below).
Directory structure:
├─datasets
│  ├─images
│  ├─mask
│  ├─train_annotations
│  └─val_annotations
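One way to do the 7:3 split (a sketch; the source directory holding all the labelMe JSON files is an assumption):

import glob
import random
import shutil

jsons = sorted(glob.glob('datasets/all_annotations/*.json'))  # hypothetical source dir
random.shuffle(jsons)
split = int(len(jsons) * 0.7)
for path in jsons[:split]:
    shutil.move(path, 'datasets/train_annotations/')
for path in jsons[split:]:
    shutil.move(path, 'datasets/val_annotations/')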
Download the files
Put create_tf_record.py, read_pbtxt_file.py, and string_int_label_map_pb2.py from the download into the crack directory; change import tensorflow as tf to import tensorflow.compat.v1 as tf, and change polygon = np.array(polygon) to polygon = np.array(polygon, dtype=np.int).
Edit crack_label_map.pbtxt so its only label is crack, then put it in the datasets directory.
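After the edit the label map should look something like this:

item {
  id: 1
  name: 'crack'
}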
Create crack_train.record:
python create_tf_record.py ^
--images_dir=datasets\images ^
--annotations_json_dir=datasets\train_annotations ^
--label_map_path=datasets\crack_label_map.pbtxt ^
--output_path=datasets\crack_train.record
Create crack_val.record:
python create_tf_record.py ^
--images_dir=datasets\images ^
--annotations_json_dir=datasets\val_annotations ^
--label_map_path=datasets\crack_label_map.pbtxt ^
--output_path=datasets\crack_val.record
Train the model
python object_detection/model_main_tf2.py --pipeline_config_path=object_detection/crack/mask_rcnn/pipeline.config --model_dir=object_detection/crack/mask_rcnn_result4 --alsologtostderr
watch -n 1 -d nvidia-smi
Evaluate the model
python object_detection/model_main_tf2.py --pipeline_config_path=object_detection/crack/mask_rcnn/pipeline.config --model_dir=object_detection/crack/mask_rcnn_result4 --checkpoint_dir=object_detection/crack/mask_rcnn_result4 --alsologtostderr
Monitor with TensorBoard:
tensorboard --logdir=object_detection/crack/mask_rcnn_result4
YOLOv4 experiments
Use the tutorial below for compilation; the official instructions don't work very well.
When installing CUDA, be sure to choose the custom install and select everything; do not use the slimmed-down install!
https://blog.csdn.net/weixin_43723614/article/details/105772081
Convert the VOC-format data to YOLO format with voc_label.py:
import os
import xml.etree.ElementTree as ET
from os import getcwd

sets = ['train', 'val', 'test']
classes = ["ship"]


def convert(size, box):
    # Convert a VOC box (xmin, xmax, ymin, ymax) into a normalized
    # YOLO box (x_center, y_center, width, height).
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)


def convert_annotation(image_id):
    in_file = open('Annotations/%s.xml' % image_id)
    out_file = open('labels/%s.txt' % image_id, 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        # some annotations capitalize the tag, so try both spellings
        diff = obj.find('Difficult')
        if diff is None:
            diff = obj.find('difficult')
        difficult = diff.text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


wd = getcwd()
for image_set in sets:
    if not os.path.exists('labels/'):
        os.makedirs('labels/')
    image_ids = open('ImageSets/Main/%s.txt' % image_set).read().strip().split()
    list_file = open('%s.txt' % image_set, 'w')
    for image_id in image_ids:
        print(image_id)
        list_file.write('%s\\JPEGImages\\%s.jpg\n' % (wd, image_id))
        convert_annotation(image_id)
    list_file.close()
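Each line of the resulting labels/*.txt files is class_id x_center y_center width height, all normalized to [0, 1], e.g. (values made up for illustration):

0 0.5132 0.4270 0.1016 0.0547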
Train the model
darknet.exe detector train data/ship.data cfg/yolov4-ship2.cfg yolov4.conv.137 -map
Evaluate the model
darknet.exe detector valid data/ship.data cfg/yolov4-ship.cfg backup/yolov4-ship_best.weights
darknet.exe detector map data/ship.data cfg/yolov4-ship.cfg backup/yolov4-ship_best.weights
Test the model
darknet.exe detector test data/ship.data cfg/yolov4-ship.cfg backup/yolov4-ship_best.weights D:\szx\yolo\darknet\data\ship\voc\JPEGImages\Sen_ship_hv_0201610270202002.jpg
Batch-testing images
Generate the detection results for the whole test set:
darknet.exe detector test data/ship.data cfg/yolov4-ship.cfg backup/yolov4-ship_best.weights -ext_output -dont_show -out val_result.json < data/val.txt >> val_result.txt
Draw the detections from the results file:
import json
import os

import cv2


def load_json(path):
    with open(path, 'r') as f:
        json_dict = json.load(f)
    return json_dict


results_json = 'val_result.json'
path_save = "D:/szx/yolo/darknet/val_img/"
json_dict = load_json(results_json)
for i in json_dict:
    print(i)
    image_path = i['filename']
    file_name = os.path.basename(image_path)  # handles both / and \ separators on Windows
    print(file_name)
    img = cv2.imread(image_path)
    [h, w, c] = img.shape
    objects_info = i['objects']
    for j in objects_info:
        class_id = j['class_id']
        # if class_id == 0:
        #     color = (255, 0, 0)
        # else:
        color = (0, 0, 255)
        box_info = j['relative_coordinates']
        center_x = box_info['center_x']
        center_y = box_info['center_y']
        width = box_info['width']
        height = box_info['height']
        confidence = j['confidence']
        # convert the normalized center-format box to pixel corners
        x_min = int((center_x - width / 2) * w)
        y_min = int((center_y - height / 2) * h)
        x_max = int((center_x + width / 2) * w)
        y_max = int((center_y + height / 2) * h)
        img = cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2)
        # draw the text label
        label_text = 'ship(' + str(round(confidence * 100, 1)) + '%)'
        label_background_color = (125, 175, 75)
        label_text_color = (255, 255, 255)
        label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)[0]
        label_xmin = x_min
        label_ymin = y_min - label_size[1]
        if label_ymin < 1:
            label_ymin = 1
        label_xmax = label_xmin + label_size[0]
        label_ymax = label_ymin + label_size[1]
        cv2.rectangle(img, (label_xmin - 1, label_ymin - 5),
                      (label_xmax + 1, label_ymax - 3), label_background_color, -1)
        cv2.putText(img, label_text, (x_min, y_min - 4),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, label_text_color, 1)
    # cv2.imshow('img', img)
    image_save = path_save + file_name
    print(image_save)
    cv2.imwrite(image_save, img)
    # cv2.waitKey(500)
Drawing the ground-truth boxes
import os
import os.path
import xml.etree.ElementTree as xmlET

import cv2
import numpy as np

classes = ('ship',)  # must be a tuple so .index() looks up class names
file_path_img = 'JPEGImages'
file_path_xml = 'Annotations'
save_file_path = 'ground_truth'
pathDir = open('ImageSets/Main/val.txt').read().strip().split()
print(pathDir)
color = (0, 0, 255)
for idx in range(len(pathDir)):
    filename = pathDir[idx] + '.xml'
    tree = xmlET.parse(os.path.join(file_path_xml, filename))
    objs = tree.findall('object')
    num_objs = len(objs)
    boxes = np.zeros((num_objs, 5), dtype=np.uint16)
    for ix, obj in enumerate(objs):
        bbox = obj.find('bndbox')
        x1 = float(bbox.find('xmin').text)
        y1 = float(bbox.find('ymin').text)
        x2 = float(bbox.find('xmax').text)
        y2 = float(bbox.find('ymax').text)
        cla = obj.find('name').text
        label = classes.index(cla)
        boxes[ix, 0:4] = [x1, y1, x2, y2]
        boxes[ix, 4] = label
    image_name = os.path.splitext(filename)[0]
    img = cv2.imread(os.path.join(file_path_img, image_name + '.jpg'))
    [h, w, c] = img.shape
    for ix in range(len(boxes)):
        x_min = int(boxes[ix, 0])
        y_min = int(boxes[ix, 1])
        x_max = int(boxes[ix, 2])
        y_max = int(boxes[ix, 3])
        img = cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2)
        # draw the text label (ground truth is always 100%)
        label_text = 'ship(100%)'
        label_background_color = (125, 175, 75)
        label_text_color = (255, 255, 255)
        label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)[0]
        label_xmin = x_min
        label_ymin = y_min - label_size[1]
        if label_ymin < 1:
            label_ymin = 1
        label_xmax = label_xmin + label_size[0]
        label_ymax = label_ymin + label_size[1]
        cv2.rectangle(img, (label_xmin - 1, label_ymin - 5),
                      (label_xmax + 1, label_ymax - 3), label_background_color, -1)
        cv2.putText(img, label_text, (x_min, y_min - 4),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, label_text_color, 1)
    image_save = save_file_path + "/" + image_name + '.jpg'
    print(image_save)
    cv2.imwrite(image_save, img)
Stitching the prediction and ground-truth images side by side
import os
import os.path

import cv2
import numpy as np

path1 = 'D:/szx/yolo/darknet/val_img/'
path2 = 'ground_truth/'
save_path = 'final_save/'
pathDir = open('ImageSets/Main/val.txt').read().strip().split()
for idx in range(len(pathDir)):
    image_name = pathDir[idx] + '.jpg'
    image1 = os.path.join(path1, image_name)
    image2 = os.path.join(path2, image_name)
    img1 = cv2.imread(image1)
    img2 = cv2.imread(image2)
    print(image1)
    # new canvas; assumes every image is 256x256, stitched side by side
    final_matrix = np.zeros((256, 512, 3), np.uint8)
    final_matrix[0:256, 0:256] = img1
    final_matrix[0:256, 256:512] = img2
    image_save = save_path + image_name
    cv2.imwrite(image_save, final_matrix)
Compute anchor values (paste the resulting anchors into the [yolo] sections of the cfg):
darknet.exe detector calc_anchors data/ship.data -num_of_clusters 9 -width 512 -height 512
YOLOv4-CSP experiments
View the network structure:
darknet.exe detector test data/ship.data cfg/yolov4-csp-ship.cfg
Extract the first 142 layers as pretrained weights:
darknet.exe partial cfg/yolov4-csp-ship.cfg yolov4-csp.weights yolov4-csp.conv.142 142
Train the model (the second command resumes from the 1000-iteration checkpoint):
darknet.exe detector train data/ship.data cfg/yolov4-csp-ship.cfg yolov4-csp.conv.142 -map
darknet.exe detector train data/ship.data cfg/yolov4-csp-ship.cfg backup/yolov4-csp-ship_1000.weights -map
Evaluate the model
darknet.exe detector valid data/ship.data cfg/yolov4-ship.cfg backup/yolov4-ship_best.weights
darknet.exe detector map data/ship.data cfg/yolov4-csp-ship.cfg backup/yolov4-csp-ship_last.weights
Batch-testing images
Generate the detection results for the whole test set:
darknet.exe detector test data/ship.data cfg/yolov4-csp-ship.cfg backup/yolov4-csp-ship_last.weights -ext_output -dont_show -out csp_val_result.json < data/val.txt >> csp_val_result.txt
Draw the detections from the results file (use the same drawing script as in the YOLOv4 section).