文章目录
原理示例实现代码实际操作方法
原理
示例
如,这是Yolo的: 转换后就变成酱紫了: 注意:图像坐标原点在左上角
注意:作者引用图片路径时使用的是绝对路径,我们使用相对路径不知是否会出问题? https://github.com/Dontla/tensorflow-yolov3/blob/master/data/dataset/voc_train.txt
文件批量重命名参考:python 将指定路径(目录)下的图片或文本文件按给定序号重新排序,并批量重命名 yolo、tensorflow数据集批量处理
实现代码
"""
@File : convert.py
@Time : 2019/10/22 9:26
@Author : Dontla
@Email : sxana@qq.com
@Software: PyCharm
"""
import os
import re
import cv2
import random
def sort_filenames(filenames, pattern):
    """Sort *filenames* in place by the numeric values captured by *pattern*.

    Each filename is keyed on the list of numbers extracted by
    ``re.findall(pattern, name)``, so e.g. ``"img (2).txt"`` sorts before
    ``"img (10).txt"`` (a plain lexicographic sort would not do this).

    :param filenames: list of filename strings; mutated in place.
    :param pattern: regex with one capture group per numeric field,
                    e.g. ``r'\((.*?)\)'`` for an index in parentheses.
    """
    # float() instead of the original eval(): eval on filename fragments is
    # unsafe and fails on values like "01"; float accepts ints and decimals
    # alike while producing the same ordering.
    filenames.sort(key=lambda name: [float(m) for m in re.findall(pattern, name)])
def extract_content(content):
    """Parse Yolo-format annotation text into 5-tuples of strings.

    Every line of the form ``"cls cx cy w h\\n"`` yields one tuple
    ``('cls', 'cx', 'cy', 'w', 'h')``. The trailing newline is part of
    the pattern, so a final line without one is not matched.

    :param content: full text of a Yolo annotation file.
    :return: list of 5-tuples of strings (empty list if nothing matches).
    """
    pattern = '(.*?) (.*?) (.*?) (.*?) (.*?)\n'
    return re.findall(pattern, content)
if __name__ == '__main__':
    # Image-dir prefix as seen from train.py; written verbatim into every
    # generated annotation line.
    source_img_path_related_to_train_py = '../Dontla_Dataset/20190822_Artificial_Flower/20191023_f_cotton_g/'
    # Local path to the images (needed to read each image's pixel size).
    source_img_path = '../20191023_f_cotton_g/'
    # Directory holding the Yolo-format .txt annotations to convert.
    source_txt_path = '../20191023_f_cotton_g_Annotations_Yolo/'
    # Output directory for the generated train.txt / test.txt.
    target_txt_path = '../20191023_f_cotton_g_Annotations_Tensorflow-Yolov3_dataset/'

    filenames = os.listdir(source_txt_path)
    # Sort numerically by the index inside parentheses, e.g. "name (12).txt".
    pattern = r'\((.*?)\)'
    sort_filenames(filenames, pattern)

    train_scale = 0.75  # fraction of samples assigned to the training split
    train_file_content = ''
    test_file_content = ''

    for filename in filenames:
        if not filename.endswith('.txt'):
            # Skip stray non-annotation files; without this, cv2.imread would
            # return None below and crash on .shape.
            continue
        with open(os.path.join(source_txt_path, filename), 'r', encoding='utf-8') as f:
            content_extract = extract_content(f.read())

        stem = os.path.splitext(filename)[0]
        # Image size is needed to convert Yolo's normalized coords to pixels.
        img = cv2.imread('{}{}.jpg'.format(source_img_path, stem))
        img_height, img_width = img.shape[:2]

        # One output line per image: "<img path> x1,y1,x2,y2,cls ...".
        object_strs = source_img_path_related_to_train_py + stem + '.jpg'
        for object_str in content_extract:
            # Yolo row: class_id cx cy w h (cx/cy/w/h normalized to [0, 1]).
            # int()/float() instead of eval(): safe and sufficient here.
            class_id = int(object_str[0])
            x = float(object_str[1]) * img_width
            y = float(object_str[2]) * img_height
            w = float(object_str[3]) * img_width
            h = float(object_str[4]) * img_height
            # Convert center/size to the corner coordinates tensorflow-yolov3
            # expects (image origin is the top-left corner).
            object_strs += ' {},{},{},{},{}'.format(
                round(x - w / 2), round(y - h / 2),
                round(x + w / 2), round(y + h / 2), class_id)

        # Randomly assign each image to the train or test split (75/25).
        if random.random() < train_scale:
            train_file_content += object_strs + '\n'
        else:
            test_file_content += object_strs + '\n'

    # Context managers guarantee the output files are flushed and closed
    # even if a write fails.
    with open(target_txt_path + 'train.txt', 'w', encoding='utf-8') as train_file:
        train_file.write(train_file_content.strip())
    with open(target_txt_path + 'test.txt', 'w', encoding='utf-8') as test_file:
        test_file.write(test_file_content.strip())
实际操作方法
略
升级版:添加了数据清洗,参见:将yolo标注转换为tensorflow_yolov3标注生成train.txt和test.txt同时做数据清洗