代码拉取完成,页面将自动刷新
import os
from PIL import Image
import xml.etree.ElementTree as ET
import re
import shutil
# Sub-directory suffixes appended to the dataset root with plain string
# concatenation (``data_dir + X``) by the functions in this file; that is why
# each one starts with "/".
ANNOTATIONS_PATH = "/Annotations"  # XML annotation files
IMAGE_PATH = "/images"  # image files
LABEL_PATH = "/labels"  # YOLO .txt label files
def remove_xml_image_by_name(data_dir, filename):
    """Delete the annotation and image files that share the stem *filename*.

    Args:
        data_dir: Dataset root; the annotation and image directories are
            ``data_dir + ANNOTATIONS_PATH`` and ``data_dir + IMAGE_PATH``.
        filename: File name without its extension.

    A file that does not exist is reported with a warning instead of raising.
    """
    # The *_PATH constants start with "/", so they are concatenated on purpose:
    # os.path.join would treat them as absolute and discard ``data_dir``.
    xml_directory = data_dir + ANNOTATIONS_PATH
    images_directory = data_dir + IMAGE_PATH

    xml_file_path = os.path.join(xml_directory, f'{filename}.xml')
    if os.path.exists(xml_file_path):
        print(f'正在删除对应的Annotations文件: {xml_file_path}')
        os.remove(xml_file_path)
    else:
        # This warning used to describe the XML file as an image file.
        print(f"警告:对应的Annotations文件 {xml_file_path} 不存在,无法删除")

    # The other checks in this file also deal with .jpeg/.png images, so look
    # for every supported suffix instead of ".jpg" only.
    removed_any_image = False
    for suffix in ('.jpg', '.jpeg', '.png'):
        image_file_path = os.path.join(images_directory, f'{filename}{suffix}')
        if os.path.exists(image_file_path):
            print(f'正在删除对应的Image文件: {image_file_path}')
            os.remove(image_file_path)
            removed_any_image = True

    if not removed_any_image:
        # Keep reporting the ".jpg" path, as before, when nothing was found.
        image_file_path = os.path.join(images_directory, f'{filename}.jpg')
        print(f"警告:对应的图像文件 {image_file_path} 不存在,无法删除")
def rename_files(data_dir):
    """Rename images with an upper-case ``.JPG`` extension to ``.jpg``.

    Only the extension is rewritten. The stem is left untouched so that the
    image keeps matching its annotation and label files, which are looked up
    by stem elsewhere in this file.

    Args:
        data_dir: Dataset root; images are read from ``data_dir + IMAGE_PATH``.
    """
    print(f"\n---------检查数据集 {data_dir} 重命名 JPG to jpg------------")
    directory = data_dir + IMAGE_PATH
    for filename in os.listdir(directory):
        stem, extension = os.path.splitext(filename)
        # Match the extension only: a substring test would also rewrite
        # "JPG" inside the stem (e.g. "JPG_001.jpg" -> "jpg_001.jpg").
        if extension != ".JPG":
            continue

        new_filename = stem + ".jpg"
        old_file_path = os.path.join(directory, filename)
        new_file_path = os.path.join(directory, new_filename)
        print(f"Renaming '{filename}' to '{new_filename}'")
        shutil.move(old_file_path, new_file_path)
def check_xml_dimensions(data_dir):
    """Remove annotations (and their images) whose recorded size is zero.

    Every ``.xml`` file directly inside ``data_dir + ANNOTATIONS_PATH`` is
    parsed. When its ``<size>`` element reports a ``width`` or ``height`` of
    0, the annotation and the image with the same stem are deleted through
    ``remove_xml_image_by_name``.

    Files that cannot be parsed, or whose ``<size>`` lacks an integer
    ``width``/``height``, are reported and skipped rather than aborting the
    whole run. Files without a ``<size>`` element are ignored.

    Args:
        data_dir: Dataset root directory.
    """
    print(f"\n---------检查数据集 {data_dir} 目录下的所有xml文件是否正确------------")
    xml_directory = data_dir + ANNOTATIONS_PATH

    for filename in os.listdir(xml_directory):
        if not filename.endswith('.xml'):
            continue
        filepath = os.path.join(xml_directory, filename)

        try:
            root = ET.parse(filepath).getroot()
        except ET.ParseError as err:
            print(f"警告:无法解析XML文件 {filepath}: {err}")
            continue

        size_element = root.find('size')
        if size_element is None:
            continue

        try:
            # findtext returns None for a missing child (-> TypeError) and the
            # raw text otherwise (-> ValueError when it is not an integer).
            width = int(size_element.findtext('width'))
            height = int(size_element.findtext('height'))
        except (TypeError, ValueError):
            print(f"警告:XML文件 {filepath} 的width/height缺失或不是整数")
            continue

        if width == 0 or height == 0:
            print(f"File: {filepath}, Width: {width}, Height: {height}")
            remove_xml_image_by_name(data_dir, os.path.splitext(filename)[0])
def check_images(data_dir):
    """Delete corrupted ``.jpg``/``.jpeg`` images and their annotations.

    Walks ``data_dir + IMAGE_PATH`` recursively and runs Pillow's
    ``Image.verify()`` on every matching file. When opening or verifying
    raises ``IOError`` or ``SyntaxError`` the file is reported and
    ``remove_xml_image_by_name`` is called with its stem.

    Args:
        data_dir: Dataset root directory.
    """
    print(f"\n------------检查数据集{data_dir}目录下的所有图片文件是否正确------------")
    images_directory = data_dir + IMAGE_PATH

    for root, _dirs, files in os.walk(images_directory):
        for file in files:
            if not file.endswith(('.jpg', '.jpeg')):
                continue
            file_path = os.path.join(root, file)
            try:
                # The context manager closes the file handle even when
                # verification fails, so the handle is not leaked and the
                # file can be deleted afterwards (an open handle blocks
                # deletion on Windows).
                with Image.open(file_path) as img:
                    img.verify()
            except (IOError, SyntaxError):
                print(f'发现损坏的图片: {file_path}')
                remove_xml_image_by_name(data_dir, os.path.splitext(file)[0])
def check_empty_yolo_label(data_dir):
    """Delete label files that contain no annotation lines.

    Walks ``data_dir + LABEL_PATH`` recursively. A ``.txt`` file counts as
    empty when every line is blank or starts with ``#`` (a file with no lines
    at all also qualifies). Empty label files are removed, and
    ``remove_xml_image_by_name`` is called with the same stem.

    Args:
        data_dir: Dataset root directory.
    """
    print(f"\n------------检查数据集{data_dir}目录下的所有txt文件是否为空------------")
    labels_directory = data_dir + LABEL_PATH
    # Paths removed during this run; kept locally only, not returned.
    blank_label_paths = []

    for folder, _subdirs, names in os.walk(labels_directory):
        for name in names:
            if not name.endswith(".txt"):
                continue

            label_path = os.path.join(folder, name)
            with open(label_path, 'r') as handle:
                rows = handle.readlines()

            # A row carries content when it is neither blank nor a comment.
            has_content = any(
                row.strip() != '' and not row.startswith('#') for row in rows
            )
            if has_content:
                continue

            blank_label_paths.append(label_path)
            print(f"found empty txt file: {label_path}")
            print(f'正在删除对应的txt文件: {label_path}')
            os.remove(label_path)
            remove_xml_image_by_name(data_dir, os.path.splitext(name)[0])
def check_yolo_label_validity(data_dir):
    """Delete YOLO label files that contain malformed or out-of-range lines.

    Walks ``data_dir + LABEL_PATH`` recursively. Every line of every ``.txt``
    file must consist of an integer class id followed by four non-negative
    decimal numbers separated by whitespace, and each of the four numbers
    must lie in the range 0-1. Every offending line is reported. A file with
    at least one offending line is deleted once, after all of its lines have
    been checked, and ``remove_xml_image_by_name`` is called with its stem.

    Args:
        data_dir: Dataset root directory.
    """
    print(f"\n------------检查数据集{data_dir}目录下的YOLO格式的标签文件是否合法------------")
    labels_directory = data_dir + LABEL_PATH
    # class-id followed by four unsigned decimals, whitespace separated.
    yolo_pattern = re.compile(r'^\d+\s+\d+(\.\d+)?\s+\d+(\.\d+)?\s+\d+(\.\d+)?\s+\d+(\.\d+)?$')

    for root, _dirs, files in os.walk(labels_directory):
        for file in files:
            if not file.endswith('.txt'):
                continue

            file_path = os.path.join(root, file)
            with open(file_path, 'r') as f:
                lines = f.readlines()

            is_valid = True
            for i, line in enumerate(lines, start=1):
                stripped = line.strip()
                if not yolo_pattern.match(stripped):
                    print(f"警告:文件 {file_path} 第{i}行不符合YOLO标签格式:{stripped}")
                    is_valid = False
                    continue

                coords = [float(value) for value in stripped.split()[1:]]
                if any(coord < 0 or coord > 1 for coord in coords[:4]):
                    print(f"警告:文件 {file_path} 第{i}行的坐标或尺寸超出范围(0-1):{stripped}")
                    is_valid = False

            # Delete once per file. Removing inside the line loop raised
            # FileNotFoundError as soon as a second bad line was found.
            if not is_valid:
                os.remove(file_path)
                remove_xml_image_by_name(data_dir, os.path.splitext(file)[0])
def _normalize_extensions(extensions):
    """Return *extensions* as a tuple of lower-case, dot-prefixed suffixes."""
    if isinstance(extensions, str):
        # A bare string such as ('.txt') is one suffix, not a sequence.
        extensions = (extensions,)
    return tuple(
        ext.lower() if ext.startswith('.') else '.' + ext.lower()
        for ext in extensions
    )


def _files_by_stem(directory, extensions):
    """Map each stem in *directory* to its file names matching *extensions*."""
    by_stem = {}
    for entry in os.listdir(directory):
        stem, ext = os.path.splitext(entry)
        if ext.lower() in extensions:
            by_stem.setdefault(stem, []).append(entry)
    return by_stem


def check_yolo_image_label_match(data_dir, image_extension=('.jpg', '.png', '.jpeg'), text_extension=('.txt',)):
    """Delete images without a label file and label files without an image.

    Files directly inside ``data_dir + IMAGE_PATH`` and
    ``data_dir + LABEL_PATH`` are paired by stem. Extensions are compared
    case-insensitively, and deletion uses the file names as they actually
    exist on disk.

    Args:
        data_dir: Dataset root directory.
        image_extension: Image suffix, or an iterable of suffixes. A missing
            leading dot is tolerated.
        text_extension: Label suffix, or an iterable of suffixes. A missing
            leading dot is tolerated.

    Returns:
        None. Prints a message and returns early when either directory is
        missing.
    """
    images_directory = data_dir + IMAGE_PATH
    labels_directory = data_dir + LABEL_PATH
    if not os.path.isdir(images_directory) or not os.path.isdir(labels_directory):
        print("请确保指定的Images和labels目录存在。")
        return

    print(f"\n------------检查目录{images_directory} 和 {labels_directory}目录中文件的一致性------------")

    image_files = _files_by_stem(images_directory, _normalize_extensions(image_extension))
    label_files = _files_by_stem(labels_directory, _normalize_extensions(text_extension))

    # Images that have no label file.
    missing_labels = sorted(image_files.keys() - label_files.keys())
    if missing_labels:
        print("缺少对应的.txt标签文件的图片:")
        for stem in missing_labels:
            for image_name in image_files[stem]:
                print(image_name)
                image_file_path = os.path.join(images_directory, image_name)
                print(f'正在删除对应的image文件: {image_file_path}')
                os.remove(image_file_path)

    # Label files that have no image.
    missing_images = sorted(label_files.keys() - image_files.keys())
    if missing_images:
        print("缺少对应的图片文件的.txt标签:")
        for stem in missing_images:
            for label_name in label_files[stem]:
                print(label_name)
                txt_file_path = os.path.join(labels_directory, label_name)
                print(f'正在删除对应的txt文件: {txt_file_path}')
                os.remove(txt_file_path)
if __name__ == '__main__':
    # The dataset root is the directory the script is launched from.
    current_path = os.getcwd()

    # Normalise ".JPG" extensions first: the cleanup steps below delete images
    # by their lower-case ".jpg" name, so an image still named ".JPG" would be
    # left behind when its annotation is removed.
    rename_files(current_path)
    # Drop annotations whose recorded width or height is 0.
    check_xml_dimensions(current_path)
    # Drop corrupted image files.
    check_images(current_path)
    # Drop label files that contain no annotation lines.
    check_empty_yolo_label(current_path)
    # Drop label files with malformed or out-of-range lines.
    check_yolo_label_validity(current_path)
    # Finally make sure every image has a label and vice versa.
    check_yolo_image_label_match(current_path)
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。