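# NOTE: the lines below are residue from the repository web viewer, kept as
# comments so that they do not break parsing of the module.
# You cannot select more than 25 topics. Topics must start with a letter or
# number, can include dashes ('-') and can be up to 35 characters long.
# lxCameraApi/generate_qrcode_dataset.py
#
# 333 lines
# 10 KiB
# Python
#
# 1 week ago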
import os
import random
import math
import numpy as np
from PIL import Image, ImageDraw, ImageFilter, ImageEnhance
from qrcode import QRCode
import qrcode.constants
import cv2
def generate_random_gradient(size=(2048, 2048)):
    """Create an image filled with a random vertical colour gradient.

    Two random RGB colours are drawn; the first is used for the top row and
    the second for the bottom row, and the rows in between are interpolated
    linearly.

    Args:
        size: Output image size as ``(width, height)`` in pixels.

    Returns:
        A PIL image built from a ``(height, width, 3)`` uint8 array.
    """
    width, height = size

    # Draw the top colour first, then the bottom one, so the sequence of
    # random numbers consumed stays the same for seeded runs.
    top_color = [random.randint(0, 255) for _ in range(3)]
    bottom_color = [random.randint(0, 255) for _ in range(3)]

    # One interpolated colour per image row: shape (height, 3).
    row_colors = np.linspace(top_color, bottom_color, height, dtype=np.uint8)

    # Copy every row colour across the full width: shape (height, width, 3).
    pixels = np.repeat(row_colors[:, np.newaxis, :], width, axis=1)

    return Image.fromarray(pixels)
def generate_qr_code(size=(200, 200)):
    """Render a QR code with random numeric content on a transparent background.

    The code encodes 50 random decimal digits. It is first rendered as black
    modules on white, then every near-white pixel is made fully transparent,
    and finally the image is resized to ``size``.

    Args:
        size: Target ``(width, height)`` of the returned image in pixels.

    Returns:
        An RGBA PIL image of the requested size.
    """
    qr = QRCode(
        version=1,
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        box_size=10,
        border=4,
    )

    # Random payload: 50 decimal digits.
    payload = ''.join(str(random.randint(0, 9)) for _ in range(50))
    qr.add_data(payload)
    qr.make(fit=True)

    # Render black-on-white first, then switch to RGBA so alpha can be edited.
    img = qr.make_image(fill_color="black", back_color="white").convert('RGBA')

    # Make every near-white pixel (all of R, G, B above 230) fully transparent.
    # This is done as one array operation rather than a per-pixel Python loop,
    # which is very slow at the rendered resolution.
    pixels = np.array(img)
    near_white = (pixels[..., :3] > 230).all(axis=-1)
    pixels[near_white] = (255, 255, 255, 0)
    img = Image.fromarray(pixels)

    # Scale to the requested size.
    return img.resize(size, Image.Resampling.LANCZOS)
def transform_qr_code(qr_img):
    """Apply random brightness, contrast and blur to a QR code image.

    Only the colour channels are altered; the alpha channel of the input is
    carried over unchanged so the transparent background stays intact.

    Args:
        qr_img: PIL image of the QR code. It is converted to RGBA when it is
            in any other mode.

    Returns:
        A new RGBA PIL image with the randomised appearance.
    """
    if qr_img.mode != 'RGBA':
        qr_img = qr_img.convert('RGBA')

    # Separate colour from transparency so the enhancers never touch alpha.
    red, green, blue, alpha = qr_img.split()
    rgb = Image.merge('RGB', (red, green, blue))

    # Random brightness factor in [0.7, 1.3].
    rgb = ImageEnhance.Brightness(rgb).enhance(random.uniform(0.7, 1.3))

    # Random contrast factor in [0.8, 1.2].
    rgb = ImageEnhance.Contrast(rgb).enhance(random.uniform(0.8, 1.2))

    # Random Gaussian blur (sharpness) with a radius of up to 1.5.
    blur_radius = random.uniform(0, 1.5)
    if blur_radius > 0:
        rgb = rgb.filter(ImageFilter.GaussianBlur(radius=blur_radius))

    # Re-attach the untouched alpha channel.
    return Image.merge('RGBA', (*rgb.split(), alpha))
def is_overlapping(x1, y1, w1, h1, x2, y2, w2, h2, margin=20):
    """Return whether two rectangles overlap or lie within ``margin`` of each other.

    Args:
        x1, y1: Top-left corner of the first rectangle.
        w1, h1: Width and height of the first rectangle.
        x2, y2: Top-left corner of the second rectangle.
        w2, h2: Width and height of the second rectangle.
        margin: Minimum gap required between the rectangles. A gap of exactly
            ``margin`` still counts as overlapping; the gap must be strictly
            larger for the rectangles to be considered separate.

    Returns:
        ``True`` if the rectangles are closer than required on both axes,
        ``False`` if they are separated by more than ``margin`` on at least
        one axis.
    """
    # The rectangles are "too close" on an axis unless one of them ends more
    # than ``margin`` before the other one starts.
    close_horizontally = x1 + w1 + margin >= x2 and x2 + w2 + margin >= x1
    close_vertically = y1 + h1 + margin >= y2 and y2 + h2 + margin >= y1
    return close_horizontally and close_vertically
def calculate_bbox(img_size, x, y, width, height):
    """Convert a pixel rectangle into a normalised YOLO bounding box.

    The rectangle is clipped to the image before normalising, so the returned
    box never extends past the image border. For a rectangle that already
    lies inside the image the result is simply its centre and size divided by
    the image dimensions.

    Args:
        img_size: Size of the full image as ``(width, height)`` in pixels.
        x, y: Top-left corner of the rectangle in pixels.
        width, height: Size of the rectangle in pixels.

    Returns:
        ``(x_center, y_center, bbox_width, bbox_height)``, each normalised to
        the range [0, 1].

    Raises:
        ValueError: If either image dimension is not positive.
    """
    img_w, img_h = img_size
    if img_w <= 0 or img_h <= 0:
        raise ValueError(f"img_size must be positive, got {img_size!r}")

    # Clip the rectangle's corners to the image. Clamping centre and size
    # independently could yield a box that still reaches outside the image.
    left = max(0, min(img_w, x))
    right = max(0, min(img_w, x + width))
    top = max(0, min(img_h, y))
    bottom = max(0, min(img_h, y + height))

    # A negative or fully clipped extent collapses to an empty box.
    box_w = max(0, right - left)
    box_h = max(0, bottom - top)

    # Normalise centre and size by the image dimensions.
    x_norm = (left + box_w / 2) / img_w
    y_norm = (top + box_h / 2) / img_h
    w_norm = box_w / img_w
    h_norm = box_h / img_h
    return x_norm, y_norm, w_norm, h_norm
def place_qr_codes(img, num_qrcodes=20):
    """Paste several non-overlapping random QR codes onto an image.

    For each code a random size is chosen, the code is generated and randomly
    transformed, and up to 100 random positions are tried until one is found
    that keeps the required distance from every code placed so far. Codes
    that cannot be placed are skipped with a printed warning.

    Args:
        img: Background PIL image. It is converted to RGBA before the first
            paste; an image that is already RGBA is modified in place.
        num_qrcodes: Number of QR codes to try to place.

    Returns:
        A tuple ``(img, annotations)`` where ``img`` is the resulting image
        and ``annotations`` is a list of normalised YOLO boxes
        ``(x_center, y_center, width, height)``, one per placed code.
    """
    img_w, img_h = img.size
    annotations = []
    placed_boxes = []
    max_attempts = 100

    for i in range(num_qrcodes):
        # Side length between 50 and 200 px, biased towards small targets:
        # 70% of the codes are 50-100 px, the remaining 30% are 100-200 px.
        if random.random() < 0.7:
            qr_size_value = random.randint(50, 100)
        else:
            qr_size_value = random.randint(100, 200)
        qr_size = (qr_size_value, qr_size_value)

        # Generate the code and randomise its appearance (no rotation).
        qr_img = generate_qr_code(qr_size)
        qr_img_transformed = transform_qr_code(qr_img)
        qr_w, qr_h = qr_img_transformed.size

        # Largest top-left coordinates that keep the code fully inside.
        max_x = img_w - qr_w
        max_y = img_h - qr_h
        if max_x < 0 or max_y < 0:
            # random.randint would raise on an empty range; skip instead.
            print(f"Warning: QR code {i+1} ({qr_w}x{qr_h}) does not fit "
                  f"into the {img_w}x{img_h} image")
            continue

        for _ in range(max_attempts):
            x = random.randint(0, max_x)
            y = random.randint(0, max_y)

            # Reject the position if it is too close to any placed code.
            if any(is_overlapping(x, y, qr_w, qr_h, *box)
                   for box in placed_boxes):
                continue

            # Pasting with an alpha mask requires an RGBA destination.
            if img.mode != 'RGBA':
                img = img.convert('RGBA')

            # Use the code's own alpha channel as the paste mask.
            alpha = qr_img_transformed.split()[-1]
            img.paste(qr_img_transformed, (x, y), alpha)

            placed_boxes.append((x, y, qr_w, qr_h))

            # NOTE(review): the box covers the whole pasted tile, which
            # presumably includes the transparent border produced by
            # generate_qr_code - confirm that this is the intended label.
            annotations.append(
                calculate_bbox((img_w, img_h), x, y, qr_w, qr_h)
            )
            break
        else:
            # Every attempt collided with an already placed code.
            print(f"Warning: Could not place QR code {i+1} without overlap")

    return img, annotations
def save_annotation(label_path, annotations):
    """Write bounding boxes to a YOLO-format label file.

    Each box becomes one line ``0 x y w h`` with six decimal places; the
    leading ``0`` is the class index of the single "qrcode" class. An empty
    ``annotations`` iterable produces an empty file.

    Args:
        label_path: Path of the label file to create or overwrite.
        annotations: Iterable of ``(x, y, w, h)`` tuples.
    """
    lines = [
        f"0 {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n"
        for x, y, w, h in annotations
    ]
    # Explicit encoding keeps the output independent of the platform default
    # and matches how data.yaml is written.
    with open(label_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)
def _generate_split(images_dir, labels_dir, prefix, split_label, count,
                    image_size, num_qrcodes_per_image):
    """Render ``count`` images with matching YOLO label files for one split."""
    print(f"Generating {count} {split_label} images...")
    for i in range(count):
        print(f" Generating {split_label} image {i+1}/{count}")

        # Random gradient background with QR codes pasted on top.
        img = generate_random_gradient(image_size)
        img, annotations = place_qr_codes(img, num_qrcodes_per_image)

        # JPEG has no alpha channel, so drop it before saving.
        if img.mode == 'RGBA':
            img = img.convert('RGB')

        # Image and label share the same stem.
        stem = f"{prefix}_{i:04d}"
        img.save(os.path.join(images_dir, f"{stem}.jpg"), quality=95)
        save_annotation(os.path.join(labels_dir, f"{stem}.txt"), annotations)


def generate_dataset(output_root, num_train_images=30):
    """Generate a YOLOv8 QR-code detection dataset on disk.

    Creates ``train`` and ``val`` splits (each with ``images`` and ``labels``
    sub-folders) below ``output_root``, fills them with synthetic images and
    label files, writes a ``data.yaml`` describing the dataset, and prints a
    summary. The validation split holds one tenth of the training count,
    with a minimum of one image.

    Args:
        output_root: Directory that receives the dataset; created if missing.
        num_train_images: Number of training images to generate.
    """
    image_size = (2048, 2048)
    # Many codes per image (raised from 20 to 50) to help small-object
    # detection.
    num_qrcodes_per_image = 50
    num_val_images = max(1, num_train_images // 10)

    # Create the whole folder layout before generating anything.
    train_images_dir = os.path.join(output_root, "train", "images")
    train_labels_dir = os.path.join(output_root, "train", "labels")
    val_images_dir = os.path.join(output_root, "val", "images")
    val_labels_dir = os.path.join(output_root, "val", "labels")
    for directory in (train_images_dir, train_labels_dir,
                      val_images_dir, val_labels_dir):
        os.makedirs(directory, exist_ok=True)

    _generate_split(train_images_dir, train_labels_dir, "train", "training",
                    num_train_images, image_size, num_qrcodes_per_image)
    _generate_split(val_images_dir, val_labels_dir, "val", "validation",
                    num_val_images, image_size, num_qrcodes_per_image)

    # Dataset description file (data.yaml).
    yaml_content = (
        f"path: {os.path.abspath(output_root)}\n"
        "train: train/images\n"
        "val: val/images\n"
        "nc: 1\n"
        "names: ['qrcode']\n"
    )
    yaml_path = os.path.join(output_root, "data.yaml")
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write(yaml_content)

    separator = "=" * 50
    print("\n" + separator)
    print("Dataset generation completed!")
    print(separator)
    print(f"Output directory: {output_root}")
    print(f"Training images: {num_train_images}")
    print(f"Validation images: {num_val_images}")
    print(f"QR codes per image: {num_qrcodes_per_image}")
    print(f"Image size: {image_size}")
    print("Number of classes: 1")
    print("Class name: qrcode")
    print(separator)
if __name__ == '__main__':
    # Script entry point: generate the dataset, then print training hints.

    # Output directory for the generated dataset.
    output_root = r"D:\PycharmProjects\yolo\qrcode_dataset"
    # Number of training images to generate (at least 100 is recommended).
    num_train_images = 100

    generate_dataset(output_root, num_train_images)

    # Training suggestions for the user (runtime text is kept unchanged).
    separator = "=" * 50
    print("\n" + separator)
    print("训练建议:")
    print(separator)
    print("使用 YOLOv8-p2 进行小目标检测:")
    print(" yolo train data=data.yaml model=yolov8n-p2.pt epochs=200 imgsz=4096")
    print("")
    print("如果显存足够,可以使用 8192 输入尺寸:")
    print(" yolo train data=data.yaml model=yolov8n-p2.pt epochs=200 imgsz=8192")
    print("")
    print("如果需要更高精度,使用 s 模型:")
    print(" yolo train data=data.yaml model=yolov8s-p2.pt epochs=200 imgsz=4096")
    print(separator)