You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
lxCameraApi/generate_qrcode_dataset.py

333 lines
10 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import random
import math
import numpy as np
from PIL import Image, ImageDraw, ImageFilter, ImageEnhance
from qrcode import QRCode
import qrcode.constants
import cv2
def generate_random_gradient(size=(2048, 2048)):
    """Build a background image filled with a random vertical gradient.

    Args:
        size: Image size as ``(width, height)`` in pixels.

    Returns:
        A PIL image whose rows blend linearly from one random colour (top
        row) to another random colour (bottom row).
    """
    width, height = size
    # Draw the two endpoint colours: top first, then bottom.
    top_color = [random.randint(0, 255) for _ in range(3)]
    bottom_color = [random.randint(0, 255) for _ in range(3)]
    # One RGB triple per row, shape (height, 3).
    column = np.linspace(top_color, bottom_color, height, dtype=np.uint8)
    # Replicate that column across every x position -> (height, width, 3).
    pixels = np.repeat(column[:, np.newaxis, :], width, axis=1)
    return Image.fromarray(pixels)
def generate_qr_code(size=(200, 200)):
    """Generate a QR code image with a transparent background.

    The payload is a random string of 50 decimal digits. The code is first
    rendered black-on-white, then every near-white pixel is made fully
    transparent, and finally the image is resized to ``size``.

    Args:
        size: Output size as ``(width, height)`` in pixels.

    Returns:
        An RGBA PIL image of the requested size.
    """
    qr = QRCode(
        version=1,
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        box_size=10,
        border=4,
    )
    # Random payload: 50 decimal digits.
    data = ''.join(str(random.randint(0, 9)) for _ in range(50))
    qr.add_data(data)
    qr.make(fit=True)

    # Render on a white background first, then switch to RGBA.
    img = qr.make_image(fill_color="black", back_color="white").convert('RGBA')

    # Knock out the (near-)white background in one vectorised pass instead
    # of looping over every pixel in Python.
    pixels = np.array(img)  # writable copy, shape (H, W, 4)
    near_white = (pixels[..., :3] > 230).all(axis=-1)
    pixels[near_white] = (255, 255, 255, 0)
    img = Image.fromarray(pixels)

    # Scale to the requested size.
    return img.resize(size, Image.Resampling.LANCZOS)
def transform_qr_code(qr_img):
    """Apply random brightness, contrast and blur jitter to a QR code image.

    Only the colour channels are altered; the alpha channel of the input is
    carried over unchanged so the transparency mask stays intact.

    Args:
        qr_img: PIL image of the QR code (converted to RGBA if necessary).

    Returns:
        A new RGBA PIL image with the jittered colour channels.
    """
    # Work in RGBA so the alpha band can be split off.
    if qr_img.mode != 'RGBA':
        qr_img = qr_img.convert('RGBA')

    *color_bands, alpha_band = qr_img.split()
    rgb = Image.merge('RGB', tuple(color_bands))

    # Random brightness, then random contrast (order matters for the result).
    rgb = ImageEnhance.Brightness(rgb).enhance(random.uniform(0.7, 1.3))
    rgb = ImageEnhance.Contrast(rgb).enhance(random.uniform(0.8, 1.2))

    # Random Gaussian blur to vary sharpness.
    radius = random.uniform(0, 1.5)
    if radius > 0:
        rgb = rgb.filter(ImageFilter.GaussianBlur(radius=radius))

    # Re-attach the original alpha band.
    return Image.merge('RGBA', (*rgb.split(), alpha_band))
def is_overlapping(x1, y1, w1, h1, x2, y2, w2, h2, margin=20):
    """Return whether two rectangles overlap once padded by ``margin``.

    Each rectangle is given by its top-left corner and its width/height.
    Two rectangles count as overlapping unless they are separated by
    strictly more than ``margin`` along at least one axis, so a gap of
    exactly ``margin`` still counts as an overlap.

    Args:
        x1, y1, w1, h1: Position and size of the first rectangle.
        x2, y2, w2, h2: Position and size of the second rectangle.
        margin: Minimum clearance required between the rectangles.

    Returns:
        ``True`` if the rectangles are closer than the required clearance
        on both axes, ``False`` otherwise.
    """
    separated_x = x1 + w1 + margin < x2 or x2 + w2 + margin < x1
    separated_y = y1 + h1 + margin < y2 or y2 + h2 + margin < y1
    return not (separated_x or separated_y)
def calculate_bbox(img_size, x, y, width, height):
    """Compute a YOLO-format bounding box for a pasted QR code.

    The rectangle is clipped to the image before normalisation, so a box
    that partly leaves the image produces an annotation that still lies
    entirely inside it (centre and size stay consistent with each other).

    Args:
        img_size: Size of the full image as ``(width, height)``.
        x, y: Top-left corner of the QR code in pixels.
        width, height: Size of the QR code in pixels.

    Returns:
        ``(x_center, y_center, bbox_width, bbox_height)`` normalised to
        the range [0, 1] relative to the image size.

    Raises:
        ValueError: If either image dimension is not positive.
    """
    img_w, img_h = img_size
    if img_w <= 0 or img_h <= 0:
        raise ValueError(f"image size must be positive, got {img_size!r}")

    # Clip the rectangle's edges to the image bounds.
    left = min(max(x, 0), img_w)
    top = min(max(y, 0), img_h)
    right = min(max(x + width, 0), img_w)
    bottom = min(max(y + height, 0), img_h)

    # Normalise the centre and extent; a degenerate box collapses to 0.
    x_norm = (left + right) / 2 / img_w
    y_norm = (top + bottom) / 2 / img_h
    w_norm = max(right - left, 0) / img_w
    h_norm = max(bottom - top, 0) / img_h
    return x_norm, y_norm, w_norm, h_norm
def place_qr_codes(img, num_qrcodes=20):
    """Paste several non-overlapping random QR codes onto an image.

    Each QR code gets a random square size (70% chance of 50-100 px to
    emphasise small targets, otherwise 100-200 px), is randomly jittered,
    and is then placed at a random position that does not collide with
    any previously placed code. A code that cannot be placed is skipped
    with a warning.

    Note: an input image that is not RGBA is converted (yielding a new
    image) when the first code is pasted; an input that is already RGBA
    is pasted onto in place.

    Args:
        img: Background PIL image.
        num_qrcodes: Number of QR codes to try to place.

    Returns:
        ``(img, annotations)`` where ``img`` is the composited image and
        ``annotations`` is a list of normalised YOLO boxes
        ``(x_center, y_center, width, height)``, one per placed code.
    """
    img_w, img_h = img.size
    annotations = []
    placed_boxes = []
    max_attempts = 100  # position retries per QR code

    for i in range(num_qrcodes):
        # Random size in 50-200 px, biased towards small targets.
        if random.random() < 0.7:
            qr_size_value = random.randint(50, 100)
        else:
            qr_size_value = random.randint(100, 200)

        # Generate and jitter the code (no rotation is applied).
        qr_img = transform_qr_code(
            generate_qr_code((qr_size_value, qr_size_value))
        )
        qr_w, qr_h = qr_img.size

        # Largest top-left corner that keeps the code fully inside.
        max_x = img_w - qr_w
        max_y = img_h - qr_h
        if max_x < 0 or max_y < 0:
            # random.randint would raise on an empty range; skip instead.
            print(
                f"Warning: QR code {i+1} ({qr_w}x{qr_h}) does not fit "
                f"in a {img_w}x{img_h} image"
            )
            continue

        for _ in range(max_attempts):
            x = random.randint(0, max_x)
            y = random.randint(0, max_y)

            # Reject positions colliding with an already placed code.
            if any(
                is_overlapping(x, y, qr_w, qr_h, *box)
                for box in placed_boxes
            ):
                continue

            # Paste using the code's own alpha band as the mask; the
            # background must be RGBA for this.
            if img.mode != 'RGBA':
                img = img.convert('RGBA')
            alpha = qr_img.split()[-1]
            img.paste(qr_img, (x, y), alpha)

            placed_boxes.append((x, y, qr_w, qr_h))
            # The bounding box is exactly the pasted rectangle.
            annotations.append(
                calculate_bbox((img_w, img_h), x, y, qr_w, qr_h)
            )
            break
        else:
            print(f"Warning: Could not place QR code {i+1} without overlap")

    return img, annotations
def save_annotation(label_path, annotations):
    """Write bounding boxes to a YOLO-format label file.

    Every box becomes one line ``0 x y w h`` with six decimal places;
    class id 0 is the single QR-code class.

    Args:
        label_path: Destination path of the label file (overwritten).
        annotations: Iterable of ``(x, y, w, h)`` tuples.
    """
    # Format first so a malformed annotation fails before the file is
    # truncated.
    lines = [
        f"0 {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n"
        for x, y, w, h in annotations
    ]
    with open(label_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)
def _generate_split(images_dir, labels_dir, prefix, split_name, count,
                    image_size, num_qrcodes):
    """Render ``count`` images and their YOLO label files for one split."""
    print(f"Generating {count} {split_name} images...")
    for i in range(count):
        print(f" Generating {split_name} image {i+1}/{count}")
        # Random gradient background with QR codes pasted on top.
        img, annotations = place_qr_codes(
            generate_random_gradient(image_size), num_qrcodes
        )
        # JPEG has no alpha channel, so drop it before saving.
        if img.mode == 'RGBA':
            img = img.convert('RGB')
        stem = f"{prefix}_{i:04d}"
        img.save(os.path.join(images_dir, f"{stem}.jpg"), quality=95)
        save_annotation(os.path.join(labels_dir, f"{stem}.txt"), annotations)


def generate_dataset(output_root, num_train_images=30,
                     image_size=(2048, 2048), num_qrcodes_per_image=50):
    """Generate a YOLOv8 QR-code detection dataset on disk.

    Creates ``train/`` and ``val/`` folders (each with ``images`` and
    ``labels``) under ``output_root``, fills them with synthetic images
    and label files, and writes a ``data.yaml`` describing the dataset.
    The validation split has one tenth as many images as the training
    split, but at least one.

    Args:
        output_root: Root directory of the dataset (created if missing).
        num_train_images: Number of training images to generate.
        image_size: Size of every image as ``(width, height)``.
        num_qrcodes_per_image: QR codes to try to place per image; a high
            count gives more small-target samples.
    """
    num_val_images = max(1, num_train_images // 10)

    # (file prefix, folder name, description used in messages, image count)
    splits = [
        ("train", "train", "training", num_train_images),
        ("val", "val", "validation", num_val_images),
    ]

    # Create the whole directory layout before generating anything.
    dirs = {}
    for prefix, folder, _, _ in splits:
        images_dir = os.path.join(output_root, folder, "images")
        labels_dir = os.path.join(output_root, folder, "labels")
        os.makedirs(images_dir, exist_ok=True)
        os.makedirs(labels_dir, exist_ok=True)
        dirs[prefix] = (images_dir, labels_dir)

    for prefix, _, split_name, count in splits:
        images_dir, labels_dir = dirs[prefix]
        _generate_split(
            images_dir, labels_dir, prefix, split_name, count,
            image_size, num_qrcodes_per_image,
        )

    # Write data.yaml for the trainer.
    yaml_content = (
        f"path: {os.path.abspath(output_root)}\n"
        "train: train/images\n"
        "val: val/images\n"
        "nc: 1\n"
        "names: ['qrcode']\n"
    )
    yaml_path = os.path.join(output_root, "data.yaml")
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write(yaml_content)

    print("\n" + "="*50)
    print("Dataset generation completed!")
    print("="*50)
    print(f"Output directory: {output_root}")
    print(f"Training images: {num_train_images}")
    print(f"Validation images: {num_val_images}")
    print(f"QR codes per image: {num_qrcodes_per_image}")
    print(f"Image size: {image_size}")
    print("Number of classes: 1")
    print("Class name: qrcode")
    print("="*50)
if __name__ == '__main__':
    # Destination directory of the generated dataset.
    output_root = r"D:\PycharmProjects\yolo\qrcode_dataset"
    # Number of training images to generate (at least 100 recommended).
    num_train_images = 100

    # Build the dataset.
    generate_dataset(output_root, num_train_images)

    # Print training suggestions, one banner line per entry.
    separator = "=" * 50
    for line in (
        "\n" + separator,
        "训练建议:",
        separator,
        "使用 YOLOv8-p2 进行小目标检测:",
        " yolo train data=data.yaml model=yolov8n-p2.pt epochs=200 imgsz=4096",
        "",
        "如果显存足够,可以使用 8192 输入尺寸:",
        " yolo train data=data.yaml model=yolov8n-p2.pt epochs=200 imgsz=8192",
        "",
        "如果需要更高精度,使用 s 模型:",
        " yolo train data=data.yaml model=yolov8s-p2.pt epochs=200 imgsz=4096",
        separator,
    ):
        print(line)