# lxCameraApi/generate_qrcode_dataset.py

import os
import random
import math
import numpy as np
from PIL import Image, ImageDraw, ImageFilter, ImageEnhance
from qrcode import QRCode
import qrcode.constants
import cv2


def generate_random_gradient(size=(2048, 2048)):
    """Build a random vertical two-colour gradient background.

    Two random RGB colours are drawn; the image fades linearly from the
    first colour on the top row to the second colour on the bottom row.

    Args:
        size: Output size as (width, height) in pixels.

    Returns:
        A PIL image created from a (height, width, 3) uint8 array.
    """
    width, height = size

    # Draw the two end colours (top first, then bottom).
    top_color = [random.randint(0, 255) for _ in range(3)]
    bottom_color = [random.randint(0, 255) for _ in range(3)]

    # One RGB triple per image row: shape (height, 3).
    row_colors = np.linspace(top_color, bottom_color, height, dtype=np.uint8)

    # Copy each row's colour across all columns: shape (height, width, 3).
    pixels = np.tile(row_colors[:, np.newaxis, :], (1, width, 1))

    return Image.fromarray(pixels)


def generate_qr_code(size=(200, 200)):
    """Generate a random QR code image with a transparent background.

    The payload is a string of 50 random decimal digits. The code is first
    rendered black-on-white, then every near-white pixel is made fully
    transparent, and finally the image is resized to ``size``.

    Args:
        size: Output size as (width, height) in pixels.

    Returns:
        An RGBA PIL image of the requested size.
    """
    qr = QRCode(
        version=1,
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        box_size=10,
        border=4,
    )

    # Random payload. fit=True is passed so that, per the qrcode library,
    # the version can grow if the requested one cannot hold the data.
    data = ''.join([str(random.randint(0, 9)) for _ in range(50)])
    qr.add_data(data)
    qr.make(fit=True)

    # Render on a white background first, then switch to RGBA.
    img = qr.make_image(fill_color="black", back_color="white")
    img = img.convert('RGBA')

    # Make near-white pixels transparent. This is done with one vectorised
    # mask instead of a per-pixel Python loop, which is far faster for the
    # thousands of codes a dataset run produces. np.array() returns a
    # writable copy of the pixel data with shape (H, W, 4).
    pixels = np.array(img)
    near_white = (pixels[..., :3] > 230).all(axis=-1)
    pixels[near_white] = (255, 255, 255, 0)
    img = Image.fromarray(pixels)

    # Scale to the requested size.
    return img.resize(size, Image.Resampling.LANCZOS)


def transform_qr_code(qr_img):
    """Apply random brightness, contrast and blur to a QR code image.

    Only the colour channels are altered; the alpha channel of the input is
    carried through unchanged.

    Args:
        qr_img: PIL image of the QR code; converted to RGBA if it is not
            already in that mode.

    Returns:
        An RGBA PIL image with the adjusted colour channels and the
        original alpha channel.
    """
    rgba = qr_img if qr_img.mode == 'RGBA' else qr_img.convert('RGBA')

    # Separate colour from alpha so the enhancers never touch transparency.
    *color_bands, alpha = rgba.split()
    rgb = Image.merge('RGB', tuple(color_bands))

    # Random brightness, then random contrast.
    rgb = ImageEnhance.Brightness(rgb).enhance(random.uniform(0.7, 1.3))
    rgb = ImageEnhance.Contrast(rgb).enhance(random.uniform(0.8, 1.2))

    # Random Gaussian blur (sharpness); skipped only for a radius of 0.
    sigma = random.uniform(0, 1.5)
    if sigma > 0:
        rgb = rgb.filter(ImageFilter.GaussianBlur(radius=sigma))

    # Re-attach the untouched alpha channel.
    return Image.merge('RGBA', (*rgb.split(), alpha))


def is_overlapping(x1, y1, w1, h1, x2, y2, w2, h2, margin=20):
    """Tell whether two rectangles overlap once padded by ``margin``.

    Each rectangle is given by its top-left corner (x, y) and its size
    (w, h). The rectangles count as overlapping unless one lies more than
    ``margin`` beyond the other on at least one axis.

    Returns:
        True if the padded rectangles overlap or touch, False otherwise.
    """
    # Rectangle 1 ends more than `margin` to the left of rectangle 2.
    if x1 + w1 + margin < x2:
        return False
    # Rectangle 2 ends more than `margin` to the left of rectangle 1.
    if x2 + w2 + margin < x1:
        return False
    # Rectangle 1 ends more than `margin` above rectangle 2.
    if y1 + h1 + margin < y2:
        return False
    # Rectangle 2 ends more than `margin` above rectangle 1.
    if y2 + h2 + margin < y1:
        return False
    return True


def calculate_bbox(img_size, x, y, width, height):
    """Convert a pixel rectangle into a normalised YOLO bounding box.

    Args:
        img_size: Size of the full image as (width, height).
        x, y: Top-left corner of the QR code in pixels.
        width, height: Size of the QR code in pixels.

    Returns:
        (x_center, y_center, bbox_width, bbox_height), each divided by the
        matching image dimension and clamped to the range [0, 1].
    """
    img_w, img_h = img_size

    def clamp_unit(value):
        # Keep a normalised value inside [0, 1].
        return max(0, min(1, value))

    # The bounding box is the QR code rectangle itself; YOLO wants its
    # centre and size relative to the image dimensions.
    return (
        clamp_unit((x + width / 2) / img_w),
        clamp_unit((y + height / 2) / img_h),
        clamp_unit(width / img_w),
        clamp_unit(height / img_h),
    )


def place_qr_codes(img, num_qrcodes=20):
    """Paste up to ``num_qrcodes`` random, non-overlapping QR codes on ``img``.

    Each QR code is a random square: 50-100 px with 70% probability,
    otherwise 100-200 px. Up to 100 random positions are tried for each
    code; a code that cannot be placed without overlapping an earlier one,
    or that is larger than the image, is skipped with a printed warning.

    Args:
        img: Background PIL image. It is converted to RGBA before the first
            paste; an image that is already RGBA is pasted onto directly.
        num_qrcodes: Number of QR codes to attempt to place.

    Returns:
        (img, annotations): the composited image and one
        (x_center, y_center, width, height) tuple per placed code, as
        produced by ``calculate_bbox`` (normalised YOLO coordinates).
    """
    img_w, img_h = img.size
    annotations = []
    placed_boxes = []
    max_attempts = 100

    for i in range(num_qrcodes):
        # Bias the size distribution towards small targets.
        if random.random() < 0.7:
            # 70% chance: small target, 50-100 px.
            qr_size_value = random.randint(50, 100)
        else:
            # 30% chance: medium target, 100-200 px.
            qr_size_value = random.randint(100, 200)
        qr_size = (qr_size_value, qr_size_value)

        # Generate the code and apply the random appearance changes
        # (no rotation is applied).
        qr_img_transformed = transform_qr_code(generate_qr_code(qr_size))
        qr_w, qr_h = qr_img_transformed.size

        # Largest top-left corner that keeps the code fully inside the image.
        max_x = img_w - qr_w
        max_y = img_h - qr_h
        if max_x < 0 or max_y < 0:
            # random.randint would raise ValueError on an empty range, so
            # skip codes that cannot fit instead of crashing.
            print(f"Warning: QR code {i+1} ({qr_w}x{qr_h}) does not fit "
                  f"in image ({img_w}x{img_h}); skipped")
            continue

        for _ in range(max_attempts):
            x = random.randint(0, max_x)
            y = random.randint(0, max_y)

            # Reject positions that collide with an already placed code.
            if any(is_overlapping(x, y, qr_w, qr_h, *box)
                   for box in placed_boxes):
                continue

            # Pasting with an alpha mask requires an RGBA destination.
            if img.mode != 'RGBA':
                img = img.convert('RGBA')

            # Use the code's own alpha channel as the paste mask.
            alpha = qr_img_transformed.split()[-1]
            img.paste(qr_img_transformed, (x, y), alpha)

            placed_boxes.append((x, y, qr_w, qr_h))

            # The annotation is the code's own rectangle in YOLO form.
            annotations.append(
                calculate_bbox((img_w, img_h), x, y, qr_w, qr_h)
            )
            break
        else:
            print(f"Warning: Could not place QR code {i+1} without overlap")

    return img, annotations


def save_annotation(label_path, annotations):
    """Write bounding boxes to a YOLO-format label file.

    Args:
        label_path: Path of the text file to create or overwrite.
        annotations: Iterable of (x, y, w, h) values; each becomes one line
            with class id 0 (QR code) and six decimal places per value.
    """
    with open(label_path, 'w') as label_file:
        label_file.writelines(
            f"0 {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}\n"
            for cx, cy, bw, bh in annotations
        )


def _generate_split(images_dir, labels_dir, prefix, description, count,
                    image_size, num_qrcodes_per_image):
    """Render ``count`` images and YOLO label files for one dataset split."""
    print(f"Generating {count} {description} images...")
    for i in range(count):
        print(f"  Generating {description} image {i+1}/{count}")

        # Random gradient background with QR codes pasted on top.
        img = generate_random_gradient(image_size)
        img, annotations = place_qr_codes(img, num_qrcodes_per_image)

        # JPEG has no alpha channel, so drop it before saving.
        if img.mode == 'RGBA':
            img = img.convert('RGB')

        stem = f"{prefix}_{i:04d}"
        img.save(os.path.join(images_dir, f"{stem}.jpg"), quality=95)
        save_annotation(os.path.join(labels_dir, f"{stem}.txt"), annotations)


def generate_dataset(output_root, num_train_images=30, *,
                     image_size=(2048, 2048), num_qrcodes_per_image=50):
    """Generate a YOLOv8 QR-code detection dataset on disk.

    Creates ``train/`` and ``val/`` folders (each with ``images`` and
    ``labels`` subfolders) under ``output_root``, fills them with synthetic
    images and label files, writes ``data.yaml`` and prints a summary.

    Args:
        output_root: Directory that receives the dataset; created if missing.
        num_train_images: Number of training images. The validation split
            gets one tenth of that (integer division), but at least one.
        image_size: Size of every generated image as (width, height).
        num_qrcodes_per_image: Number of QR codes attempted per image; a
            high count gives more small-target samples.
    """
    num_val_images = max(1, num_train_images // 10)

    # Folder layout: <root>/<split>/{images,labels}.
    train_images_dir = os.path.join(output_root, "train", "images")
    train_labels_dir = os.path.join(output_root, "train", "labels")
    val_images_dir = os.path.join(output_root, "val", "images")
    val_labels_dir = os.path.join(output_root, "val", "labels")

    for directory in (train_images_dir, train_labels_dir,
                      val_images_dir, val_labels_dir):
        os.makedirs(directory, exist_ok=True)

    _generate_split(train_images_dir, train_labels_dir, "train", "training",
                    num_train_images, image_size, num_qrcodes_per_image)
    _generate_split(val_images_dir, val_labels_dir, "val", "validation",
                    num_val_images, image_size, num_qrcodes_per_image)

    # Dataset description file with a single class, 'qrcode'.
    yaml_content = f"""path: {os.path.abspath(output_root)}
train: train/images
val: val/images

nc: 1
names: ['qrcode']
"""

    yaml_path = os.path.join(output_root, "data.yaml")
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write(yaml_content)

    print("\n" + "="*50)
    print("Dataset generation completed!")
    print("="*50)
    print(f"Output directory: {output_root}")
    print(f"Training images: {num_train_images}")
    print(f"Validation images: {num_val_images}")
    print(f"QR codes per image: {num_qrcodes_per_image}")
    print(f"Image size: {image_size}")
    print("Number of classes: 1")
    print("Class name: qrcode")
    print("="*50)


if __name__ == '__main__':
    # Destination folder for the generated dataset.
    output_root = r"D:\PycharmProjects\yolo\qrcode_dataset"

    # Number of training images to generate (at least 100 is recommended).
    num_train_images = 100

    # Build the dataset.
    generate_dataset(output_root, num_train_images)

    # Print suggested training commands, framed by separator rules.
    rule = "=" * 50
    training_hints = [
        "",
        rule,
        "训练建议：",
        rule,
        "使用 YOLOv8-p2 进行小目标检测：",
        "  yolo train data=data.yaml model=yolov8n-p2.pt epochs=200 imgsz=4096",
        "",
        "如果显存足够，可以使用 8192 输入尺寸：",
        "  yolo train data=data.yaml model=yolov8n-p2.pt epochs=200 imgsz=8192",
        "",
        "如果需要更高精度，使用 s 模型：",
        "  yolo train data=data.yaml model=yolov8s-p2.pt epochs=200 imgsz=4096",
        rule,
    ]
    print("\n".join(training_hints))