leo
/
defect-analysis


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
							"""
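Generate simulated defect data for LCD/OLED screen inspection.

Mimics real-world patterns: defects cluster near the panel edges and corners,
certain time periods see more defects, and specific equipment seats
concentrate particular defect types.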
"""

import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import json
import os

# Fixed seed for the global NumPy RNG used by every generator below.
np.random.seed(42)

# --- Configuration ---
NUM_PANELS = 500  # total number of inspected panels
OUTPUT_FILE = "defect_data.csv"

# Panel dimensions (mm)
PANEL_WIDTH, PANEL_HEIGHT = 155.0, 340.0

# Front-lamination equipment configuration.
# Three simulated machines; each has a rows x cols seat grid (20 seats each).
LAMINATION_EQUIPMENT = {
    machine: {"rows": n_rows, "cols": n_cols, "total_seats": n_rows * n_cols}
    for machine, n_rows, n_cols in (
        ("LAM-A01", 4, 5),
        ("LAM-A02", 4, 5),
        ("LAM-B01", 5, 4),
    )
}

# Seat name format: R<row>C<col>, e.g. R1C1, R1C2, ...
def get_seat_names(n_rows, n_cols):
    """Return the seat names of an ``n_rows`` x ``n_cols`` grid.

    Rows and columns are numbered from 1 and the names are listed row by
    row, so a 2 x 2 grid yields ``["R1C1", "R1C2", "R2C1", "R2C2"]``.
    A non-positive row or column count yields an empty list.
    """
    return [
        f"R{row}C{col}"
        for row in range(1, n_rows + 1)
        for col in range(1, n_cols + 1)
    ]

# Simulated per-seat defect tendencies: some seats produce more of one defect
# type because of an equipment problem. Keys are "<equipment_id>_<seat_id>".
#   LAM-A01 R2C3: aging suction nozzle        -> bubble ("气泡") defects cluster
#   LAM-A01 R4C1: uneven heating              -> light-leak ("漏光") defects cluster
#   LAM-A02 R1C5: uneven pressure             -> color-difference ("色差") defects cluster
#   LAM-B01 R3C2: foreign-matter contamination -> foreign-matter ("异物") defects cluster
SEAT_DEFECT_BIAS = {
    seat_key: {"defect_type": biased_type, "weight_boost": boost}
    for seat_key, biased_type, boost in (
        ("LAM-A01_R2C3", "气泡", 3.0),
        ("LAM-A01_R4C1", "漏光", 2.5),
        ("LAM-A02_R1C5", "色差", 2.5),
        ("LAM-B01_R3C2", "异物", 3.0),
    )
}

# Defect types and their base weights (Pareto-like: a few types dominate).
DEFECT_TYPES = {
    "划痕": 0.30,  # scratch
    "亮点": 0.20,  # bright dot
    "暗点": 0.15,  # dark dot
    "气泡": 0.12,  # bubble
    "色差": 0.08,  # color difference
    "漏光": 0.07,  # light leak
    "裂纹": 0.04,  # crack
    "异物": 0.04,  # foreign matter
}

# Production time range: 30 simulated days.
START_DATE = datetime(2026, 4, 1, hour=8)
END_DATE = datetime(2026, 4, 30, hour=20)

# AOI inspection stations of the front-lamination process.
INSPECTION_STATIONS = [
    "AOI-前贴附#1",
    "AOI-前贴附#2",
    "AOI-后段全检",
]


def generate_panel_positions():
    """Sample defect positions on the panel with spatial clustering.

    Points are drawn from four hotspot distributions plus a uniform
    background. Each source contributes a random number of points, and any
    point that falls outside the panel rectangle is discarded.

    Returns:
        A list of ``(x, y)`` tuples in mm, every one inside
        ``[0, PANEL_WIDTH] x [0, PANEL_HEIGHT]``.
    """
    rng = np.random
    x_parts = []
    y_parts = []

    # Hotspot 1: left edge region (lamination process issue).
    count = rng.randint(200, 350)
    x_parts.append(rng.normal(8, 5, count))
    y_parts.append(rng.uniform(20, PANEL_HEIGHT - 20, count))

    # Hotspot 2: the (PANEL_WIDTH, PANEL_HEIGHT) corner (stress concentration).
    count = rng.randint(150, 280)
    x_parts.append(rng.normal(PANEL_WIDTH - 15, 8, count))
    y_parts.append(rng.normal(PANEL_HEIGHT - 20, 15, count))

    # Hotspot 3: horizontally centred, at 75 % of the height (FPC bonding area).
    count = rng.randint(100, 200)
    x_parts.append(rng.normal(PANEL_WIDTH / 2, 20, count))
    y_parts.append(rng.normal(PANEL_HEIGHT * 0.75, 12, count))

    # Hotspot 4: the edge near y = 0.
    count = rng.randint(80, 150)
    x_parts.append(rng.uniform(30, PANEL_WIDTH - 30, count))
    y_parts.append(rng.normal(10, 4, count))

    # Uniformly distributed random defects (background noise).
    count = rng.randint(200, 400)
    x_parts.append(rng.uniform(5, PANEL_WIDTH - 5, count))
    y_parts.append(rng.uniform(5, PANEL_HEIGHT - 5, count))

    xs = np.concatenate(x_parts)
    ys = np.concatenate(y_parts)

    # Keep only the points that lie on the panel.
    on_panel = (xs >= 0) & (xs <= PANEL_WIDTH) & (ys >= 0) & (ys <= PANEL_HEIGHT)
    kept_x = np.clip(xs[on_panel], 0, PANEL_WIDTH)
    kept_y = np.clip(ys[on_panel], 0, PANEL_HEIGHT)
    return list(zip(kept_x, kept_y))


def generate_time_distribution(n_defects):
    """Sample ``n_defects`` timestamps between START_DATE and END_DATE.

    Every timestamp is first drawn uniformly over the whole date range.
    A draw inside the day window (08:00-16:59) is kept unchanged. A draw
    inside the night window (hour >= 17 or hour < 8) is kept with
    probability 0.4; otherwise it is relocated to a uniformly random
    instant in the 08:00-17:00 window of the same calendar day.

    NOTE(review): the original comment stated that night-shift defects are
    weighted higher, but this resampling moves about 60 % of the night
    draws into the day window. Confirm which behavior is intended.

    Args:
        n_defects: Number of timestamps to generate.

    Returns:
        A list of ``n_defects`` ``datetime`` objects in generation order
        (not sorted).
    """
    timestamps = []
    total_seconds = (END_DATE - START_DATE).total_seconds()

    for _ in range(n_defects):
        ts = START_DATE + timedelta(seconds=np.random.uniform(0, total_seconds))

        # Day-window draws are used as they are.
        if 8 <= ts.hour < 17:
            timestamps.append(ts)
            continue

        # Night-window draw: keep it 40 % of the time.
        if np.random.random() > 0.6:
            timestamps.append(ts)
            continue

        # Relocate into the day window. Reset to exactly 08:00:00 first:
        # keeping the original minutes/seconds would let the result spill
        # into 17:00-17:59, which counts as night shift again.
        day_start = ts.replace(hour=8, minute=0, second=0, microsecond=0)
        day_seconds = np.random.uniform(0, 9 * 3600)
        timestamps.append(day_start + timedelta(seconds=day_seconds))

    return timestamps


def assign_equipment_and_seat(n_defects, timestamps):
    """Assign a lamination machine and a seat to every defect timestamp.

    The machine depends on the hour of the timestamp:

    * night shift (hour >= 17 or hour < 8): LAM-A02 or LAM-B01 at random;
    * 08:00-11:59: always LAM-A01;
    * 12:00-16:59: LAM-A01 or LAM-A02 at random.

    The night window is tested first so that the early-morning hours 0-7,
    which the rest of this file treats as night shift, are not assigned
    the day-shift machine.

    The seat is drawn uniformly from the seat grid of the chosen machine.

    Args:
        n_defects: Unused; one assignment is produced per entry of
            ``timestamps``. Kept for interface compatibility.
        timestamps: Iterable of ``datetime`` objects.

    Returns:
        A tuple ``(equipment_ids, seat_ids)`` of two lists aligned with
        ``timestamps``.
    """
    equipment_list = list(LAMINATION_EQUIPMENT.keys())
    # The seat grid of a machine never changes, so build each list once
    # instead of once per defect.
    seats_by_equipment = {
        eq_id: get_seat_names(eq_info["rows"], eq_info["cols"])
        for eq_id, eq_info in LAMINATION_EQUIPMENT.items()
    }

    equipment_ids = []
    seat_ids = []

    for ts in timestamps:
        hour = ts.hour
        if hour >= 17 or hour < 8:
            eq_idx = np.random.choice([1, 2])  # night shift: LAM-A02 / LAM-B01
        elif hour < 12:
            eq_idx = 0  # morning: LAM-A01 only
        else:
            eq_idx = np.random.choice([0, 1])  # afternoon: LAM-A01 / LAM-A02

        eq_id = equipment_list[eq_idx]
        seat = np.random.choice(seats_by_equipment[eq_id])

        equipment_ids.append(eq_id)
        seat_ids.append(seat)

    return equipment_ids, seat_ids


def generate_defect_type_with_seat_bias(n_defects, equipment_ids, seat_ids):
    """Draw a defect type for each defect, honoring per-seat biases.

    The base probabilities come from DEFECT_TYPES. When the
    ``"<equipment_id>_<seat_id>"`` key of a defect appears in
    SEAT_DEFECT_BIAS, the weight of that seat's biased defect type is
    multiplied by its ``weight_boost`` and the weights are renormalized
    before drawing.

    Args:
        n_defects: Number of defect types to draw.
        equipment_ids: Equipment id per defect (indexed 0..n_defects-1).
        seat_ids: Seat id per defect (indexed 0..n_defects-1).

    Returns:
        A list of ``n_defects`` defect type names.
    """
    type_names = list(DEFECT_TYPES.keys())
    base_probs = np.array(list(DEFECT_TYPES.values()))
    drawn_types = []

    for idx in range(n_defects):
        machine = equipment_ids[idx]
        seat = seat_ids[idx]
        bias = SEAT_DEFECT_BIAS.get(f"{machine}_{seat}")

        if bias is None:
            probs = base_probs
        else:
            # Boost the biased type on a copy, then renormalize to sum to 1.
            probs = base_probs.copy()
            boosted = type_names.index(bias["defect_type"])
            probs[boosted] *= bias["weight_boost"]
            probs /= probs.sum()

        drawn_types.append(np.random.choice(type_names, p=probs))

    return drawn_types


def generate_severity(defect_type):
    """Draw a severity label for the given defect type.

    Cracks ("裂纹"), light leaks ("漏光") and scratches ("划痕") each have
    their own severity distribution; every other defect type uses the
    default distribution.

    Exactly one random draw is made per call. The previous implementation
    evaluated the draws for all distributions eagerly and discarded all but
    one, consuming four draws from the global RNG on every call.

    Args:
        defect_type: Defect type name.

    Returns:
        One of "轻微", "中等", "严重", as returned by ``np.random.choice``.
    """
    # (severity levels, probabilities) per defect type.
    distributions = {
        "裂纹": (["严重", "中等"], [0.7, 0.3]),
        "漏光": (["严重", "中等", "轻微"], [0.4, 0.4, 0.2]),
        "划痕": (["严重", "中等", "轻微"], [0.2, 0.4, 0.4]),
    }
    default_distribution = (["轻微", "中等", "严重"], [0.5, 0.35, 0.15])

    levels, probabilities = distributions.get(defect_type, default_distribution)
    return np.random.choice(levels, p=probabilities)


def generate_geometry(defect_type):
    """Generate point/line/region geometry attributes for a defect type.

    * Scratches and cracks ("划痕", "裂纹") are lines with a length, a
      stroke width and an angle; ``height_mm`` is 0.0.
    * Light leaks, color differences and bubbles ("漏光", "色差", "气泡")
      are regions with a width and a height.
    * Every other type is a point with equal width and height.

    Returns:
        A dict with the keys ``defect_geometry_type``, ``width_mm``,
        ``height_mm``, ``length_mm``, ``angle_deg`` and ``area_mm2``.
        The area is computed from the unrounded dimensions.
    """
    uniform = np.random.uniform

    if defect_type in ("划痕", "裂纹"):
        raw_length = float(uniform(4, 35))
        raw_width = float(uniform(0.05, 0.4))
        raw_angle = float(uniform(0, 180))
        kind = "line"
        width_mm, height_mm = round(raw_width, 2), 0.0
        length_mm, angle_deg = round(raw_length, 2), round(raw_angle, 1)
        area_mm2 = round(raw_length * raw_width, 2)
    elif defect_type in ("漏光", "色差", "气泡"):
        raw_width = float(uniform(1.5, 12))
        raw_height = float(uniform(1.5, 16))
        kind = "region"
        width_mm, height_mm = round(raw_width, 2), round(raw_height, 2)
        length_mm, angle_deg = 0.0, 0.0
        area_mm2 = round(raw_width * raw_height, 2)
    else:
        side = float(uniform(0.05, 0.8))
        kind = "point"
        width_mm = height_mm = round(side, 2)
        length_mm, angle_deg = 0.0, 0.0
        area_mm2 = round(side * side, 3)

    return {
        "defect_geometry_type": kind,
        "width_mm": width_mm,
        "height_mm": height_mm,
        "length_mm": length_mm,
        "angle_deg": angle_deg,
        "area_mm2": area_mm2,
    }


def generate_data():
    """Generate the complete simulated defect dataset and write it to disk.

    The number of defect records equals the number of positions returned by
    ``generate_panel_positions()``. Every other column is generated per
    record from the global NumPy RNG, so the statement order below
    determines the output for a given seed.

    Side effects:
        * prints progress messages to stdout;
        * writes the dataset to ``OUTPUT_FILE`` as CSV (utf-8-sig, no index);
        * writes summary counts to ``data_summary.json`` in the current
          working directory.

    Returns:
        The generated ``pandas.DataFrame`` (one row per defect).
    """
    print("生成模拟缺陷数据...")

    # Spatial positions; their count fixes the number of defect records.
    positions = generate_panel_positions()
    n_defects = len(positions)
    print(f"  生成 {n_defects} 个缺陷记录")

    # Timestamps
    timestamps = generate_time_distribution(n_defects)

    # Equipment and seat assignment
    equipment_ids, seat_ids = assign_equipment_and_seat(n_defects, timestamps)

    # Defect types (taking the per-seat bias into account)
    defect_type_list = generate_defect_type_with_seat_bias(n_defects, equipment_ids, seat_ids)

    # Panel IDs, drawn independently per defect from 1..NUM_PANELS
    panel_ids = [f"PANEL-{np.random.randint(1, NUM_PANELS+1):04d}" for _ in range(n_defects)]

    # Batch IDs: one batch per calendar day of the timestamp
    batch_ids = [f"BATCH-{ts.strftime('%Y%m%d')}" for ts in timestamps]

    # Severity per defect, depending on its type
    severities = [generate_severity(dt) for dt in defect_type_list]

    # Inspection station; the probabilities pair up with INSPECTION_STATIONS by position
    inspection_stations = [np.random.choice(INSPECTION_STATIONS, p=[0.4, 0.4, 0.2]) for _ in range(n_defects)]

    # Geometry attributes and rounded defect centre coordinates
    geometries = [generate_geometry(dt) for dt in defect_type_list]
    x_values = [round(p[0], 2) for p in positions]
    y_values = [round(p[1], 2) for p in positions]

    # Traceability columns: cleaning / bonding / lamination tooling and material lots.
    # Recipe, fixture and jig ids are derived from the equipment id suffix
    # (last three characters) or the seat id; the rest are drawn at random.
    clean_equipment_ids = [f"CLN-{np.random.choice(['A01', 'A02'])}" for _ in range(n_defects)]
    clean_slot_ids = [f"SLOT-{np.random.randint(1, 13):02d}" for _ in range(n_defects)]
    bond_equipment_ids = [f"BON-{np.random.choice(['A01', 'A02', 'B01'])}" for _ in range(n_defects)]
    bond_head_ids = [f"HEAD-{np.random.randint(1, 7):02d}" for _ in range(n_defects)]
    recipe_ids = [f"RCP-LAM-{eq[-3:]}" for eq in equipment_ids]
    lam_fixture_ids = [f"FIX-{eq[-3:]}-{np.random.randint(1, 5):02d}" for eq in equipment_ids]
    lam_jig_ids = [f"JIG-{seat}" for seat in seat_ids]
    lam_nozzle_ids = [f"NZ-{np.random.randint(1, 21):02d}" for _ in range(n_defects)]
    # Glass lots are keyed by date; OCA / polarizer / cover lots by year + week number (%W).
    material_lot_glass = [f"GLS-{ts.strftime('%Y%m%d')}-{np.random.randint(1, 4)}" for ts in timestamps]
    material_lot_oca = [f"OCA-{ts.strftime('%Y%W')}-{np.random.randint(1, 6)}" for ts in timestamps]
    material_lot_polarizer = [f"POL-{ts.strftime('%Y%W')}-{np.random.randint(1, 5)}" for ts in timestamps]
    material_lot_cover = [f"CVR-{ts.strftime('%Y%W')}-{np.random.randint(1, 4)}" for ts in timestamps]

    # Build the DataFrame; scalar values are broadcast to every row.
    # The bbox_* columns are the defect centre +/- half of width_mm / height_mm,
    # clamped to the panel rectangle.
    # NOTE(review): "line" geometries have height_mm == 0.0, so their bbox is
    # degenerate in y and ignores length_mm / angle_deg — confirm this is intended.
    df = pd.DataFrame({
        "defect_id": [f"D{i+1:05d}" for i in range(n_defects)],
        "panel_id": panel_ids,
        "batch_id": batch_ids,
        "equipment_id": equipment_ids,
        "seat_id": seat_ids,
        "inspection_station": inspection_stations,
        "timestamp": timestamps,
        "defect_type": defect_type_list,
        "severity": severities,
        "x_mm": x_values,
        "y_mm": y_values,
        "panel_width_mm": PANEL_WIDTH,
        "panel_height_mm": PANEL_HEIGHT,
        "hour": [ts.hour for ts in timestamps],
        "shift": ["夜班" if (ts.hour >= 17 or ts.hour < 8) else "白班" for ts in timestamps],
        "day": [ts.strftime("%Y-%m-%d") for ts in timestamps],
        "defect_geometry_type": [g["defect_geometry_type"] for g in geometries],
        "width_mm": [g["width_mm"] for g in geometries],
        "height_mm": [g["height_mm"] for g in geometries],
        "length_mm": [g["length_mm"] for g in geometries],
        "angle_deg": [g["angle_deg"] for g in geometries],
        "area_mm2": [g["area_mm2"] for g in geometries],
        "bbox_x_min_mm": [max(0, x - g["width_mm"] / 2) for x, g in zip(x_values, geometries)],
        "bbox_y_min_mm": [max(0, y - g["height_mm"] / 2) for y, g in zip(y_values, geometries)],
        "bbox_x_max_mm": [min(PANEL_WIDTH, x + g["width_mm"] / 2) for x, g in zip(x_values, geometries)],
        "bbox_y_max_mm": [min(PANEL_HEIGHT, y + g["height_mm"] / 2) for y, g in zip(y_values, geometries)],
        "contour_json": "",
        "process_step": "前贴附",
        "recipe_id": recipe_ids,
        "operator_id": [f"OP-{np.random.randint(1, 9):02d}" for _ in range(n_defects)],
        "clean_equipment_id": clean_equipment_ids,
        "clean_slot_id": clean_slot_ids,
        "clean_recipe_id": [f"RCP-CLN-{eq[-3:]}" for eq in clean_equipment_ids],
        "lam_equipment_id": equipment_ids,
        "lam_seat_id": seat_ids,
        "lam_fixture_id": lam_fixture_ids,
        "lam_jig_id": lam_jig_ids,
        "lam_nozzle_id": lam_nozzle_ids,
        "bond_equipment_id": bond_equipment_ids,
        "bond_head_id": bond_head_ids,
        "bond_recipe_id": [f"RCP-BON-{eq[-3:]}" for eq in bond_equipment_ids],
        # Both AOI columns reuse the inspection station values.
        "aoi_equipment_id": inspection_stations,
        "aoi_station_id": inspection_stations,
        "material_lot_glass": material_lot_glass,
        "material_lot_oca": material_lot_oca,
        "material_lot_polarizer": material_lot_polarizer,
        "material_lot_cover": material_lot_cover,
    })

    # Save the dataset
    df.to_csv(OUTPUT_FILE, index=False, encoding="utf-8-sig")
    print(f"数据已保存到 {OUTPUT_FILE}")

    # Save a statistics summary (per-category record counts plus configuration)
    types = list(DEFECT_TYPES.keys())
    summary = {
        "total_defects": n_defects,
        "total_panels": NUM_PANELS,
        "defect_types": {t: int((df["defect_type"] == t).sum()) for t in types},
        "severity_distribution": {s: int((df["severity"] == s).sum()) for s in ["轻微", "中等", "严重"]},
        "shift_distribution": {s: int((df["shift"] == s).sum()) for s in ["白班", "夜班"]},
        "equipment_distribution": {e: int((df["equipment_id"] == e).sum()) for e in LAMINATION_EQUIPMENT.keys()},
        "date_range": {
            "start": START_DATE.strftime("%Y-%m-%d"),
            "end": END_DATE.strftime("%Y-%m-%d"),
        },
        "lamination_config": {
            "equipment": list(LAMINATION_EQUIPMENT.keys()),
            "seat_bias": {k: v["defect_type"] for k, v in SEAT_DEFECT_BIAS.items()},
        },
    }
    # ensure_ascii=False keeps the Chinese labels readable in the JSON file.
    with open("data_summary.json", "w", encoding="utf-8") as f:
        json.dump(summary, f, ensure_ascii=False, indent=2)
    print(f"统计摘要已保存到 data_summary.json")

    return df


if __name__ == "__main__":
    # Generate the dataset, then print a short overview of it.
    df = generate_data()

    overview_lines = [
        "\n数据概览:",
        f"  总记录数: {len(df)}",
        f"  缺陷类型数: {df['defect_type'].nunique()}",
        f"  面板数量: {df['panel_id'].nunique()}",
        f"  批次数量: {df['batch_id'].nunique()}",
        f"  设备数量: {df['equipment_id'].nunique()}",
        f"  座号数量: {df['seat_id'].nunique()}",
    ]
    print("\n".join(overview_lines))

    # Record counts per defect type, per machine and per shift.
    for heading, column in (
        ("\n缺陷类型分布:", "defect_type"),
        ("\n设备分布:", "equipment_id"),
        ("\n班次分布:", "shift"),
    ):
        print(heading)
        print(df[column].value_counts().to_string())