# leo/defect-analysis
"""
Generate simulated defect data for LCD/OLED screen inspection.

Mimics realistic conditions: defects cluster near panel edges and corners,
some time periods have more defects, and defects concentrate on specific
equipment seats.
"""

import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import json
import os

# Seed NumPy's global RNG so repeated runs generate the same dataset.
np.random.seed(42)

# --- Configuration ---
NUM_PANELS = 500  # total number of inspected panels
OUTPUT_FILE = "defect_data.csv"

# Panel dimensions (mm)
PANEL_WIDTH = 155.0
PANEL_HEIGHT = 340.0

# Front-lamination process equipment configuration.
# Simulates 3 lamination machines with 20 seats each; the grid is 4x5 for the
# two LAM-A machines and 5x4 for LAM-B01.
LAMINATION_EQUIPMENT = {
    "LAM-A01": {"rows": 4, "cols": 5, "total_seats": 20},
    "LAM-A02": {"rows": 4, "cols": 5, "total_seats": 20},
    "LAM-B01": {"rows": 5, "cols": 4, "total_seats": 20},
}

# Seat name format: row number then column number, e.g. R1C1, R1C2, ...
def get_seat_names(n_rows, n_cols):
    """Return the seat labels of an ``n_rows`` x ``n_cols`` grid.

    Labels have the form ``R<row>C<col>`` with 1-based indices and are listed
    row by row (all columns of row 1, then row 2, ...). A non-positive
    dimension yields an empty list.
    """
    return [
        f"R{row}C{col}"
        for row in range(1, n_rows + 1)
        for col in range(1, n_cols + 1)
    ]

# Simulated per-seat defect tendencies (some seats produce more defects
# because of equipment problems):
#   LAM-A01 seat R2C3: aging suction nozzle  -> bubble (气泡) defects cluster
#   LAM-A01 seat R4C1: uneven heating        -> light-leak (漏光) defects cluster
#   LAM-A02 seat R1C5: uneven pressure       -> color-shift (色差) defects cluster
#   LAM-B01 seat R3C2: foreign contamination -> foreign-matter (异物) defects cluster
# Keys are "<equipment_id>_<seat_id>"; "weight_boost" multiplies the base
# weight of "defect_type" for defects recorded on that seat.
SEAT_DEFECT_BIAS = {
    "LAM-A01_R2C3": {"defect_type": "气泡", "weight_boost": 3.0},
    "LAM-A01_R4C1": {"defect_type": "漏光", "weight_boost": 2.5},
    "LAM-A02_R1C5": {"defect_type": "色差", "weight_boost": 2.5},
    "LAM-B01_R3C2": {"defect_type": "异物", "weight_boost": 3.0},
}

# Defect types and their weights (mimics a Pareto distribution: a few types
# account for most defects). The weights are passed directly as probabilities
# to np.random.choice, so they must sum to 1.
DEFECT_TYPES = {
    "划痕": 0.30,  # scratch
    "亮点": 0.20,  # bright spot
    "暗点": 0.15,  # dark spot
    "气泡": 0.12,  # bubble
    "色差": 0.08,  # color shift
    "漏光": 0.07,  # light leak
    "裂纹": 0.04,  # crack
    "异物": 0.04,  # foreign matter
}

# Production time window: simulates 30 days of data.
START_DATE = datetime(2026, 4, 1, 8, 0, 0)
END_DATE = datetime(2026, 4, 30, 20, 0, 0)

# Inspection stations (AOI) of the front-lamination process.
INSPECTION_STATIONS = ["AOI-前贴附#1", "AOI-前贴附#2", "AOI-后段全检"]


def generate_panel_positions():
    """Sample defect coordinates on the panel with spatial clustering.

    Four clustered "hot spot" populations and one uniform background
    population are drawn from NumPy's global RNG, concatenated in that order,
    and every point falling outside the panel rectangle
    ``[0, PANEL_WIDTH] x [0, PANEL_HEIGHT]`` is discarded.

    Returns:
        A list of ``(x_mm, y_mm)`` tuples; its length varies with the random
        cluster sizes and with how many points are discarded.
    """
    width, height = PANEL_WIDTH, PANEL_HEIGHT
    rng = np.random

    # Hot spot 1: left edge region (lamination process issue).
    left_count = rng.randint(200, 350)
    left_x = rng.normal(8, 5, left_count)
    left_y = rng.uniform(20, height - 20, left_count)

    # Hot spot 2: bottom-right corner (stress concentration area).
    corner_count = rng.randint(150, 280)
    corner_x = rng.normal(width - 15, 8, corner_count)
    corner_y = rng.normal(height - 20, 15, corner_count)

    # Hot spot 3: horizontally centred band at 75% of the panel height
    # (described in the original notes as the FPC bonding area).
    band_count = rng.randint(100, 200)
    band_x = rng.normal(width / 2, 20, band_count)
    band_y = rng.normal(height * 0.75, 12, band_count)

    # Hot spot 4: edge strip near y = 10 mm (the "top edge" in the original notes).
    strip_count = rng.randint(80, 150)
    strip_x = rng.uniform(30, width - 30, strip_count)
    strip_y = rng.normal(10, 4, strip_count)

    # Uniformly distributed random defects (background noise).
    noise_count = rng.randint(200, 400)
    noise_x = rng.uniform(5, width - 5, noise_count)
    noise_y = rng.uniform(5, height - 5, noise_count)

    xs = np.concatenate([left_x, corner_x, band_x, strip_x, noise_x])
    ys = np.concatenate([left_y, corner_y, band_y, strip_y, noise_y])

    # Keep only points that lie on the panel.
    on_panel = (xs >= 0) & (xs <= width) & (ys >= 0) & (ys <= height)
    kept_x = np.clip(xs[on_panel], 0, width)
    kept_y = np.clip(ys[on_panel], 0, height)

    return list(zip(kept_x, kept_y))


def generate_time_distribution(n_defects, start=None, end=None):
    """Draw ``n_defects`` timestamps with a shift-dependent redistribution.

    Each timestamp is first drawn uniformly between ``start`` and ``end``.
    Draws that fall in day-shift hours (08:00-16:59) are kept as they are.
    Draws that fall in night-shift hours (17:00-07:59) are kept with
    probability 0.4; otherwise they are relocated to a uniformly random
    instant in the day-shift window 08:00:00-17:00:00 of the same calendar
    date.

    Args:
        n_defects: Number of timestamps to generate.
        start: Beginning of the sampling window; defaults to ``START_DATE``.
        end: End of the sampling window; defaults to ``END_DATE``.

    Returns:
        A list of ``n_defects`` ``datetime`` objects, in draw order. A
        relocated timestamp keeps the date of its original draw, so it can
        precede ``start`` when ``start`` is later than 08:00 on that date.
    """
    window_start = START_DATE if start is None else start
    window_end = END_DATE if end is None else end
    total_seconds = (window_end - window_start).total_seconds()
    timestamps = []

    for _ in range(n_defects):
        random_seconds = np.random.uniform(0, total_seconds)
        ts = window_start + timedelta(seconds=random_seconds)

        # Day-shift draws are used unchanged.
        if 8 <= ts.hour < 17:
            timestamps.append(ts)
            continue

        # NOTE(review): the original comment here said the night shift
        # (17:00-8:00) should carry a higher defect weight, yet this step
        # moves 60% of night-hour draws into the day shift. Confirm the
        # intended direction before relying on the shift ratio.
        if np.random.random() > 0.6:
            timestamps.append(ts)
            continue

        # Anchor at exactly 08:00:00 before adding the offset. Keeping the
        # minutes/seconds of the original draw would let the result spill
        # into 17:00-17:59, which counts as night shift.
        day_seconds = np.random.uniform(0, 9 * 3600)
        day_start = ts.replace(hour=8, minute=0, second=0, microsecond=0)
        timestamps.append(day_start + timedelta(seconds=day_seconds))

    return timestamps


def assign_equipment_and_seat(n_defects, timestamps):
    """Assign a lamination machine and a seat to every defect timestamp.

    The machine depends on the hour of the timestamp, using the same shift
    boundaries as the rest of the file (night shift is 17:00-07:59):

    * 08:00-11:59 - always the first machine (LAM-A01).
    * 12:00-16:59 - the first or second machine, chosen at random.
    * 17:00-07:59 - the second or third machine, chosen at random.

    The seat is then drawn uniformly from that machine's seat grid.

    Args:
        n_defects: Unused; kept so existing callers keep working. One
            assignment is produced per element of ``timestamps``.
        timestamps: Iterable of ``datetime`` objects.

    Returns:
        A tuple ``(equipment_ids, seat_ids)`` of two lists aligned with
        ``timestamps``.
    """
    equipment_list = list(LAMINATION_EQUIPMENT)
    # Seat grids never change, so build each machine's seat list once
    # instead of regenerating it for every defect.
    seats_by_equipment = {
        eq_id: get_seat_names(info["rows"], info["cols"])
        for eq_id, info in LAMINATION_EQUIPMENT.items()
    }

    equipment_ids = []
    seat_ids = []

    for ts in timestamps:
        hour = ts.hour
        if 8 <= hour < 12:
            # Morning of the day shift mainly uses LAM-A01.
            eq_idx = 0
        elif 12 <= hour < 17:
            # Afternoon uses both LAM-A machines.
            eq_idx = np.random.choice([0, 1])
        else:
            # Night shift uses LAM-A02 and LAM-B01. Hours 0-7 belong here
            # too; a plain `hour < 12` test would send them to LAM-A01.
            eq_idx = np.random.choice([1, 2])

        eq_id = equipment_list[eq_idx]
        equipment_ids.append(eq_id)
        seat_ids.append(np.random.choice(seats_by_equipment[eq_id]))

    return equipment_ids, seat_ids


def generate_defect_type_with_seat_bias(n_defects, equipment_ids, seat_ids):
    """Draw a defect type for each defect, honouring per-seat biases.

    For defect ``i`` the lookup key ``"<equipment_id>_<seat_id>"`` is checked
    against ``SEAT_DEFECT_BIAS``. When present, the base weight of the biased
    defect type is multiplied by ``weight_boost`` and the weights are
    renormalised before sampling; otherwise the base ``DEFECT_TYPES`` weights
    are used unchanged.

    Args:
        n_defects: Number of defect types to draw.
        equipment_ids: Equipment id of each defect (indexable, at least
            ``n_defects`` long).
        seat_ids: Seat id of each defect (indexable, at least ``n_defects``
            long).

    Returns:
        A list of ``n_defects`` defect type names.
    """
    type_names = list(DEFECT_TYPES)
    base_probs = np.array([DEFECT_TYPES[name] for name in type_names])
    chosen = []

    for idx in range(n_defects):
        bias = SEAT_DEFECT_BIAS.get(f"{equipment_ids[idx]}_{seat_ids[idx]}")

        if bias is None:
            probs = base_probs
        else:
            # Boost the biased type on a private copy, then renormalise so
            # the weights are valid probabilities again.
            probs = base_probs.copy()
            boosted = type_names.index(bias["defect_type"])
            probs[boosted] *= bias["weight_boost"]
            probs /= probs.sum()

        chosen.append(np.random.choice(type_names, p=probs))

    return chosen


def generate_severity(defect_type):
    """Draw a severity label for a defect of the given type.

    Cracks (裂纹), light leaks (漏光) and scratches (划痕) have their own
    severity distributions; every other type uses the default distribution.
    Exactly one random draw is made per call: the distribution is selected
    first and sampled afterwards, rather than sampling every distribution
    and discarding all but one.

    Args:
        defect_type: Defect type name.

    Returns:
        One of "轻微" (minor), "中等" (moderate) or "严重" (severe), as
        returned by ``np.random.choice``.
    """
    # (labels, probabilities) per defect type with a dedicated distribution.
    severity_profiles = {
        "裂纹": (["严重", "中等"], [0.7, 0.3]),
        "漏光": (["严重", "中等", "轻微"], [0.4, 0.4, 0.2]),
        "划痕": (["严重", "中等", "轻微"], [0.2, 0.4, 0.4]),
    }
    default_profile = (["轻微", "中等", "严重"], [0.5, 0.35, 0.15])

    levels, probabilities = severity_profiles.get(defect_type, default_profile)
    return np.random.choice(levels, p=probabilities)


def generate_data():
    """Build the full simulated defect dataset and write it to disk.

    Generates positions, timestamps, equipment/seat assignments, defect
    types, panel ids, batch ids, severities and inspection stations (in that
    order, which fixes the sequence of random draws), assembles them into a
    DataFrame, writes it to ``OUTPUT_FILE`` as CSV and writes a statistics
    summary to ``data_summary.json``.

    Returns:
        The generated ``pandas.DataFrame`` with one row per defect.
    """
    print("生成模拟缺陷数据...")

    # Spatial positions determine how many defect records exist.
    positions = generate_panel_positions()
    n_defects = len(positions)
    print(f"  生成 {n_defects} 个缺陷记录")

    timestamps = generate_time_distribution(n_defects)
    equipment_ids, seat_ids = assign_equipment_and_seat(n_defects, timestamps)
    defect_type_list = generate_defect_type_with_seat_bias(n_defects, equipment_ids, seat_ids)

    # Panel ids: each defect lands on one of NUM_PANELS panels.
    panel_ids = []
    for _ in range(n_defects):
        panel_no = np.random.randint(1, NUM_PANELS + 1)
        panel_ids.append(f"PANEL-{panel_no:04d}")

    # Batch ids are derived from the calendar date of the defect.
    batch_ids = [f"BATCH-{stamp.strftime('%Y%m%d')}" for stamp in timestamps]

    severities = [generate_severity(kind) for kind in defect_type_list]

    station_probs = [0.4, 0.4, 0.2]
    inspection_stations = [
        np.random.choice(INSPECTION_STATIONS, p=station_probs)
        for _ in range(n_defects)
    ]

    columns = {
        "defect_id": [f"D{seq:05d}" for seq in range(1, n_defects + 1)],
        "panel_id": panel_ids,
        "batch_id": batch_ids,
        "equipment_id": equipment_ids,
        "seat_id": seat_ids,
        "inspection_station": inspection_stations,
        "timestamp": timestamps,
        "defect_type": defect_type_list,
        "severity": severities,
        "x_mm": [round(x, 2) for x, _ in positions],
        "y_mm": [round(y, 2) for _, y in positions],
        "panel_width_mm": PANEL_WIDTH,
        "panel_height_mm": PANEL_HEIGHT,
        "hour": [stamp.hour for stamp in timestamps],
        # Day shift covers 08:00-16:59; everything else is night shift.
        "shift": ["白班" if 8 <= stamp.hour < 17 else "夜班" for stamp in timestamps],
        "day": [stamp.strftime("%Y-%m-%d") for stamp in timestamps],
    }
    df = pd.DataFrame(columns)

    # utf-8-sig writes a BOM at the start of the CSV file.
    df.to_csv(OUTPUT_FILE, index=False, encoding="utf-8-sig")
    print(f"数据已保存到 {OUTPUT_FILE}")

    def count_by(column, labels):
        """Count rows per label, reporting 0 for labels that never occur."""
        return {label: int((df[column] == label).sum()) for label in labels}

    summary = {
        "total_defects": n_defects,
        "total_panels": NUM_PANELS,
        "defect_types": count_by("defect_type", list(DEFECT_TYPES)),
        "severity_distribution": count_by("severity", ["轻微", "中等", "严重"]),
        "shift_distribution": count_by("shift", ["白班", "夜班"]),
        "equipment_distribution": count_by("equipment_id", LAMINATION_EQUIPMENT),
        "date_range": {
            "start": START_DATE.strftime("%Y-%m-%d"),
            "end": END_DATE.strftime("%Y-%m-%d"),
        },
        "lamination_config": {
            "equipment": list(LAMINATION_EQUIPMENT),
            "seat_bias": {
                seat_key: bias["defect_type"]
                for seat_key, bias in SEAT_DEFECT_BIAS.items()
            },
        },
    }
    with open("data_summary.json", "w", encoding="utf-8") as summary_file:
        json.dump(summary, summary_file, ensure_ascii=False, indent=2)
    print("统计摘要已保存到 data_summary.json")

    return df


if __name__ == "__main__":
    # Generate the dataset, then print a short overview of what was produced.
    df = generate_data()

    overview_lines = [
        "\n数据概览:",
        f"  总记录数: {len(df)}",
        f"  缺陷类型数: {df['defect_type'].nunique()}",
        f"  面板数量: {df['panel_id'].nunique()}",
        f"  批次数量: {df['batch_id'].nunique()}",
        f"  设备数量: {df['equipment_id'].nunique()}",
        f"  座号数量: {df['seat_id'].nunique()}",
    ]
    print("\n".join(overview_lines))

    # Per-category record counts: defect type, equipment, shift.
    for heading, column in (
        ("\n缺陷类型分布:", "defect_type"),
        ("\n设备分布:", "equipment_id"),
        ("\n班次分布:", "shift"),
    ):
        print(heading)
        print(df[column].value_counts().to_string())