"""Generate simulated defect data for LCD/OLED panel inspection.

The simulation mimics patterns seen on a real line: defects cluster near
panel edges and corners, defect counts vary with the time of day, and a
few lamination seats produce a disproportionate share of one defect type.

Running the module writes a CSV of defect records and a JSON summary.
"""

import json
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

np.random.seed(42)

# --- Configuration ---
NUM_PANELS = 500  # total number of inspected panels
OUTPUT_FILE = "defect_data.csv"
SUMMARY_FILE = "data_summary.json"

# Panel size (mm)
PANEL_WIDTH = 155.0
PANEL_HEIGHT = 340.0

# Shift boundaries (hour of day). Day shift is [8, 17); everything else
# is night shift.
DAY_SHIFT_START_HOUR = 8
NIGHT_SHIFT_START_HOUR = 17

# Front-lamination equipment: three machines, each with 20 seats.
LAMINATION_EQUIPMENT = {
    "LAM-A01": {"rows": 4, "cols": 5, "total_seats": 20},
    "LAM-A02": {"rows": 4, "cols": 5, "total_seats": 20},
    "LAM-B01": {"rows": 5, "cols": 4, "total_seats": 20},
}


def get_seat_names(n_rows, n_cols):
    """Return seat names in row-major order, e.g. ``R1C1, R1C2, ...``.

    Args:
        n_rows: Number of seat rows on the machine.
        n_cols: Number of seat columns on the machine.

    Returns:
        A list of ``n_rows * n_cols`` strings of the form ``R<row>C<col>``,
        with 1-based row and column numbers.
    """
    return [
        f"R{r}C{c}"
        for r in range(1, n_rows + 1)
        for c in range(1, n_cols + 1)
    ]


# Simulated per-seat defect bias (some seats fail more often because of an
# equipment problem):
#   LAM-A01 R2C3: aged suction nozzle    -> bubble defects
#   LAM-A01 R4C1: uneven heating         -> light-leak defects
#   LAM-A02 R1C5: uneven pressure        -> colour-shift defects
#   LAM-B01 R3C2: foreign-matter fouling -> foreign-object defects
SEAT_DEFECT_BIAS = {
    "LAM-A01_R2C3": {"defect_type": "气泡", "weight_boost": 3.0},
    "LAM-A01_R4C1": {"defect_type": "漏光", "weight_boost": 2.5},
    "LAM-A02_R1C5": {"defect_type": "色差", "weight_boost": 2.5},
    "LAM-B01_R3C2": {"defect_type": "异物", "weight_boost": 3.0},
}

# Defect types and their base weights (Pareto-like: a few types dominate).
DEFECT_TYPES = {
    "划痕": 0.30,
    "亮点": 0.20,
    "暗点": 0.15,
    "气泡": 0.12,
    "色差": 0.08,
    "漏光": 0.07,
    "裂纹": 0.04,
    "异物": 0.04,
}

# Severity levels and probabilities per defect type; types not listed here
# use DEFAULT_SEVERITY_DISTRIBUTION.
SEVERITY_DISTRIBUTIONS = {
    "裂纹": (("严重", "中等"), (0.7, 0.3)),
    "漏光": (("严重", "中等", "轻微"), (0.4, 0.4, 0.2)),
    "划痕": (("严重", "中等", "轻微"), (0.2, 0.4, 0.4)),
}
DEFAULT_SEVERITY_DISTRIBUTION = (("轻微", "中等", "严重"), (0.5, 0.35, 0.15))

# Production window: 30 days of simulated data.
START_DATE = datetime(2026, 4, 1, 8, 0, 0)
END_DATE = datetime(2026, 4, 30, 20, 0, 0)

# AOI inspection stations for the front-lamination process.
INSPECTION_STATIONS = ["AOI-前贴附#1", "AOI-前贴附#2", "AOI-后段全检"]


def is_night_shift(hour):
    """Return True when ``hour`` (0-23) falls in the night shift.

    Night shift is 17:00 up to (but not including) 08:00.
    """
    return hour >= NIGHT_SHIFT_START_HOUR or hour < DAY_SHIFT_START_HOUR


def relocate_to_day_shift(ts, offset_seconds):
    """Move ``ts`` into the day shift of the same calendar date.

    The result is 08:00:00 on ``ts``'s date plus ``offset_seconds``. The
    minute/second/microsecond fields of ``ts`` are reset so that an offset
    below nine hours can never spill past 17:00 into the night shift.

    Args:
        ts: The timestamp whose date is kept.
        offset_seconds: Seconds to add to 08:00:00 on that date.

    Returns:
        The relocated ``datetime``.
    """
    day_start = ts.replace(
        hour=DAY_SHIFT_START_HOUR, minute=0, second=0, microsecond=0
    )
    return day_start + timedelta(seconds=offset_seconds)


def generate_panel_positions():
    """Generate defect positions with spatial clustering.

    Positions are drawn from four hot spots plus uniform background noise;
    the number of points per component is itself random. Points that fall
    outside the panel rectangle are discarded.

    Returns:
        A list of ``(x_mm, y_mm)`` tuples, all within
        ``[0, PANEL_WIDTH] x [0, PANEL_HEIGHT]``.
    """
    # The draw order below is kept fixed so a given seed reproduces the
    # same point cloud.

    # Hot spot 1: left edge (lamination process issue).
    n1 = np.random.randint(200, 350)
    x1 = np.random.normal(8, 5, n1)
    y1 = np.random.uniform(20, PANEL_HEIGHT - 20, n1)

    # Hot spot 2: bottom-right corner (stress concentration).
    n2 = np.random.randint(150, 280)
    x2 = np.random.normal(PANEL_WIDTH - 15, 8, n2)
    y2 = np.random.normal(PANEL_HEIGHT - 20, 15, n2)

    # Hot spot 3: upper centre (FPC bonding area).
    n3 = np.random.randint(100, 200)
    x3 = np.random.normal(PANEL_WIDTH / 2, 20, n3)
    y3 = np.random.normal(PANEL_HEIGHT * 0.75, 12, n3)

    # Hot spot 4: top edge.
    n4 = np.random.randint(80, 150)
    x4 = np.random.uniform(30, PANEL_WIDTH - 30, n4)
    y4 = np.random.normal(10, 4, n4)

    # Uniformly distributed random defects (background noise).
    n5 = np.random.randint(200, 400)
    x5 = np.random.uniform(5, PANEL_WIDTH - 5, n5)
    y5 = np.random.uniform(5, PANEL_HEIGHT - 5, n5)

    all_x = np.concatenate([x1, x2, x3, x4, x5])
    all_y = np.concatenate([y1, y2, y3, y4, y5])

    # Drop points outside the panel; the survivors are already in range,
    # so no further clipping is needed.
    in_bounds = (
        (all_x >= 0)
        & (all_x <= PANEL_WIDTH)
        & (all_y >= 0)
        & (all_y <= PANEL_HEIGHT)
    )
    return list(zip(all_x[in_bounds], all_y[in_bounds]))


def generate_time_distribution(n_defects):
    """Generate ``n_defects`` timestamps with a shift-dependent density.

    Each timestamp starts as a uniform draw over
    ``[START_DATE, END_DATE]``. A draw that lands in the night shift is
    kept with probability 0.4; otherwise it is relocated to a uniformly
    random time in the day shift (08:00-17:00) of the same date.

    NOTE(review): the original comment said night-shift defects should be
    weighted *higher*, but this rule lowers the night-shift density.
    Behaviour is kept as-is; confirm the intended direction.

    Args:
        n_defects: Number of timestamps to generate.

    Returns:
        A list of ``n_defects`` ``datetime`` objects.
    """
    day_shift_seconds = (
        NIGHT_SHIFT_START_HOUR - DAY_SHIFT_START_HOUR
    ) * 3600
    total_seconds = (END_DATE - START_DATE).total_seconds()

    timestamps = []
    for _ in range(n_defects):
        ts = START_DATE + timedelta(
            seconds=np.random.uniform(0, total_seconds)
        )
        if is_night_shift(ts.hour) and not np.random.random() > 0.6:
            # Relocate into the day shift, anchored at 08:00:00 so the
            # result cannot leak into the 17:00 hour.
            ts = relocate_to_day_shift(
                ts, np.random.uniform(0, day_shift_seconds)
            )
        timestamps.append(ts)
    return timestamps


def assign_equipment_and_seat(n_defects, timestamps):
    """Assign a lamination machine and seat to every timestamp.

    Machine choice depends on the hour of the timestamp:
      * night shift (17:00-08:00): LAM-A02 or LAM-B01
      * 08:00-12:00: LAM-A01
      * 12:00-17:00: LAM-A01 or LAM-A02
    The seat is picked uniformly from the chosen machine's seats.

    Args:
        n_defects: Unused; kept for interface compatibility. One result is
            produced per entry in ``timestamps``.
        timestamps: Iterable of ``datetime`` objects.

    Returns:
        A tuple ``(equipment_ids, seat_ids)`` of two equally long lists of
        strings.
    """
    equipment_list = list(LAMINATION_EQUIPMENT)
    # Build each machine's seat list once rather than once per defect.
    seats_by_equipment = {
        eq_id: get_seat_names(info["rows"], info["cols"])
        for eq_id, info in LAMINATION_EQUIPMENT.items()
    }

    equipment_ids = []
    seat_ids = []
    for ts in timestamps:
        hour = ts.hour
        # Night shift is tested first: the early hours (00:00-07:59) belong
        # to it and must not be caught by the "before noon" branch.
        if is_night_shift(hour):
            eq_idx = np.random.choice([1, 2])
        elif hour < 12:
            eq_idx = 0
        else:
            eq_idx = np.random.choice([0, 1])

        eq_id = equipment_list[eq_idx]
        equipment_ids.append(eq_id)
        seat_ids.append(str(np.random.choice(seats_by_equipment[eq_id])))
    return equipment_ids, seat_ids


def generate_defect_type_with_seat_bias(n_defects, equipment_ids, seat_ids):
    """Draw a defect type per record, boosting types on biased seats.

    For a machine/seat pair listed in ``SEAT_DEFECT_BIAS`` the weight of
    its associated defect type is multiplied by ``weight_boost`` and the
    distribution is renormalised; all other seats use the base weights
    from ``DEFECT_TYPES``.

    Args:
        n_defects: Number of records to generate.
        equipment_ids: Machine id per record (at least ``n_defects`` long).
        seat_ids: Seat id per record (at least ``n_defects`` long).

    Returns:
        A list of ``n_defects`` defect-type strings.

    Raises:
        IndexError: If either input holds fewer than ``n_defects`` items.
    """
    if len(equipment_ids) < n_defects or len(seat_ids) < n_defects:
        raise IndexError(
            "equipment_ids and seat_ids must each hold at least "
            "n_defects entries"
        )

    types = list(DEFECT_TYPES)
    base_weights = np.array(list(DEFECT_TYPES.values()), dtype=float)
    # Normalise so np.random.choice accepts the weights even if the
    # configured values do not sum to exactly 1.
    base_weights /= base_weights.sum()

    # Precompute one biased distribution per configured seat.
    biased_weights = {}
    for key, bias in SEAT_DEFECT_BIAS.items():
        boosted = base_weights.copy()
        boosted[types.index(bias["defect_type"])] *= bias["weight_boost"]
        biased_weights[key] = boosted / boosted.sum()

    defect_type_list = []
    for eq_id, seat_id in zip(
        equipment_ids[:n_defects], seat_ids[:n_defects]
    ):
        weights = biased_weights.get(f"{eq_id}_{seat_id}", base_weights)
        defect_type_list.append(str(np.random.choice(types, p=weights)))
    return defect_type_list


def generate_severity(defect_type):
    """Draw a severity level for ``defect_type``.

    The level distribution comes from ``SEVERITY_DISTRIBUTIONS``; types
    not listed there use ``DEFAULT_SEVERITY_DISTRIBUTION``. Exactly one
    random draw is made per call.

    Args:
        defect_type: Defect type name.

    Returns:
        One of ``"轻微"``, ``"中等"`` or ``"严重"`` as a ``str``.
    """
    levels, probabilities = SEVERITY_DISTRIBUTIONS.get(
        defect_type, DEFAULT_SEVERITY_DISTRIBUTION
    )
    return str(np.random.choice(levels, p=probabilities))


def generate_data(output_file=OUTPUT_FILE, summary_file=SUMMARY_FILE):
    """Generate the full defect data set and write it to disk.

    Writes the defect records as CSV (UTF-8 with BOM) to ``output_file``
    and a JSON statistics summary to ``summary_file``.

    Args:
        output_file: Path of the CSV file to write.
        summary_file: Path of the JSON summary file to write.

    Returns:
        The generated ``pandas.DataFrame``, one row per defect.
    """
    print("生成模拟缺陷数据...")

    # Spatial positions determine how many defect records exist.
    positions = generate_panel_positions()
    n_defects = len(positions)
    print(f" 生成 {n_defects} 个缺陷记录")

    timestamps = generate_time_distribution(n_defects)
    equipment_ids, seat_ids = assign_equipment_and_seat(n_defects, timestamps)
    defect_type_list = generate_defect_type_with_seat_bias(
        n_defects, equipment_ids, seat_ids
    )

    # Each defect is attached to a randomly chosen panel out of NUM_PANELS.
    panel_ids = [
        f"PANEL-{np.random.randint(1, NUM_PANELS + 1):04d}"
        for _ in range(n_defects)
    ]
    # One batch per calendar day.
    batch_ids = [f"BATCH-{ts.strftime('%Y%m%d')}" for ts in timestamps]
    severities = [generate_severity(dt) for dt in defect_type_list]
    inspection_stations = [
        str(np.random.choice(INSPECTION_STATIONS, p=[0.4, 0.4, 0.2]))
        for _ in range(n_defects)
    ]

    df = pd.DataFrame({
        "defect_id": [f"D{i + 1:05d}" for i in range(n_defects)],
        "panel_id": panel_ids,
        "batch_id": batch_ids,
        "equipment_id": equipment_ids,
        "seat_id": seat_ids,
        "inspection_station": inspection_stations,
        "timestamp": timestamps,
        "defect_type": defect_type_list,
        "severity": severities,
        "x_mm": [round(x, 2) for x, _ in positions],
        "y_mm": [round(y, 2) for _, y in positions],
        "panel_width_mm": PANEL_WIDTH,
        "panel_height_mm": PANEL_HEIGHT,
        "hour": [ts.hour for ts in timestamps],
        "shift": [
            "夜班" if is_night_shift(ts.hour) else "白班"
            for ts in timestamps
        ],
        "day": [ts.strftime("%Y-%m-%d") for ts in timestamps],
    })

    # utf-8-sig adds a BOM so spreadsheet tools detect the encoding of the
    # Chinese labels.
    df.to_csv(output_file, index=False, encoding="utf-8-sig")
    print(f"数据已保存到 {output_file}")

    def count_by(column, values):
        """Count rows of ``df`` per value of ``column`` as plain ints."""
        return {v: int((df[column] == v).sum()) for v in values}

    summary = {
        "total_defects": n_defects,
        "total_panels": NUM_PANELS,
        "defect_types": count_by("defect_type", DEFECT_TYPES),
        "severity_distribution": count_by(
            "severity", ["轻微", "中等", "严重"]
        ),
        "shift_distribution": count_by("shift", ["白班", "夜班"]),
        "equipment_distribution": count_by(
            "equipment_id", LAMINATION_EQUIPMENT
        ),
        "date_range": {
            "start": START_DATE.strftime("%Y-%m-%d"),
            "end": END_DATE.strftime("%Y-%m-%d"),
        },
        "lamination_config": {
            "equipment": list(LAMINATION_EQUIPMENT),
            "seat_bias": {
                k: v["defect_type"] for k, v in SEAT_DEFECT_BIAS.items()
            },
        },
    }
    with open(summary_file, "w", encoding="utf-8") as f:
        json.dump(summary, f, ensure_ascii=False, indent=2)
    print(f"统计摘要已保存到 {summary_file}")

    return df


def main():
    """Generate the data set and print an overview to stdout."""
    df = generate_data()
    print(f"\n数据概览:")
    print(f" 总记录数: {len(df)}")
    print(f" 缺陷类型数: {df['defect_type'].nunique()}")
    print(f" 面板数量: {df['panel_id'].nunique()}")
    print(f" 批次数量: {df['batch_id'].nunique()}")
    print(f" 设备数量: {df['equipment_id'].nunique()}")
    print(f" 座号数量: {df['seat_id'].nunique()}")
    print(f"\n缺陷类型分布:")
    print(df["defect_type"].value_counts().to_string())
    print(f"\n设备分布:")
    print(df["equipment_id"].value_counts().to_string())
    print(f"\n班次分布:")
    print(df["shift"].value_counts().to_string())


if __name__ == "__main__":
    main()