"""
Generate simulated defect data for LCD/OLED panel inspection.

Simulates realistic patterns: defects cluster near panel edges and corners,
some time periods produce more defects than others, and specific equipment
seats concentrate particular defect types.
"""
import json
import math
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
# Seed NumPy's global RNG so repeated runs draw the same random sequence.
np.random.seed(42)

# --- Configuration ---
NUM_PANELS = 500  # total number of inspected panels
OUTPUT_FILE = "defect_data.csv"

# Panel dimensions (mm)
PANEL_WIDTH = 155.0
PANEL_HEIGHT = 340.0

# Front-lamination process equipment configuration.
# Three simulated lamination machines with 20 seats each (4x5 or 5x4 grid).
LAMINATION_EQUIPMENT = {
    "LAM-A01": {"rows": 4, "cols": 5, "total_seats": 20},
    "LAM-A02": {"rows": 4, "cols": 5, "total_seats": 20},
    "LAM-B01": {"rows": 5, "cols": 4, "total_seats": 20},
}
# Seat naming scheme: row number then column number, e.g. R1C1, R1C2, ...
def get_seat_names(n_rows, n_cols):
    """Return the seat labels of an ``n_rows`` x ``n_cols`` grid.

    Labels have the form ``R<row>C<col>`` with 1-based indices and are listed
    row by row (all columns of row 1 first, then row 2, ...). A grid with zero
    rows or zero columns yields an empty list.
    """
    return [
        f"R{row}C{col}"
        for row in range(1, n_rows + 1)
        for col in range(1, n_cols + 1)
    ]
# Simulated per-seat defect bias (some seats produce more defects because of
# equipment problems):
#   LAM-A01 seat R2C3: aging suction nozzle   -> bubble ("气泡") defects cluster
#   LAM-A01 seat R4C1: uneven heating         -> light-leak ("漏光") defects cluster
#   LAM-A02 seat R1C5: uneven pressure        -> color-shift ("色差") defects cluster
#   LAM-B01 seat R3C2: particle contamination -> foreign-matter ("异物") defects cluster
# Keys are "<equipment_id>_<seat_id>"; "weight_boost" multiplies the base
# weight of "defect_type" before the weights are renormalized.
SEAT_DEFECT_BIAS = {
    "LAM-A01_R2C3": {"defect_type": "气泡", "weight_boost": 3.0},
    "LAM-A01_R4C1": {"defect_type": "漏光", "weight_boost": 2.5},
    "LAM-A02_R1C5": {"defect_type": "色差", "weight_boost": 2.5},
    "LAM-B01_R3C2": {"defect_type": "异物", "weight_boost": 3.0},
}

# Defect types and their base weights (Pareto-like: a few types dominate).
# The weights are used directly as sampling probabilities and sum to 1.0.
DEFECT_TYPES = {
    "划痕": 0.30,  # scratch
    "亮点": 0.20,  # bright dot
    "暗点": 0.15,  # dark dot
    "气泡": 0.12,  # bubble
    "色差": 0.08,  # color shift
    "漏光": 0.07,  # light leak
    "裂纹": 0.04,  # crack
    "异物": 0.04,  # foreign matter
}

# Production time window: 30 days of simulated data.
START_DATE = datetime(2026, 4, 1, 8, 0, 0)
END_DATE = datetime(2026, 4, 30, 20, 0, 0)

# Inspection stations (AOI) for the front-lamination process.
INSPECTION_STATIONS = ["AOI-前贴附#1", "AOI-前贴附#2", "AOI-后段全检"]
def generate_panel_positions():
    """Sample defect coordinates on the panel with spatial clustering.

    Four hotspot clusters plus a uniform background are drawn from NumPy's
    global RNG; the number of points in each group is itself random. Points
    that fall outside the panel rectangle ``[0, PANEL_WIDTH] x
    [0, PANEL_HEIGHT]`` are discarded.

    Returns:
        A list of ``(x, y)`` tuples in millimetres.
    """
    rng = np.random

    # Hotspot 1: left edge (lamination process issue).
    count = rng.randint(200, 350)
    left_x = rng.normal(8, 5, count)
    left_y = rng.uniform(20, PANEL_HEIGHT - 20, count)

    # Hotspot 2: corner near (PANEL_WIDTH, PANEL_HEIGHT) (stress concentration).
    count = rng.randint(150, 280)
    corner_x = rng.normal(PANEL_WIDTH - 15, 8, count)
    corner_y = rng.normal(PANEL_HEIGHT - 20, 15, count)

    # Hotspot 3: horizontally centred, at 75% of the panel height (FPC bonding area).
    count = rng.randint(100, 200)
    fpc_x = rng.normal(PANEL_WIDTH / 2, 20, count)
    fpc_y = rng.normal(PANEL_HEIGHT * 0.75, 12, count)

    # Hotspot 4: the edge near y = 10 mm.
    count = rng.randint(80, 150)
    edge_x = rng.uniform(30, PANEL_WIDTH - 30, count)
    edge_y = rng.normal(10, 4, count)

    # Uniformly scattered defects (background noise).
    count = rng.randint(200, 400)
    noise_x = rng.uniform(5, PANEL_WIDTH - 5, count)
    noise_y = rng.uniform(5, PANEL_HEIGHT - 5, count)

    xs = np.concatenate([left_x, corner_x, fpc_x, edge_x, noise_x])
    ys = np.concatenate([left_y, corner_y, fpc_y, edge_y, noise_y])

    # Keep only the samples that landed on the panel.
    x_on_panel = (xs >= 0) & (xs <= PANEL_WIDTH)
    y_on_panel = (ys >= 0) & (ys <= PANEL_HEIGHT)
    on_panel = x_on_panel & y_on_panel

    kept_x = np.clip(xs[on_panel], 0, PANEL_WIDTH)
    kept_y = np.clip(ys[on_panel], 0, PANEL_HEIGHT)
    return list(zip(kept_x, kept_y))
def generate_time_distribution(n_defects, start=None, end=None):
    """Sample ``n_defects`` timestamps with a shift-dependent density.

    Each timestamp is first drawn uniformly between ``start`` and ``end``.
    A sample that falls in the night shift (17:00-08:00) is kept with
    probability 0.4; otherwise it is relocated to a uniformly random moment
    of the day shift (08:00-17:00) on the same calendar date.

    NOTE(review): the original comment stated that night-shift defects should
    be weighted *higher*, but relocating 60% of night samples to daytime
    lowers the night-shift share. The sampling probabilities are left
    unchanged here; confirm the intended direction.

    Args:
        n_defects: Number of timestamps to generate.
        start: Beginning of the sampling window; defaults to ``START_DATE``.
        end: End of the sampling window; defaults to ``END_DATE``.

    Returns:
        A list of ``datetime`` objects of length ``n_defects``.
    """
    start = START_DATE if start is None else start
    end = END_DATE if end is None else end
    total_seconds = (end - start).total_seconds()
    day_shift_seconds = 9 * 3600  # 08:00-17:00

    timestamps = []
    for _ in range(n_defects):
        ts = start + timedelta(seconds=np.random.uniform(0, total_seconds))
        in_night_shift = ts.hour >= 17 or ts.hour < 8
        # The second draw only happens for night-shift samples (short-circuit).
        if in_night_shift and np.random.random() <= 0.6:
            offset = np.random.uniform(0, day_shift_seconds)
            # Anchor at exactly 08:00:00. Keeping the original minutes and
            # seconds would let the relocated sample spill past 17:00, back
            # into the night shift.
            day_start = ts.replace(hour=8, minute=0, second=0, microsecond=0)
            ts = day_start + timedelta(seconds=offset)
        timestamps.append(ts)
    return timestamps
def assign_equipment_and_seat(n_defects, timestamps):
    """Assign a lamination machine and a seat to every defect timestamp.

    The machine depends on the hour of the timestamp:

    * night shift (17:00-08:00): LAM-A02 or LAM-B01, chosen at random;
    * 08:00-12:00: always LAM-A01;
    * 12:00-17:00: LAM-A01 or LAM-A02, chosen at random.

    The seat is then drawn uniformly from that machine's seat grid.

    Args:
        n_defects: Unused; kept for interface compatibility. One assignment is
            produced per element of ``timestamps``.
        timestamps: Iterable of ``datetime`` objects.

    Returns:
        ``(equipment_ids, seat_ids)``: two lists aligned with ``timestamps``.
    """
    equipment_list = list(LAMINATION_EQUIPMENT)
    # Seat names depend only on the machine, so build them once per machine
    # instead of once per timestamp.
    seats_by_equipment = {
        eq_id: get_seat_names(cfg["rows"], cfg["cols"])
        for eq_id, cfg in LAMINATION_EQUIPMENT.items()
    }

    equipment_ids = []
    seat_ids = []
    for ts in timestamps:
        hour = ts.hour
        if hour >= 17 or hour < 8:
            # Night shift, using the same definition as the rest of the script.
            # Hours 0-7 previously fell into the morning branch and were given
            # the day-shift machine.
            eq_idx = np.random.choice([1, 2])
        elif hour < 12:
            eq_idx = 0  # morning of the day shift: LAM-A01 only
        else:
            eq_idx = np.random.choice([0, 1])  # afternoon: both A-line machines

        eq_id = equipment_list[eq_idx]
        equipment_ids.append(eq_id)
        seat_ids.append(np.random.choice(seats_by_equipment[eq_id]))
    return equipment_ids, seat_ids
def generate_defect_type_with_seat_bias(n_defects, equipment_ids, seat_ids):
    """Draw a defect type for each record, honouring per-seat bias.

    The base probabilities come from ``DEFECT_TYPES``. When the record's
    ``"<equipment_id>_<seat_id>"`` key appears in ``SEAT_DEFECT_BIAS``, the
    weight of the biased defect type is multiplied by its ``weight_boost`` and
    the distribution is renormalized before sampling.

    Args:
        n_defects: Number of records to generate; indexes into both lists.
        equipment_ids: Equipment id per record.
        seat_ids: Seat id per record.

    Returns:
        A list of ``n_defects`` defect-type labels.
    """
    labels = list(DEFECT_TYPES.keys())
    base_probs = np.array(list(DEFECT_TYPES.values()))

    sampled = []
    for idx in range(n_defects):
        bias_key = f"{equipment_ids[idx]}_{seat_ids[idx]}"

        if bias_key not in SEAT_DEFECT_BIAS:
            sampled.append(np.random.choice(labels, p=base_probs))
            continue

        rule = SEAT_DEFECT_BIAS[bias_key]
        # Work on a copy so the base distribution is never modified.
        probs = base_probs.copy()
        boosted = labels.index(rule["defect_type"])
        probs[boosted] *= rule["weight_boost"]
        probs /= probs.sum()
        sampled.append(np.random.choice(labels, p=probs))
    return sampled
def generate_severity(defect_type):
    """Draw a severity label for a defect of the given type.

    Cracks ("裂纹"), light leaks ("漏光") and scratches ("划痕") have their
    own severity distributions; every other type uses a default distribution
    skewed towards minor ("轻微").

    Exactly one sample is drawn from NumPy's global RNG per call. The previous
    implementation built a dict whose values were already-evaluated
    ``np.random.choice`` calls, so it drew four samples per call and discarded
    three of them.

    Args:
        defect_type: Defect type label.

    Returns:
        One of "轻微", "中等", "严重" (as returned by ``np.random.choice``).
    """
    # defect type -> (severity labels, matching probabilities)
    profiles = {
        "裂纹": (["严重", "中等"], [0.7, 0.3]),
        "漏光": (["严重", "中等", "轻微"], [0.4, 0.4, 0.2]),
        "划痕": (["严重", "中等", "轻微"], [0.2, 0.4, 0.4]),
    }
    default_profile = (["轻微", "中等", "严重"], [0.5, 0.35, 0.15])

    levels, probabilities = profiles.get(defect_type, default_profile)
    return np.random.choice(levels, p=probabilities)
def generate_geometry(defect_type):
    """Generate point / line / region geometry attributes for a defect type.

    * Scratches ("划痕") and cracks ("裂纹") are lines: a random length,
      stroke width and angle; ``height_mm`` is 0.
    * Light leaks ("漏光"), color shifts ("色差") and bubbles ("气泡") are
      regions: a random width and height; length and angle are 0.
    * Everything else is a point whose width and height share one random size.

    Returns:
        A dict with keys ``defect_geometry_type``, ``width_mm``, ``height_mm``,
        ``length_mm``, ``angle_deg`` and ``area_mm2``.
    """
    uniform = np.random.uniform
    line_types = ("划痕", "裂纹")
    region_types = ("漏光", "色差", "气泡")

    if defect_type in line_types:
        stroke_length = float(uniform(4, 35))
        stroke_width = float(uniform(0.05, 0.4))
        heading = float(uniform(0, 180))
        kind = "line"
        width_mm = round(stroke_width, 2)
        height_mm = 0.0
        length_mm = round(stroke_length, 2)
        angle_deg = round(heading, 1)
        area_mm2 = round(stroke_length * stroke_width, 2)
    elif defect_type in region_types:
        region_w = float(uniform(1.5, 12))
        region_h = float(uniform(1.5, 16))
        kind = "region"
        width_mm = round(region_w, 2)
        height_mm = round(region_h, 2)
        length_mm = 0.0
        angle_deg = 0.0
        area_mm2 = round(region_w * region_h, 2)
    else:
        dot = float(uniform(0.05, 0.8))
        kind = "point"
        width_mm = round(dot, 2)
        height_mm = round(dot, 2)
        length_mm = 0.0
        angle_deg = 0.0
        area_mm2 = round(dot * dot, 3)

    return {
        "defect_geometry_type": kind,
        "width_mm": width_mm,
        "height_mm": height_mm,
        "length_mm": length_mm,
        "angle_deg": angle_deg,
        "area_mm2": area_mm2,
    }
def _bbox_half_extents(geometry):
    """Return ``(half_width, half_height)`` in mm of a defect's axis-aligned box.

    Line defects are treated as a rectangle of ``length_mm`` x ``width_mm``
    rotated by ``angle_deg`` (assumed to be measured from the panel's x axis;
    TODO confirm the angle convention with downstream consumers). Points and
    regions use their ``width_mm`` / ``height_mm`` directly.
    """
    if geometry["defect_geometry_type"] == "line":
        theta = math.radians(geometry["angle_deg"])
        cos_t = abs(math.cos(theta))
        sin_t = abs(math.sin(theta))
        length = geometry["length_mm"]
        thickness = geometry["width_mm"]
        return (
            (length * cos_t + thickness * sin_t) / 2,
            (length * sin_t + thickness * cos_t) / 2,
        )
    return geometry["width_mm"] / 2, geometry["height_mm"] / 2


def generate_data():
    """Generate the complete simulated defect data set and write it to disk.

    Builds one record per sampled defect position (location, time, equipment,
    seat, type, severity, geometry, traceability ids), saves the table to
    ``OUTPUT_FILE`` as UTF-8-with-BOM CSV and writes aggregate counts to
    ``data_summary.json``.

    Bounding boxes are clamped to the panel rectangle and rounded to 3
    decimals. For line defects the box is derived from ``length_mm`` and
    ``angle_deg``; previously it used only the stroke width and a zero height,
    which produced a degenerate box.

    Returns:
        The generated ``pandas.DataFrame``.
    """
    print("生成模拟缺陷数据...")

    # Spatial positions; their count fixes the number of defect records.
    positions = generate_panel_positions()
    n_defects = len(positions)
    print(f" 生成 {n_defects} 个缺陷记录")

    # Time, equipment/seat and defect type (seat-biased), in this order so the
    # random stream is consumed consistently from run to run.
    timestamps = generate_time_distribution(n_defects)
    equipment_ids, seat_ids = assign_equipment_and_seat(n_defects, timestamps)
    defect_type_list = generate_defect_type_with_seat_bias(n_defects, equipment_ids, seat_ids)

    # Panel ids are drawn from NUM_PANELS simulated panels; batches are per day.
    panel_ids = [f"PANEL-{np.random.randint(1, NUM_PANELS+1):04d}" for _ in range(n_defects)]
    batch_ids = [f"BATCH-{ts.strftime('%Y%m%d')}" for ts in timestamps]

    severities = [generate_severity(dt) for dt in defect_type_list]
    inspection_stations = [
        np.random.choice(INSPECTION_STATIONS, p=[0.4, 0.4, 0.2]) for _ in range(n_defects)
    ]
    geometries = [generate_geometry(dt) for dt in defect_type_list]

    x_values = [round(p[0], 2) for p in positions]
    y_values = [round(p[1], 2) for p in positions]
    half_extents = [_bbox_half_extents(g) for g in geometries]

    # Upstream / downstream process traceability ids.
    clean_equipment_ids = [f"CLN-{np.random.choice(['A01', 'A02'])}" for _ in range(n_defects)]
    clean_slot_ids = [f"SLOT-{np.random.randint(1, 13):02d}" for _ in range(n_defects)]
    bond_equipment_ids = [f"BON-{np.random.choice(['A01', 'A02', 'B01'])}" for _ in range(n_defects)]
    bond_head_ids = [f"HEAD-{np.random.randint(1, 7):02d}" for _ in range(n_defects)]
    recipe_ids = [f"RCP-LAM-{eq[-3:]}" for eq in equipment_ids]
    lam_fixture_ids = [f"FIX-{eq[-3:]}-{np.random.randint(1, 5):02d}" for eq in equipment_ids]
    lam_jig_ids = [f"JIG-{seat}" for seat in seat_ids]
    lam_nozzle_ids = [f"NZ-{np.random.randint(1, 21):02d}" for _ in range(n_defects)]

    # Material lots: glass is per day, the others per calendar week.
    material_lot_glass = [f"GLS-{ts.strftime('%Y%m%d')}-{np.random.randint(1, 4)}" for ts in timestamps]
    material_lot_oca = [f"OCA-{ts.strftime('%Y%W')}-{np.random.randint(1, 6)}" for ts in timestamps]
    material_lot_polarizer = [f"POL-{ts.strftime('%Y%W')}-{np.random.randint(1, 5)}" for ts in timestamps]
    material_lot_cover = [f"CVR-{ts.strftime('%Y%W')}-{np.random.randint(1, 4)}" for ts in timestamps]

    df = pd.DataFrame({
        "defect_id": [f"D{i+1:05d}" for i in range(n_defects)],
        "panel_id": panel_ids,
        "batch_id": batch_ids,
        "equipment_id": equipment_ids,
        "seat_id": seat_ids,
        "inspection_station": inspection_stations,
        "timestamp": timestamps,
        "defect_type": defect_type_list,
        "severity": severities,
        "x_mm": x_values,
        "y_mm": y_values,
        "panel_width_mm": PANEL_WIDTH,
        "panel_height_mm": PANEL_HEIGHT,
        "hour": [ts.hour for ts in timestamps],
        "shift": ["夜班" if (ts.hour >= 17 or ts.hour < 8) else "白班" for ts in timestamps],
        "day": [ts.strftime("%Y-%m-%d") for ts in timestamps],
        "defect_geometry_type": [g["defect_geometry_type"] for g in geometries],
        "width_mm": [g["width_mm"] for g in geometries],
        "height_mm": [g["height_mm"] for g in geometries],
        "length_mm": [g["length_mm"] for g in geometries],
        "angle_deg": [g["angle_deg"] for g in geometries],
        "area_mm2": [g["area_mm2"] for g in geometries],
        "bbox_x_min_mm": [round(max(0.0, x - hw), 3) for x, (hw, _) in zip(x_values, half_extents)],
        "bbox_y_min_mm": [round(max(0.0, y - hh), 3) for y, (_, hh) in zip(y_values, half_extents)],
        "bbox_x_max_mm": [round(min(PANEL_WIDTH, x + hw), 3) for x, (hw, _) in zip(x_values, half_extents)],
        "bbox_y_max_mm": [round(min(PANEL_HEIGHT, y + hh), 3) for y, (_, hh) in zip(y_values, half_extents)],
        "contour_json": "",
        "process_step": "前贴附",
        "recipe_id": recipe_ids,
        "operator_id": [f"OP-{np.random.randint(1, 9):02d}" for _ in range(n_defects)],
        "clean_equipment_id": clean_equipment_ids,
        "clean_slot_id": clean_slot_ids,
        "clean_recipe_id": [f"RCP-CLN-{eq[-3:]}" for eq in clean_equipment_ids],
        "lam_equipment_id": equipment_ids,
        "lam_seat_id": seat_ids,
        "lam_fixture_id": lam_fixture_ids,
        "lam_jig_id": lam_jig_ids,
        "lam_nozzle_id": lam_nozzle_ids,
        "bond_equipment_id": bond_equipment_ids,
        "bond_head_id": bond_head_ids,
        "bond_recipe_id": [f"RCP-BON-{eq[-3:]}" for eq in bond_equipment_ids],
        "aoi_equipment_id": inspection_stations,
        "aoi_station_id": inspection_stations,
        "material_lot_glass": material_lot_glass,
        "material_lot_oca": material_lot_oca,
        "material_lot_polarizer": material_lot_polarizer,
        "material_lot_cover": material_lot_cover,
    })

    # utf-8-sig writes a BOM at the start of the CSV.
    df.to_csv(OUTPUT_FILE, index=False, encoding="utf-8-sig")
    print(f"数据已保存到 {OUTPUT_FILE}")

    # Aggregate counts written next to the CSV.
    summary = {
        "total_defects": n_defects,
        "total_panels": NUM_PANELS,
        "defect_types": {t: int((df["defect_type"] == t).sum()) for t in DEFECT_TYPES},
        "severity_distribution": {s: int((df["severity"] == s).sum()) for s in ["轻微", "中等", "严重"]},
        "shift_distribution": {s: int((df["shift"] == s).sum()) for s in ["白班", "夜班"]},
        "equipment_distribution": {e: int((df["equipment_id"] == e).sum()) for e in LAMINATION_EQUIPMENT},
        "date_range": {
            "start": START_DATE.strftime("%Y-%m-%d"),
            "end": END_DATE.strftime("%Y-%m-%d"),
        },
        "lamination_config": {
            "equipment": list(LAMINATION_EQUIPMENT),
            "seat_bias": {k: v["defect_type"] for k, v in SEAT_DEFECT_BIAS.items()},
        },
    }
    with open("data_summary.json", "w", encoding="utf-8") as f:
        json.dump(summary, f, ensure_ascii=False, indent=2)
    print("统计摘要已保存到 data_summary.json")

    return df
if __name__ == "__main__":
    # Generate the data set, then print a short overview of what was produced.
    df = generate_data()

    record_count = len(df)
    distinct = df[["defect_type", "panel_id", "batch_id", "equipment_id", "seat_id"]].nunique()

    print("\n数据概览:")
    print(f" 总记录数: {record_count}")
    print(f" 缺陷类型数: {distinct['defect_type']}")
    print(f" 面板数量: {distinct['panel_id']}")
    print(f" 批次数量: {distinct['batch_id']}")
    print(f" 设备数量: {distinct['equipment_id']}")
    print(f" 座号数量: {distinct['seat_id']}")

    # Per-category record counts: defect type, equipment, shift.
    breakdowns = (
        ("\n缺陷类型分布:", "defect_type"),
        ("\n设备分布:", "equipment_id"),
        ("\n班次分布:", "shift"),
    )
    for heading, column in breakdowns:
        print(heading)
        print(df[column].value_counts().to_string())