| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117 |
- """缺陷数据契约与标准化。"""
- import pandas as pd
# Columns that every defect table must provide; this is the list that
# get_missing_required_columns() checks an incoming frame against.
CORE_REQUIRED_COLUMNS = [
    # Record, panel and equipment identifiers.
    "defect_id",
    "panel_id",
    "batch_id",
    "equipment_id",
    "seat_id",
    "inspection_station",
    # Detection time and defect classification.
    "timestamp",
    "defect_type",
    "severity",
    # Defect position and panel dimensions (the names carry a ``_mm`` suffix).
    "x_mm",
    "y_mm",
    "panel_width_mm",
    "panel_height_mm",
    # Time-bucket columns.
    "hour",
    "shift",
    "day",
]

# Extension columns that may be absent from an input table; these are the
# columns normalize_defect_schema() adds with default values when missing.
INDUSTRY_OPTIONAL_COLUMNS = [
    # Defect geometry.
    "defect_geometry_type",
    "width_mm",
    "height_mm",
    "length_mm",
    "angle_deg",
    "area_mm2",
    # Bounding box and contour.
    "bbox_x_min_mm",
    "bbox_y_min_mm",
    "bbox_x_max_mm",
    "bbox_y_max_mm",
    "contour_json",
    # Process context.
    "process_step",
    "recipe_id",
    "operator_id",
    # clean_* columns.
    "clean_equipment_id",
    "clean_slot_id",
    "clean_recipe_id",
    # lam_* columns.
    "lam_equipment_id",
    "lam_seat_id",
    "lam_fixture_id",
    "lam_jig_id",
    "lam_nozzle_id",
    # bond_* columns.
    "bond_equipment_id",
    "bond_head_id",
    "bond_recipe_id",
    # aoi_* columns.
    "aoi_equipment_id",
    "aoi_station_id",
    # material_lot_* columns.
    "material_lot_glass",
    "material_lot_oca",
    "material_lot_polarizer",
    "material_lot_cover",
]

# Full column list: the required columns first, then the optional extensions.
TEMPLATE_COLUMNS = [*CORE_REQUIRED_COLUMNS, *INDUSTRY_OPTIONAL_COLUMNS]
def get_missing_required_columns(df):
    """Return the core required columns that ``df`` does not contain.

    Args:
        df: Object exposing a ``columns`` attribute that supports ``in``
            membership tests (e.g. a pandas DataFrame).

    Returns:
        A list of the names from ``CORE_REQUIRED_COLUMNS`` that are not in
        ``df.columns``, in the order they appear in ``CORE_REQUIRED_COLUMNS``.
        The list is empty when nothing is missing.
    """
    available = df.columns
    return [name for name in CORE_REQUIRED_COLUMNS if name not in available]
def normalize_defect_schema(df):
    """Return a copy of ``df`` with the industry extension columns filled in.

    Extension columns that are absent are added with a default value, so
    tables that only carry the older, smaller column set remain usable.
    Columns that already exist are never overwritten by a default.

    Args:
        df: pandas DataFrame of defect records. It is not modified.

    Returns:
        A new DataFrame in which:
          * every missing extension column has been added (a constant, or a
            copy of an existing column where one serves as the fallback);
          * ``timestamp``, when present, is parsed to datetime, with
            unparseable values becoming ``NaT``;
          * missing values in ``hour`` / ``day``, when those columns and
            ``timestamp`` are present, are back-filled from the timestamp;
          * the size columns ``width_mm``, ``height_mm``, ``length_mm``,
            ``angle_deg`` and ``area_mm2`` are numeric, with unparseable or
            missing values replaced by ``0.0``.
    """
    normalized = df.copy()

    # Default for each extension column. ``DataFrame.get`` yields the named
    # column when it exists and the scalar fallback otherwise, so e.g. the
    # bounding box collapses to the defect's own x/y position.
    defaults = {
        "defect_geometry_type": "point",
        "width_mm": 0.0,
        "height_mm": 0.0,
        "length_mm": 0.0,
        "angle_deg": 0.0,
        "area_mm2": 0.0,
        "bbox_x_min_mm": normalized.get("x_mm", 0.0),
        "bbox_y_min_mm": normalized.get("y_mm", 0.0),
        "bbox_x_max_mm": normalized.get("x_mm", 0.0),
        "bbox_y_max_mm": normalized.get("y_mm", 0.0),
        "contour_json": "",
        "process_step": "前制程",
        "recipe_id": "",
        "operator_id": "",
        "clean_equipment_id": "",
        "clean_slot_id": "",
        "clean_recipe_id": "",
        "lam_equipment_id": normalized.get("equipment_id", ""),
        "lam_seat_id": normalized.get("seat_id", ""),
        "lam_fixture_id": "",
        "lam_jig_id": "",
        "lam_nozzle_id": "",
        "bond_equipment_id": "",
        "bond_head_id": "",
        "bond_recipe_id": "",
        "aoi_equipment_id": normalized.get("inspection_station", ""),
        "aoi_station_id": normalized.get("inspection_station", ""),
        "material_lot_glass": "",
        "material_lot_oca": "",
        "material_lot_polarizer": "",
        "material_lot_cover": "",
    }

    # Only add what is missing; data supplied by the caller always wins.
    for column, value in defaults.items():
        if column not in normalized.columns:
            normalized[column] = value

    # The hour/day back-fill reads the parsed timestamps, so it is scoped
    # under the timestamp check: a frame without a ``timestamp`` column is
    # passed through instead of failing on a missing-column lookup.
    if "timestamp" in normalized.columns:
        timestamps = pd.to_datetime(normalized["timestamp"], errors="coerce")
        normalized["timestamp"] = timestamps
        if "hour" in normalized.columns:
            normalized["hour"] = normalized["hour"].fillna(timestamps.dt.hour)
        if "day" in normalized.columns:
            normalized["day"] = normalized["day"].fillna(
                timestamps.dt.strftime("%Y-%m-%d")
            )

    # These columns are guaranteed to exist after the defaults pass above.
    for column in ("width_mm", "height_mm", "length_mm", "angle_deg", "area_mm2"):
        normalized[column] = pd.to_numeric(
            normalized[column], errors="coerce"
        ).fillna(0.0)

    return normalized
|