"""缺陷数据契约与标准化。""" import pandas as pd CORE_REQUIRED_COLUMNS = [ "defect_id", "panel_id", "batch_id", "equipment_id", "seat_id", "inspection_station", "timestamp", "defect_type", "severity", "x_mm", "y_mm", "panel_width_mm", "panel_height_mm", "hour", "shift", "day", ] INDUSTRY_OPTIONAL_COLUMNS = [ "defect_geometry_type", "width_mm", "height_mm", "length_mm", "angle_deg", "area_mm2", "bbox_x_min_mm", "bbox_y_min_mm", "bbox_x_max_mm", "bbox_y_max_mm", "contour_json", "process_step", "recipe_id", "operator_id", "clean_equipment_id", "clean_slot_id", "clean_recipe_id", "lam_equipment_id", "lam_seat_id", "lam_fixture_id", "lam_jig_id", "lam_nozzle_id", "bond_equipment_id", "bond_head_id", "bond_recipe_id", "aoi_equipment_id", "aoi_station_id", "material_lot_glass", "material_lot_oca", "material_lot_polarizer", "material_lot_cover", ] TEMPLATE_COLUMNS = CORE_REQUIRED_COLUMNS + INDUSTRY_OPTIONAL_COLUMNS def get_missing_required_columns(df): """返回缺失的核心必填字段。""" return [column for column in CORE_REQUIRED_COLUMNS if column not in df.columns] def normalize_defect_schema(df): """补齐 3C 面板行业扩展字段,并保持旧版 CSV 可用。""" normalized = df.copy() defaults = { "defect_geometry_type": "point", "width_mm": 0.0, "height_mm": 0.0, "length_mm": 0.0, "angle_deg": 0.0, "area_mm2": 0.0, "bbox_x_min_mm": normalized.get("x_mm", 0.0), "bbox_y_min_mm": normalized.get("y_mm", 0.0), "bbox_x_max_mm": normalized.get("x_mm", 0.0), "bbox_y_max_mm": normalized.get("y_mm", 0.0), "contour_json": "", "process_step": "前制程", "recipe_id": "", "operator_id": "", "clean_equipment_id": "", "clean_slot_id": "", "clean_recipe_id": "", "lam_equipment_id": normalized.get("equipment_id", ""), "lam_seat_id": normalized.get("seat_id", ""), "lam_fixture_id": "", "lam_jig_id": "", "lam_nozzle_id": "", "bond_equipment_id": "", "bond_head_id": "", "bond_recipe_id": "", "aoi_equipment_id": normalized.get("inspection_station", ""), "aoi_station_id": normalized.get("inspection_station", ""), "material_lot_glass": "", "material_lot_oca": "", "material_lot_polarizer": "", "material_lot_cover": "", } for column, value in defaults.items(): if column not in normalized.columns: normalized[column] = value if "timestamp" in normalized.columns: normalized["timestamp"] = pd.to_datetime(normalized["timestamp"]) if "hour" in normalized.columns: normalized["hour"] = normalized["hour"].fillna(normalized["timestamp"].dt.hour) if "day" in normalized.columns: normalized["day"] = normalized["day"].fillna(normalized["timestamp"].dt.strftime("%Y-%m-%d")) for column in ["width_mm", "height_mm", "length_mm", "angle_deg", "area_mm2"]: normalized[column] = pd.to_numeric(normalized[column], errors="coerce").fillna(0.0) return normalized