# schemas.py

"""Defect data contract and normalization."""
import pandas as pd
  3. CORE_REQUIRED_COLUMNS = [
  4. "defect_id",
  5. "panel_id",
  6. "batch_id",
  7. "equipment_id",
  8. "seat_id",
  9. "inspection_station",
  10. "timestamp",
  11. "defect_type",
  12. "severity",
  13. "x_mm",
  14. "y_mm",
  15. "panel_width_mm",
  16. "panel_height_mm",
  17. "hour",
  18. "shift",
  19. "day",
  20. ]
  21. INDUSTRY_OPTIONAL_COLUMNS = [
  22. "defect_geometry_type",
  23. "width_mm",
  24. "height_mm",
  25. "length_mm",
  26. "angle_deg",
  27. "area_mm2",
  28. "bbox_x_min_mm",
  29. "bbox_y_min_mm",
  30. "bbox_x_max_mm",
  31. "bbox_y_max_mm",
  32. "contour_json",
  33. "process_step",
  34. "recipe_id",
  35. "operator_id",
  36. "clean_equipment_id",
  37. "clean_slot_id",
  38. "clean_recipe_id",
  39. "lam_equipment_id",
  40. "lam_seat_id",
  41. "lam_fixture_id",
  42. "lam_jig_id",
  43. "lam_nozzle_id",
  44. "bond_equipment_id",
  45. "bond_head_id",
  46. "bond_recipe_id",
  47. "aoi_equipment_id",
  48. "aoi_station_id",
  49. "material_lot_glass",
  50. "material_lot_oca",
  51. "material_lot_polarizer",
  52. "material_lot_cover",
  53. ]
  54. TEMPLATE_COLUMNS = CORE_REQUIRED_COLUMNS + INDUSTRY_OPTIONAL_COLUMNS
  55. def get_missing_required_columns(df):
  56. """返回缺失的核心必填字段。"""
  57. return [column for column in CORE_REQUIRED_COLUMNS if column not in df.columns]
  58. def normalize_defect_schema(df):
  59. """补齐 3C 面板行业扩展字段,并保持旧版 CSV 可用。"""
  60. normalized = df.copy()
  61. defaults = {
  62. "defect_geometry_type": "point",
  63. "width_mm": 0.0,
  64. "height_mm": 0.0,
  65. "length_mm": 0.0,
  66. "angle_deg": 0.0,
  67. "area_mm2": 0.0,
  68. "bbox_x_min_mm": normalized.get("x_mm", 0.0),
  69. "bbox_y_min_mm": normalized.get("y_mm", 0.0),
  70. "bbox_x_max_mm": normalized.get("x_mm", 0.0),
  71. "bbox_y_max_mm": normalized.get("y_mm", 0.0),
  72. "contour_json": "",
  73. "process_step": "前制程",
  74. "recipe_id": "",
  75. "operator_id": "",
  76. "clean_equipment_id": "",
  77. "clean_slot_id": "",
  78. "clean_recipe_id": "",
  79. "lam_equipment_id": normalized.get("equipment_id", ""),
  80. "lam_seat_id": normalized.get("seat_id", ""),
  81. "lam_fixture_id": "",
  82. "lam_jig_id": "",
  83. "lam_nozzle_id": "",
  84. "bond_equipment_id": "",
  85. "bond_head_id": "",
  86. "bond_recipe_id": "",
  87. "aoi_equipment_id": normalized.get("inspection_station", ""),
  88. "aoi_station_id": normalized.get("inspection_station", ""),
  89. "material_lot_glass": "",
  90. "material_lot_oca": "",
  91. "material_lot_polarizer": "",
  92. "material_lot_cover": "",
  93. }
  94. for column, value in defaults.items():
  95. if column not in normalized.columns:
  96. normalized[column] = value
  97. if "timestamp" in normalized.columns:
  98. normalized["timestamp"] = pd.to_datetime(normalized["timestamp"])
  99. if "hour" in normalized.columns:
  100. normalized["hour"] = normalized["hour"].fillna(normalized["timestamp"].dt.hour)
  101. if "day" in normalized.columns:
  102. normalized["day"] = normalized["day"].fillna(normalized["timestamp"].dt.strftime("%Y-%m-%d"))
  103. for column in ["width_mm", "height_mm", "length_mm", "angle_deg", "area_mm2"]:
  104. normalized[column] = pd.to_numeric(normalized[column], errors="coerce").fillna(0.0)
  105. return normalized