root_cause.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. """根因候选分析。"""
  2. import pandas as pd
  3. EXTENDED_ROOT_CAUSE_DIMENSIONS = [
  4. "lam_fixture_id",
  5. "lam_jig_id",
  6. "lam_nozzle_id",
  7. "material_lot_oca",
  8. "material_lot_glass",
  9. "material_lot_polarizer",
  10. "clean_equipment_id",
  11. "clean_slot_id",
  12. "bond_equipment_id",
  13. "bond_head_id",
  14. "recipe_id",
  15. ]
  16. def build_extended_root_causes(df, dimensions=None):
  17. """按治具、吸嘴、材料批次等行业维度生成扩展根因候选。"""
  18. dimensions = EXTENDED_ROOT_CAUSE_DIMENSIONS if dimensions is None else dimensions
  19. total_defects = max(len(df), 1)
  20. rows = []
  21. for dimension in dimensions:
  22. if dimension not in df.columns:
  23. continue
  24. series = df[dimension].fillna("").astype(str)
  25. valid = df[series != ""].copy()
  26. if valid.empty:
  27. continue
  28. counts = valid.groupby(dimension).agg(
  29. 缺陷数=("defect_id", "count"),
  30. 涉及面板=("panel_id", "nunique"),
  31. 主要缺陷=("defect_type", lambda s: s.mode().iloc[0] if not s.mode().empty else "-"),
  32. 严重数=("severity", lambda s: int((s == "严重").sum())),
  33. ).reset_index()
  34. expected = len(valid) / max(valid[dimension].nunique(), 1)
  35. counts["维度"] = dimension
  36. counts["候选值"] = counts[dimension].astype(str)
  37. counts["占比"] = counts["缺陷数"] / total_defects
  38. counts["严重占比"] = counts["严重数"] / counts["缺陷数"].clip(lower=1)
  39. counts["异常倍数"] = (counts["缺陷数"] / max(expected, 0.001)).round(2)
  40. count_score = counts["缺陷数"] / counts["缺陷数"].max()
  41. lift_score = (counts["异常倍数"] / 3).clip(upper=1)
  42. counts["风险分"] = (count_score * 55 + lift_score * 30 + counts["严重占比"] * 15).round(1)
  43. rows.append(
  44. counts[["维度", "候选值", "缺陷数", "占比", "异常倍数", "涉及面板", "主要缺陷", "严重占比", "风险分"]]
  45. )
  46. if not rows:
  47. return pd.DataFrame(
  48. columns=["维度", "候选值", "缺陷数", "占比", "异常倍数", "涉及面板", "主要缺陷", "严重占比", "风险分"]
  49. )
  50. return (
  51. pd.concat(rows, ignore_index=True)
  52. .sort_values(["风险分", "缺陷数"], ascending=False)
  53. .head(12)
  54. .reset_index(drop=True)
  55. )