app_utils.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. """缺陷分析页面的可测试业务逻辑。"""
  2. import numpy as np
  3. import pandas as pd
  4. from defect_analysis.ml.model_bundle import create_model_bundle
  5. from defect_analysis.ml.predict import predict_key_factors
  6. from defect_analysis.root_cause import EXTENDED_ROOT_CAUSE_DIMENSIONS, build_extended_root_causes
  7. from defect_analysis.schemas import (
  8. CORE_REQUIRED_COLUMNS,
  9. INDUSTRY_OPTIONAL_COLUMNS,
  10. TEMPLATE_COLUMNS,
  11. get_missing_required_columns,
  12. normalize_defect_schema,
  13. )
# Maps a defect type label to an ordered list of recommended inspection
# actions. Looked up by the dominant defect type when diagnosis
# recommendations are assembled; defect types without an entry simply
# contribute no type-specific recommendation.
DEFECT_SOP_RECOMMENDATIONS = {
    "划痕": ["检查搬运轨道、吸嘴和治具接触面", "复核清洗滚刷与擦拭工位是否有硬质颗粒"],
    "气泡": ["检查贴合压力、真空度、OCA 状态和贴合速度", "复核贴合前清洁与材料开封时长"],
    "漏光": ["检查边缘贴合、背光组装、框胶和压合均匀性", "复核四角/边缘区应力与夹持状态"],
    "色差": ["检查背光、偏光片批次、贴合应力和老化条件", "对比同批材料与相邻工艺参数"],
    "异物": ["检查洁净度、清洗段、静电控制和材料暴露时间", "追溯同批材料与工位环境记录"],
    "亮点": ["复核点灯/AOI 判定、TFT 像素缺陷和异物压伤", "抽查高发区域是否存在压接或污染"],
    "暗点": ["复核点灯/AOI 判定、TFT 像素缺陷和异物压伤", "检查绑定/驱动相关区域异常"],
    "裂纹": ["立即检查切割、搬运、夹持和跌落冲击风险", "对同批面板执行 Hold 与复检"],
}
  24. def normalize_date_bounds(start_date, end_date):
  25. """把日期范围转换成左闭右开的时间边界,确保结束日期整天被包含。"""
  26. start_ts = pd.Timestamp(start_date).normalize()
  27. end_exclusive = pd.Timestamp(end_date).normalize() + pd.Timedelta(days=1)
  28. return start_ts, end_exclusive
  29. def apply_defect_filters(
  30. df,
  31. *,
  32. start_date,
  33. end_date,
  34. selected_types,
  35. selected_batches,
  36. selected_equipment,
  37. selected_seats,
  38. selected_shift="全部",
  39. selected_severity="全部",
  40. ):
  41. """应用页面筛选条件。"""
  42. start_ts, end_exclusive = normalize_date_bounds(start_date, end_date)
  43. mask = (
  44. (df["timestamp"] >= start_ts)
  45. & (df["timestamp"] < end_exclusive)
  46. & (df["defect_type"].isin(selected_types))
  47. & (df["batch_id"].isin(selected_batches))
  48. & (df["equipment_id"].isin(selected_equipment))
  49. )
  50. if selected_shift != "全部":
  51. mask &= df["shift"] == selected_shift
  52. if selected_severity != "全部":
  53. mask &= df["severity"] == selected_severity
  54. if selected_seats:
  55. mask &= df["seat_id"].isin(selected_seats)
  56. return df[mask].copy()
  57. def classify_panel_zone(df):
  58. """按 3C 面板行业常用语义把坐标映射到关键区域。"""
  59. width = df.get("panel_width_mm", pd.Series(155.0, index=df.index)).replace(0, np.nan)
  60. height = df.get("panel_height_mm", pd.Series(340.0, index=df.index)).replace(0, np.nan)
  61. x = df.get("x_mm", width * 0.5)
  62. y = df.get("y_mm", height * 0.5)
  63. x_norm = x / width
  64. y_norm = y / height
  65. zones = []
  66. for x, y in zip(x_norm.fillna(0.5), y_norm.fillna(0.5)):
  67. labels = []
  68. if x <= 0.1:
  69. labels.append("左边缘区")
  70. if x >= 0.9:
  71. labels.append("右边缘区")
  72. if y <= 0.1:
  73. labels.append("下边缘区")
  74. if y >= 0.9:
  75. labels.append("上边缘区")
  76. if (x <= 0.12 or x >= 0.88) and (y <= 0.12 or y >= 0.88):
  77. labels.append("角落区")
  78. if 0.68 <= y <= 0.88 and 0.25 <= x <= 0.75:
  79. labels.append("FPC/绑定区")
  80. if not labels:
  81. labels.append("显示中心区")
  82. zones.append(" / ".join(labels))
  83. return pd.Series(zones, index=df.index, name="panel_zone")
  84. def calculate_kpis(source_df, filtered_df):
  85. """基于当前筛选结果计算页面 KPI。"""
  86. total_panels_inspected = filtered_df["panel_id"].nunique()
  87. defective_panels = filtered_df["panel_id"].nunique()
  88. total_defects = len(filtered_df)
  89. critical_defects = int((filtered_df["severity"] == "严重").sum()) if total_defects else 0
  90. top_defect_type = filtered_df["defect_type"].mode().iloc[0] if total_defects else "-"
  91. yield_rate = (1 - defective_panels / max(total_panels_inspected, 1)) * 100
  92. return {
  93. "total_panels_inspected": int(total_panels_inspected),
  94. "defective_panels": int(defective_panels),
  95. "yield_rate": float(yield_rate),
  96. "total_defects": int(total_defects),
  97. "critical_defects": int(critical_defects),
  98. "top_defect_type": top_defect_type,
  99. }
  100. def calculate_spc_metrics(df):
  101. """计算 SPC 所需数据,防止模拟分母造成非法概率。"""
  102. daily = df.groupby("day").agg(
  103. total_defects=("defect_id", "count"),
  104. panels_with_defects=("panel_id", "nunique"),
  105. ).reset_index()
  106. daily["day"] = pd.to_datetime(daily["day"])
  107. daily = daily.sort_values("day").reset_index(drop=True)
  108. if len(daily) < 2:
  109. return {
  110. "daily": daily,
  111. "p_bar": 0.0,
  112. "ucl": 0.0,
  113. "lcl": 0.0,
  114. "uwl": 0.0,
  115. "lwl": 0.0,
  116. "sigma_p": 0.0,
  117. }
  118. total_days = (df["timestamp"].max() - df["timestamp"].min()).days + 1
  119. total_unique_panels = df["panel_id"].nunique()
  120. estimated = max(total_unique_panels // max(total_days // 7, 1), 1)
  121. daily["estimated_inspected"] = np.maximum(estimated, daily["panels_with_defects"])
  122. daily["defect_rate"] = (
  123. daily["panels_with_defects"] / daily["estimated_inspected"]
  124. ).clip(lower=0, upper=1)
  125. p_bar = float(np.clip(daily["defect_rate"].mean(), 0, 1))
  126. n_avg = float(daily["estimated_inspected"].mean())
  127. sigma_p = float(np.sqrt(max(p_bar * (1 - p_bar), 0) / n_avg)) if n_avg > 0 else 0.0
  128. return {
  129. "daily": daily,
  130. "p_bar": p_bar,
  131. "ucl": min(1.0, p_bar + 3 * sigma_p),
  132. "lcl": max(0.0, p_bar - 3 * sigma_p),
  133. "uwl": min(1.0, p_bar + 2 * sigma_p),
  134. "lwl": max(0.0, p_bar - 2 * sigma_p),
  135. "sigma_p": sigma_p,
  136. }
  137. def build_diagnostic_dashboard(df):
  138. """生成诊断驾驶舱需要的摘要、根因候选和趋势数据。"""
  139. total_defects = len(df)
  140. if total_defects == 0:
  141. return {
  142. "severity_level": "正常",
  143. "top_defect_type": "-",
  144. "top_defect_share": 0.0,
  145. "serious_share": 0.0,
  146. "root_causes": pd.DataFrame(),
  147. "extended_root_causes": pd.DataFrame(),
  148. "daily_trend": pd.DataFrame(),
  149. "pareto": pd.DataFrame(),
  150. "primary_recommendation": "当前筛选条件下没有缺陷记录。",
  151. }
  152. type_counts = df["defect_type"].value_counts()
  153. zones = classify_panel_zone(df)
  154. zone_counts = zones.value_counts()
  155. top_defect_type = type_counts.index[0]
  156. top_defect_share = float(type_counts.iloc[0] / total_defects)
  157. top_zone = zone_counts.index[0]
  158. top_zone_share = float(zone_counts.iloc[0] / total_defects)
  159. serious_share = float((df["severity"] == "严重").sum() / total_defects)
  160. root_causes = (
  161. df.groupby(["equipment_id", "seat_id"])
  162. .agg(
  163. 缺陷数=("defect_id", "count"),
  164. 涉及面板=("panel_id", "nunique"),
  165. 主要缺陷=("defect_type", lambda s: s.mode().iloc[0]),
  166. 严重数=("severity", lambda s: int((s == "严重").sum())),
  167. )
  168. .reset_index()
  169. )
  170. root_causes["根因候选"] = root_causes["equipment_id"] + " / " + root_causes["seat_id"]
  171. root_causes["占比"] = root_causes["缺陷数"] / total_defects
  172. root_causes["严重占比"] = root_causes["严重数"] / root_causes["缺陷数"].clip(lower=1)
  173. equipment_totals = df.groupby("equipment_id")["defect_id"].count()
  174. equipment_seat_counts = df.groupby("equipment_id")["seat_id"].nunique().clip(lower=1)
  175. root_causes["期望缺陷数"] = root_causes["equipment_id"].map(
  176. equipment_totals / equipment_seat_counts
  177. ).clip(lower=0.001)
  178. root_causes["异常倍数"] = (root_causes["缺陷数"] / root_causes["期望缺陷数"]).round(2)
  179. count_score = root_causes["缺陷数"] / root_causes["缺陷数"].max()
  180. panel_score = root_causes["涉及面板"] / df["panel_id"].nunique()
  181. lift_score = (root_causes["异常倍数"] / 3).clip(upper=1)
  182. root_causes["风险分"] = (
  183. count_score * 55 + lift_score * 25 + root_causes["严重占比"] * 15 + panel_score * 5
  184. ).round(1)
  185. root_causes = root_causes.sort_values(["风险分", "缺陷数"], ascending=False).head(8)
  186. root_causes = root_causes[
  187. ["根因候选", "缺陷数", "占比", "异常倍数", "涉及面板", "主要缺陷", "严重占比", "风险分"]
  188. ].reset_index(drop=True)
  189. pareto = type_counts.rename_axis("缺陷类型").reset_index(name="缺陷数")
  190. pareto["占比"] = pareto["缺陷数"] / total_defects
  191. pareto["累计占比"] = pareto["占比"].cumsum()
  192. daily_trend = df.groupby("day").size().rename("缺陷数").reset_index()
  193. daily_trend["day"] = pd.to_datetime(daily_trend["day"])
  194. daily_trend = daily_trend.sort_values("day")
  195. extended_root_causes = build_extended_root_causes(df)
  196. if serious_share >= 0.2 or (len(root_causes) > 0 and root_causes.iloc[0]["占比"] >= 0.15):
  197. severity_level = "严重"
  198. elif serious_share >= 0.1 or top_defect_share >= 0.35:
  199. severity_level = "关注"
  200. else:
  201. severity_level = "正常"
  202. if len(root_causes) > 0:
  203. top_root = root_causes.iloc[0]
  204. primary_recommendation = (
  205. f"优先排查 {top_root['根因候选']},该组合贡献 {top_root['占比']:.1%} "
  206. f"缺陷,异常倍数 {top_root['异常倍数']:.2f}x,主要类型为 {top_root['主要缺陷']}。"
  207. )
  208. else:
  209. primary_recommendation = f"优先排查 {top_defect_type} 相关工艺参数。"
  210. return {
  211. "severity_level": severity_level,
  212. "top_defect_type": top_defect_type,
  213. "top_defect_share": top_defect_share,
  214. "top_zone": top_zone,
  215. "top_zone_share": top_zone_share,
  216. "zone_distribution": zone_counts.rename_axis("区域").reset_index(name="缺陷数"),
  217. "serious_share": serious_share,
  218. "root_causes": root_causes,
  219. "extended_root_causes": extended_root_causes,
  220. "daily_trend": daily_trend,
  221. "pareto": pareto,
  222. "primary_recommendation": primary_recommendation,
  223. }
  224. def detect_industry_patterns(df):
  225. """识别面板行业常见缺陷模式。"""
  226. if df.empty:
  227. return []
  228. patterns = []
  229. zones = classify_panel_zone(df)
  230. zone_share = zones.value_counts(normalize=True)
  231. if any(idx != "显示中心区" and share >= 0.35 for idx, share in zone_share.items()):
  232. patterns.append(f"区域集中: {zone_share.index[0]} 占比 {zone_share.iloc[0]:.1%}")
  233. coord_df = df.copy()
  234. coord_df["x_bin"] = (coord_df["x_mm"] // 5).astype(int)
  235. coord_df["y_bin"] = (coord_df["y_mm"] // 5).astype(int)
  236. repeat = coord_df.groupby(["x_bin", "y_bin"])["panel_id"].nunique().max()
  237. if repeat >= min(3, max(2, df["panel_id"].nunique())):
  238. patterns.append("跨面板重复坐标: 疑似治具、吸嘴、压头或固定接触点异常")
  239. if df["x_mm"].nunique() >= 3 and df["y_mm"].nunique() >= 3 and len(df) >= 6:
  240. corr = abs(pd.Series(df["x_mm"]).corr(pd.Series(df["y_mm"])))
  241. if pd.notna(corr) and corr >= 0.85:
  242. patterns.append("线状分布: 疑似搬运划伤、滚轮轨迹或线性压伤")
  243. batch_share = df["batch_id"].value_counts(normalize=True).iloc[0]
  244. if batch_share >= 0.5 and df["batch_id"].nunique() > 1:
  245. patterns.append(f"批次集中: {df['batch_id'].value_counts().index[0]} 占比 {batch_share:.1%}")
  246. return patterns or ["随机点状分布: 更偏向材料、环境尘埃或偶发检出"]
  247. def generate_industry_diagnosis(df, dashboard):
  248. """生成 3C 面板行业化诊断结论和排查建议。"""
  249. if df.empty:
  250. return {
  251. "headline": "当前筛选条件下没有可诊断缺陷。",
  252. "patterns": [],
  253. "recommendations": ["放宽筛选条件或上传更多检测记录后再诊断。"],
  254. }
  255. top_type = dashboard["top_defect_type"]
  256. top_zone = dashboard.get("top_zone", classify_panel_zone(df).value_counts().index[0])
  257. top_root = dashboard["root_causes"].iloc[0]["根因候选"] if len(dashboard["root_causes"]) else "当前筛选范围"
  258. patterns = detect_industry_patterns(df)
  259. recommendations = []
  260. if top_type in DEFECT_SOP_RECOMMENDATIONS:
  261. recommendations.extend(DEFECT_SOP_RECOMMENDATIONS[top_type])
  262. if "边缘" in top_zone or "角落" in top_zone:
  263. recommendations.append("优先复核边缘贴合、切割/搬运夹持、吸附接触面和四角应力状态")
  264. if "FPC" in top_zone or "绑定" in top_zone:
  265. recommendations.append("重点检查绑定压力、FPC/COF 区域异物、压接参数和 AOI 复判样本")
  266. if any("跨面板重复" in p for p in patterns):
  267. recommendations.append("对高发座号对应治具、吸嘴、压头做点检,并抽查同坐标复现样本")
  268. if dashboard["serious_share"] >= 0.2:
  269. recommendations.append("严重缺陷占比较高,建议对相关批次执行 Hold、复检或加严抽样")
  270. deduped = []
  271. for item in recommendations:
  272. if item not in deduped:
  273. deduped.append(item)
  274. headline = (
  275. f"{top_zone} 的 {top_type} 最突出,首要候选为 {top_root}。"
  276. f"建议按工序链路优先排查材料、贴合/搬运接触面和对应治具状态。"
  277. )
  278. return {
  279. "headline": headline,
  280. "patterns": patterns,
  281. "recommendations": deduped[:5],
  282. }
  283. def build_ml_factor_insights(
  284. df,
  285. *,
  286. target_defect_type=None,
  287. target_severity=None,
  288. model_name="random_forest",
  289. top_n=10,
  290. ):
  291. """构建页面可展示的 ML 关键因子、验证指标和特征解释。"""
  292. normalized = normalize_defect_schema(df)
  293. resolved_target_type = target_defect_type
  294. if resolved_target_type is None and not normalized.empty:
  295. resolved_target_type = normalized["defect_type"].mode().iloc[0]
  296. base = {
  297. "target_defect_type": resolved_target_type,
  298. "target_severity": target_severity,
  299. "model_name": model_name,
  300. "key_factors": pd.DataFrame(),
  301. "metrics": {},
  302. "validation_metrics": {},
  303. "feature_importance": [],
  304. "error": None,
  305. }
  306. if normalized.empty:
  307. base["error"] = "当前筛选条件下没有可训练数据。"
  308. return base
  309. try:
  310. base["key_factors"] = predict_key_factors(
  311. normalized,
  312. target_defect_type=resolved_target_type,
  313. target_severity=target_severity,
  314. model_name=model_name,
  315. top_n=top_n,
  316. )
  317. bundle = create_model_bundle(
  318. normalized,
  319. model_name=model_name,
  320. target_defect_type=resolved_target_type,
  321. target_severity=target_severity,
  322. )
  323. except (RuntimeError, ValueError) as exc:
  324. base["error"] = str(exc)
  325. return base
  326. base["metrics"] = bundle["metrics"]
  327. base["validation_metrics"] = bundle["validation_metrics"]
  328. base["feature_importance"] = bundle["feature_importance"]
  329. return base