2 ヶ月前 · 8d0fdbece8
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
 
				+__pycache__/
			
 
				+*.pyc
			
 
				+*.pyo
			
--- a/app.py
+++ b/app.py
@@ -10,11 +10,19 @@ import matplotlib.pyplot as plt
 
				 import matplotlib.font_manager as fm
			
 
				 import seaborn as sns
			
 
				 import streamlit as st
			
 
				+import plotly.express as px
			
 
				+import plotly.graph_objects as go
			
 
				 import os
			
 
				 from datetime import datetime
			
 
				 from sklearn.cluster import DBSCAN
			
 
				 from sklearn.decomposition import PCA
			
 
				 from sklearn.preprocessing import StandardScaler
			
 
				+from app_utils import (
			
 
				+    apply_defect_filters,
			
 
				+    build_diagnostic_dashboard,
			
 
				+    calculate_kpis,
			
 
				+    calculate_spc_metrics,
			
 
				+)
			
 
				 
			
 
				 # --- 中文字体设置 ---
			
 
				 def setup_chinese_font():
			
@@ -46,21 +54,6 @@ st.set_page_config(
 
				     initial_sidebar_state="expanded"
			
 
				 )
			
 
				 
			
 
				-# --- 加载数据 ---
			
 
				-@st.cache_data(ttl=300)
			
 
				-def load_data():
			
 
				-    """加载并缓存数据"""
			
 
				-    if not os.path.exists("defect_data.csv"):
			
 
				-        st.error("未找到 defect_data.csv，请先运行 generate_data.py 生成数据")
			
 
				-        return None
			
 
				-    df = pd.read_csv("defect_data.csv", parse_dates=["timestamp"])
			
 
				-    df["timestamp"] = pd.to_datetime(df["timestamp"])
			
 
				-    return df
			
 
				-
			
 
				-df = load_data()
			
 
				-if df is None:
			
 
				-    st.stop()
			
 
				-
			
 
				 # --- 侧边栏 ---
			
 
				 st.sidebar.title("🔍 筛选条件")
			
 
				 
			
@@ -136,7 +129,7 @@ view_mode = st.sidebar.selectbox(
 
				 tab_visibility = {
			
 
				     "操作员": {
			
 
				         "tabs": ["🗺️ 空间集中性", "📊 类型集中性 (帕累托)", "📈 时间集中性",
			
 
				-                 "🏗️ 设备座号集中性", "🔬 缺陷模式识别"],
			
 
				+                 "🏗️ 设备座号集中性", "🔬 缺陷模式识别", "🧭 诊断驾驶舱"],
			
 
				         "show_kpi": True,
			
 
				         "show_export": True,
			
 
				     },
			
@@ -147,7 +140,7 @@ tab_visibility = {
 
				     },
			
 
				     "管理者": {
			
 
				         "tabs": ["🚨 SPC 控制图与预警", "🔬 缺陷模式识别", "💚 设备健康与共性分析",
			
 
				-                 "📊 类型集中性 (帕累托)", "📈 时间集中性"],
			
 
				+                 "📊 类型集中性 (帕累托)", "📈 时间集中性", "🧭 诊断驾驶舱"],
			
 
				         "show_kpi": True,
			
 
				         "show_export": True,
			
 
				     },
			
@@ -182,7 +175,7 @@ selected_shift = st.sidebar.radio("班次", options=shift_options)
 
				 
			
 
				 # 批次
			
 
				 all_batches = sorted(df["batch_id"].unique())
			
 
				-selected_batches = st.sidebar.multiselect("批次", options=all_batches, default=all_batches[:5])
			
 
				+selected_batches = st.sidebar.multiselect("批次", options=all_batches, default=all_batches)
			
 
				 
			
 
				 # 严重程度
			
 
				 all_severities = ["全部", "轻微", "中等", "严重"]
			
@@ -199,30 +192,26 @@ if selected_equipment:
 
				 else:
			
 
				     selected_seats = []
			
 
				 
			
 
				-# 应用筛选
			
 
				-mask = (
			
 
				-    (df["timestamp"] >= start_date) &
			
 
				-    (df["timestamp"] <= end_date) &
			
 
				-    (df["defect_type"].isin(selected_types)) &
			
 
				-    (df["batch_id"].isin(selected_batches)) &
			
 
				-    (df["equipment_id"].isin(selected_equipment))
			
 
				+filtered_df = apply_defect_filters(
			
 
				+    df,
			
 
				+    start_date=start_date,
			
 
				+    end_date=end_date,
			
 
				+    selected_types=selected_types,
			
 
				+    selected_batches=selected_batches,
			
 
				+    selected_equipment=selected_equipment,
			
 
				+    selected_seats=selected_seats,
			
 
				+    selected_shift=selected_shift,
			
 
				+    selected_severity=selected_severity,
			
 
				 )
			
 
				-if selected_shift != "全部":
			
 
				-    mask &= (df["shift"] == selected_shift)
			
 
				-if selected_severity != "全部":
			
 
				-    mask &= (df["severity"] == selected_severity)
			
 
				-if selected_seats:
			
 
				-    mask &= (df["seat_id"].isin(selected_seats))
			
 
				-
			
 
				-filtered_df = df[mask].copy()
			
 
				 
			
 
				 # ========== KPI 看板 ==========
			
 
				-total_panels_inspected = df[df["timestamp"] >= start_date]["panel_id"].nunique()
			
 
				-defective_panels = filtered_df["panel_id"].nunique()
			
 
				-yield_rate = (1 - defective_panels / max(total_panels_inspected, 1)) * 100
			
 
				-total_defects = len(filtered_df)
			
 
				-critical_defects = (filtered_df["severity"] == "严重").sum()
			
 
				-top_defect_type = filtered_df["defect_type"].mode().iloc[0] if len(filtered_df) > 0 else "-"
			
 
				+kpis = calculate_kpis(df, filtered_df)
			
 
				+total_panels_inspected = kpis["total_panels_inspected"]
			
 
				+defective_panels = kpis["defective_panels"]
			
 
				+yield_rate = kpis["yield_rate"]
			
 
				+total_defects = kpis["total_defects"]
			
 
				+critical_defects = kpis["critical_defects"]
			
 
				+top_defect_type = kpis["top_defect_type"]
			
 
				 
			
 
				 kpi1, kpi2, kpi3, kpi4, kpi5, kpi6 = st.columns(6)
			
 
				 kpi1.metric("检测面板数", f"{total_panels_inspected} 块")
			
@@ -271,8 +260,13 @@ st.markdown(f"**数据范围**: {start_date.strftime('%Y-%m-%d')} ~ {end_date.st
 
				 
			
 
				 st.divider()
			
 
				 
			
 
				+if filtered_df.empty:
			
 
				+    st.warning("当前筛选条件下没有缺陷记录，请放宽日期、批次、设备或缺陷类型筛选。")
			
 
				+    st.stop()
			
 
				+
			
 
				 # --- Tab 布局 (按角色动态) ---
			
 
				 ALL_TABS = [
			
 
				+    "🧭 诊断驾驶舱",
			
 
				     "🗺️ 空间集中性",
			
 
				     "📊 类型集中性 (帕累托)",
			
 
				     "📈 时间集中性",
			
@@ -298,6 +292,272 @@ def get_tab(name):
 
				     """获取指定 Tab 容器，如果不可见则返回 None"""
			
 
				     return tab_map.get(name)
			
 
				 
			
 
				+# ========== Tab 0: 诊断驾驶舱 ==========
			
 
				+_t = get_tab("🧭 诊断驾驶舱")
			
 
				+if _t:
			
 
				+    with _t:
			
 
				+        dashboard = build_diagnostic_dashboard(filtered_df)
			
 
				+        level_colors = {
			
 
				+            "严重": ("#7f1d1d", "#fee2e2"),
			
 
				+            "关注": ("#92400e", "#fef3c7"),
			
 
				+            "正常": ("#14532d", "#dcfce7"),
			
 
				+        }
			
 
				+        level_fg, level_bg = level_colors.get(dashboard["severity_level"], ("#334155", "#e2e8f0"))
			
 
				+
			
 
				+        st.markdown(
			
 
				+            """
			
 
				+            <style>
			
 
				+            .diag-hero {
			
 
				+                padding: 24px 28px;
			
 
				+                border-radius: 24px;
			
 
				+                background:
			
 
				+                    radial-gradient(circle at 15% 15%, rgba(20, 184, 166, .18), transparent 28%),
			
 
				+                    linear-gradient(135deg, #0f172a 0%, #12343b 52%, #294936 100%);
			
 
				+                color: #f8fafc;
			
 
				+                box-shadow: 0 18px 45px rgba(15, 23, 42, .18);
			
 
				+                margin-bottom: 18px;
			
 
				+            }
			
 
				+            .diag-hero h2 { margin: 0 0 8px 0; font-size: 30px; letter-spacing: .02em; }
			
 
				+            .diag-hero p { margin: 0; color: #cbd5e1; font-size: 15px; }
			
 
				+            .diag-badge {
			
 
				+                display: inline-flex;
			
 
				+                align-items: center;
			
 
				+                padding: 6px 12px;
			
 
				+                border-radius: 999px;
			
 
				+                font-weight: 700;
			
 
				+                margin-bottom: 12px;
			
 
				+            }
			
 
				+            .diag-card {
			
 
				+                padding: 18px 18px;
			
 
				+                border-radius: 18px;
			
 
				+                border: 1px solid #dbe4e7;
			
 
				+                background: linear-gradient(180deg, #ffffff 0%, #f8fafc 100%);
			
 
				+                min-height: 128px;
			
 
				+            }
			
 
				+            .diag-card .label { color: #64748b; font-size: 13px; margin-bottom: 8px; }
			
 
				+            .diag-card .value { color: #0f172a; font-size: 26px; font-weight: 800; line-height: 1.1; }
			
 
				+            .diag-card .hint { color: #475569; font-size: 13px; margin-top: 10px; }
			
 
				+            </style>
			
 
				+            """,
			
 
				+            unsafe_allow_html=True,
			
 
				+        )
			
 
				+
			
 
				+        st.markdown(
			
 
				+            f"""
			
 
				+            <div class="diag-hero">
			
 
				+                <div class="diag-badge" style="color:{level_fg}; background:{level_bg};">
			
 
				+                    当前诊断等级：{dashboard["severity_level"]}
			
 
				+                </div>
			
 
				+                <h2>缺陷诊断驾驶舱</h2>
			
 
				+                <p>{dashboard["primary_recommendation"]}</p>
			
 
				+            </div>
			
 
				+            """,
			
 
				+            unsafe_allow_html=True,
			
 
				+        )
			
 
				+
			
 
				+        card1, card2, card3, card4 = st.columns(4)
			
 
				+        with card1:
			
 
				+            st.markdown(
			
 
				+                f"""
			
 
				+                <div class="diag-card">
			
 
				+                    <div class="label">筛选后缺陷</div>
			
 
				+                    <div class="value">{len(filtered_df)}</div>
			
 
				+                    <div class="hint">涉及 {filtered_df["panel_id"].nunique()} 块面板</div>
			
 
				+                </div>
			
 
				+                """,
			
 
				+                unsafe_allow_html=True,
			
 
				+            )
			
 
				+        with card2:
			
 
				+            st.markdown(
			
 
				+                f"""
			
 
				+                <div class="diag-card">
			
 
				+                    <div class="label">主导缺陷类型</div>
			
 
				+                    <div class="value">{dashboard["top_defect_type"]}</div>
			
 
				+                    <div class="hint">占全部缺陷 {dashboard["top_defect_share"]:.1%}</div>
			
 
				+                </div>
			
 
				+                """,
			
 
				+                unsafe_allow_html=True,
			
 
				+            )
			
 
				+        with card3:
			
 
				+            st.markdown(
			
 
				+                f"""
			
 
				+                <div class="diag-card">
			
 
				+                    <div class="label">严重缺陷占比</div>
			
 
				+                    <div class="value">{dashboard["serious_share"]:.1%}</div>
			
 
				+                    <div class="hint">高于 20% 建议立即复盘</div>
			
 
				+                </div>
			
 
				+                """,
			
 
				+                unsafe_allow_html=True,
			
 
				+            )
			
 
				+        with card4:
			
 
				+            top_root = dashboard["root_causes"].iloc[0] if len(dashboard["root_causes"]) else None
			
 
				+            root_name = top_root["根因候选"] if top_root is not None else "-"
			
 
				+            root_share = top_root["占比"] if top_root is not None else 0
			
 
				+            root_lift = top_root["异常倍数"] if top_root is not None else 0
			
 
				+            st.markdown(
			
 
				+                f"""
			
 
				+                <div class="diag-card">
			
 
				+                    <div class="label">首要根因候选</div>
			
 
				+                    <div class="value" style="font-size:22px;">{root_name}</div>
			
 
				+                    <div class="hint">贡献 {root_share:.1%} 缺陷，异常 {root_lift:.2f}x</div>
			
 
				+                </div>
			
 
				+                """,
			
 
				+                unsafe_allow_html=True,
			
 
				+            )
			
 
				+
			
 
				+        st.divider()
			
 
				+        left, right = st.columns([1.25, 1])
			
 
				+        with left:
			
 
				+            st.subheader("交互式面板数字孪生")
			
 
				+            panel_w = float(df["panel_width_mm"].iloc[0])
			
 
				+            panel_h = float(df["panel_height_mm"].iloc[0])
			
 
				+            fig_map = go.Figure()
			
 
				+            fig_map.add_shape(
			
 
				+                type="rect",
			
 
				+                x0=0,
			
 
				+                y0=0,
			
 
				+                x1=panel_w,
			
 
				+                y1=panel_h,
			
 
				+                line=dict(color="#0f172a", width=2),
			
 
				+                fillcolor="#f8fafc",
			
 
				+                layer="below",
			
 
				+            )
			
 
				+            fig_map.add_trace(
			
 
				+                go.Scatter(
			
 
				+                    x=filtered_df["x_mm"],
			
 
				+                    y=filtered_df["y_mm"],
			
 
				+                    mode="markers",
			
 
				+                    marker=dict(
			
 
				+                        size=7,
			
 
				+                        color=filtered_df["severity"].map({"轻微": 1, "中等": 2, "严重": 3}),
			
 
				+                        colorscale=[[0, "#38bdf8"], [0.5, "#f59e0b"], [1, "#dc2626"]],
			
 
				+                        showscale=True,
			
 
				+                        colorbar=dict(title="严重度"),
			
 
				+                        opacity=0.72,
			
 
				+                        line=dict(width=0.4, color="#ffffff"),
			
 
				+                    ),
			
 
				+                    text=filtered_df["defect_id"],
			
 
				+                    customdata=filtered_df[["defect_type", "severity", "equipment_id", "seat_id", "batch_id"]],
			
 
				+                    hovertemplate=(
			
 
				+                        "缺陷ID: %{text}<br>"
			
 
				+                        "坐标: (%{x:.1f}, %{y:.1f}) mm<br>"
			
 
				+                        "类型: %{customdata[0]}<br>"
			
 
				+                        "严重度: %{customdata[1]}<br>"
			
 
				+                        "设备/座号: %{customdata[2]} / %{customdata[3]}<br>"
			
 
				+                        "批次: %{customdata[4]}<extra></extra>"
			
 
				+                    ),
			
 
				+                    name="缺陷点",
			
 
				+                )
			
 
				+            )
			
 
				+            fig_map.add_vrect(x0=0, x1=panel_w * 0.1, fillcolor="#f97316", opacity=0.08, line_width=0)
			
 
				+            fig_map.add_vrect(x0=panel_w * 0.9, x1=panel_w, fillcolor="#f97316", opacity=0.08, line_width=0)
			
 
				+            fig_map.add_hrect(y0=panel_h * 0.72, y1=panel_h * 0.88, fillcolor="#14b8a6", opacity=0.09, line_width=0)
			
 
				+            fig_map.update_layout(
			
 
				+                height=560,
			
 
				+                margin=dict(l=18, r=18, t=30, b=18),
			
 
				+                plot_bgcolor="#ffffff",
			
 
				+                paper_bgcolor="#ffffff",
			
 
				+                xaxis=dict(title="X (mm)", range=[0, panel_w], showgrid=True, gridcolor="#e2e8f0"),
			
 
				+                yaxis=dict(title="Y (mm)", range=[0, panel_h], scaleanchor="x", scaleratio=1, showgrid=True, gridcolor="#e2e8f0"),
			
 
				+                title="按真实屏幕比例定位缺陷，橙色为边缘敏感区，青色为 FPC 关注区",
			
 
				+            )
			
 
				+            st.plotly_chart(fig_map, use_container_width=True)
			
 
				+
			
 
				+            fig_density = px.density_heatmap(
			
 
				+                filtered_df,
			
 
				+                x="x_mm",
			
 
				+                y="y_mm",
			
 
				+                nbinsx=28,
			
 
				+                nbinsy=42,
			
 
				+                color_continuous_scale="YlOrRd",
			
 
				+                title="密度热区视图",
			
 
				+                labels={"x_mm": "X (mm)", "y_mm": "Y (mm)"},
			
 
				+            )
			
 
				+            fig_density.update_layout(height=300, margin=dict(l=18, r=18, t=42, b=18))
			
 
				+            st.plotly_chart(fig_density, use_container_width=True)
			
 
				+
			
 
				+        with right:
			
 
				+            st.subheader("根因候选榜")
			
 
				+            root_causes = dashboard["root_causes"].copy()
			
 
				+            fig_root = px.bar(
			
 
				+                root_causes.sort_values("风险分", ascending=True),
			
 
				+                x="风险分",
			
 
				+                y="根因候选",
			
 
				+                orientation="h",
			
 
				+                color="异常倍数",
			
 
				+                color_continuous_scale="Tealrose",
			
 
				+                text="风险分",
			
 
				+                hover_data={
			
 
				+                    "缺陷数": True,
			
 
				+                    "占比": ":.1%",
			
 
				+                    "异常倍数": ":.2f",
			
 
				+                    "涉及面板": True,
			
 
				+                    "主要缺陷": True,
			
 
				+                    "严重占比": ":.1%",
			
 
				+                    "风险分": ":.1f",
			
 
				+                },
			
 
				+                labels={"风险分": "风险分", "根因候选": ""},
			
 
				+            )
			
 
				+            fig_root.update_traces(texttemplate="%{text:.1f}", textposition="outside")
			
 
				+            fig_root.update_layout(height=360, margin=dict(l=8, r=20, t=20, b=20))
			
 
				+            st.plotly_chart(fig_root, use_container_width=True)
			
 
				+
			
 
				+            root_table = root_causes.copy()
			
 
				+            root_table["占比"] = root_table["占比"].map(lambda v: f"{v:.1%}")
			
 
				+            root_table["异常倍数"] = root_table["异常倍数"].map(lambda v: f"{v:.2f}x")
			
 
				+            root_table["严重占比"] = root_table["严重占比"].map(lambda v: f"{v:.1%}")
			
 
				+            st.dataframe(root_table, use_container_width=True, hide_index=True)
			
 
				+            st.caption("风险分 = 贡献规模 + 异常倍数 + 严重占比 + 涉及面板数。先查高贡献且高偏离的组合。")
			
 
				+
			
 
				+        trend_col, pareto_col = st.columns([1, 1])
			
 
				+        with trend_col:
			
 
				+            st.subheader("每日缺陷走势")
			
 
				+            daily_trend = dashboard["daily_trend"]
			
 
				+            fig_trend_dash = px.area(
			
 
				+                daily_trend,
			
 
				+                x="day",
			
 
				+                y="缺陷数",
			
 
				+                markers=True,
			
 
				+                color_discrete_sequence=["#0f766e"],
			
 
				+                labels={"day": "日期", "缺陷数": "缺陷数"},
			
 
				+            )
			
 
				+            fig_trend_dash.update_traces(line=dict(width=3), fillcolor="rgba(20, 184, 166, .22)")
			
 
				+            fig_trend_dash.update_layout(height=350, margin=dict(l=18, r=18, t=20, b=18))
			
 
				+            st.plotly_chart(fig_trend_dash, use_container_width=True)
			
 
				+
			
 
				+        with pareto_col:
			
 
				+            st.subheader("缺陷类型 Pareto")
			
 
				+            pareto = dashboard["pareto"].head(8)
			
 
				+            fig_pareto_dash = go.Figure()
			
 
				+            fig_pareto_dash.add_trace(
			
 
				+                go.Bar(
			
 
				+                    x=pareto["缺陷类型"],
			
 
				+                    y=pareto["缺陷数"],
			
 
				+                    marker_color="#334155",
			
 
				+                    name="缺陷数",
			
 
				+                    hovertemplate="%{x}<br>缺陷数: %{y}<extra></extra>",
			
 
				+                )
			
 
				+            )
			
 
				+            fig_pareto_dash.add_trace(
			
 
				+                go.Scatter(
			
 
				+                    x=pareto["缺陷类型"],
			
 
				+                    y=pareto["累计占比"],
			
 
				+                    yaxis="y2",
			
 
				+                    mode="lines+markers",
			
 
				+                    line=dict(color="#dc2626", width=3),
			
 
				+                    name="累计占比",
			
 
				+                    hovertemplate="%{x}<br>累计占比: %{y:.1%}<extra></extra>",
			
 
				+                )
			
 
				+            )
			
 
				+            fig_pareto_dash.update_layout(
			
 
				+                height=350,
			
 
				+                margin=dict(l=18, r=18, t=20, b=18),
			
 
				+                yaxis=dict(title="缺陷数"),
			
 
				+                yaxis2=dict(title="累计占比", overlaying="y", side="right", tickformat=".0%"),
			
 
				+                legend=dict(orientation="h", y=1.12),
			
 
				+            )
			
 
				+            st.plotly_chart(fig_pareto_dash, use_container_width=True)
			
 
				+
			
 
				 # ========== Tab 1: 空间集中性 ==========
			
 
				 _t = get_tab("🗺️ 空间集中性")
			
 
				 if _t:
			
@@ -1016,31 +1276,19 @@ if _t:
 
				     # --- 数据准备：按天计算缺陷率 ---
			
 
				     # 需要知道每天检测了多少面板才能算缺陷率
			
 
				     # 用 batch_id 近似日期
			
 
				-        daily_all = df.groupby("day").agg(
			
 
				-            total_defects=("defect_id", "count"),
			
 
				-            panels_with_defects=("panel_id", "nunique")
			
 
				-        ).reset_index()
			
 
				-        daily_all["day"] = pd.to_datetime(daily_all["day"])
			
 
				-        daily_all = daily_all.sort_values("day").reset_index(drop=True)
			
 
				+        spc_metrics = calculate_spc_metrics(df)
			
 
				+        daily_all = spc_metrics["daily"]
			
 
				 
			
 
				         if len(daily_all) < 2:
			
 
				             st.warning("数据天数不足，无法生成控制图")
			
 
				         else:
			
 
				-        # 估算每天检测总数：用总面板数 / 总天数近似
			
 
				-            total_days = (df["timestamp"].max() - df["timestamp"].min()).days + 1
			
 
				-            total_unique_panels = df["panel_id"].nunique()
			
 
				-            daily_all["estimated_inspected"] = max(total_unique_panels // max(total_days // 7, 1), 1)  # 按工作日估算
			
 
				-            daily_all["defect_rate"] = daily_all["panels_with_defects"] / daily_all["estimated_inspected"]
			
 
				-
			
 
				         # 控制限计算
			
 
				-            p_bar = daily_all["defect_rate"].mean()
			
 
				-            n_avg = daily_all["estimated_inspected"].mean()
			
 
				-            sigma_p = np.sqrt(p_bar * (1 - p_bar) / n_avg) if n_avg > 0 and p_bar > 0 else 0
			
 
				-
			
 
				-            UCL = p_bar + 3 * sigma_p  # 上控制限
			
 
				-            LCL = max(0, p_bar - 3 * sigma_p)  # 下控制限
			
 
				-            UWL = p_bar + 2 * sigma_p  # 上警告限
			
 
				-            LWL = max(0, p_bar - 2 * sigma_p)  # 下警告限
			
 
				+            p_bar = spc_metrics["p_bar"]
			
 
				+            sigma_p = spc_metrics["sigma_p"]
			
 
				+            UCL = spc_metrics["ucl"]
			
 
				+            LCL = spc_metrics["lcl"]
			
 
				+            UWL = spc_metrics["uwl"]
			
 
				+            LWL = spc_metrics["lwl"]
			
 
				 
			
 
				         # --- Western Electric 规则检测 ---
			
 
				             we_violations = []
			
@@ -1986,11 +2234,13 @@ if current_config["show_export"]:
 
				 
			
 
				     # 1. KPI 摘要
			
 
				     report_parts.append("## 1. KPI 摘要\n")
			
 
				-    total_panels_inspected_r = df[df["timestamp"] >= start_date]["panel_id"].nunique()
			
 
				-    defective_panels_r = filtered_df["panel_id"].nunique()
			
 
				-    yield_rate_r = (1 - defective_panels_r / max(total_panels_inspected_r, 1)) * 100
			
 
				+    report_kpis = calculate_kpis(df, filtered_df)
			
 
				+    total_panels_inspected_r = report_kpis["total_panels_inspected"]
			
 
				+    defective_panels_r = report_kpis["defective_panels"]
			
 
				+    yield_rate_r = report_kpis["yield_rate"]
			
 
				     report_parts.append(f"- 检测面板数: {total_panels_inspected_r} 块")
			
 
				-    report_parts.append(f"- 不良面板数: {defective_panels_r} 块 ({defective_panels_r/total_panels_inspected_r*100:.1f}%)")
			
 
				+    defective_rate_r = defective_panels_r / max(total_panels_inspected_r, 1) * 100
			
 
				+    report_parts.append(f"- 不良面板数: {defective_panels_r} 块 ({defective_rate_r:.1f}%)")
			
 
				     report_parts.append(f"- 综合良率: {yield_rate_r:.1f}%")
			
 
				     report_parts.append(f"- 缺陷总数: {len(filtered_df)} 个")
			
 
				     report_parts.append(f"- 严重缺陷: {(filtered_df['severity']=='严重').sum()} 个\n")
			
--- a/app_utils.py
+++ b/app_utils.py
@@ -0,0 +1,190 @@
 
				+"""缺陷分析页面的可测试业务逻辑。"""
			
 
				+
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+
			
 
				+
			
 
				+def normalize_date_bounds(start_date, end_date):
			
 
				+    """把日期范围转换成左闭右开的时间边界，确保结束日期整天被包含。"""
			
 
				+    start_ts = pd.Timestamp(start_date).normalize()
			
 
				+    end_exclusive = pd.Timestamp(end_date).normalize() + pd.Timedelta(days=1)
			
 
				+    return start_ts, end_exclusive
			
 
				+
			
 
				+
			
 
				+def apply_defect_filters(
			
 
				+    df,
			
 
				+    *,
			
 
				+    start_date,
			
 
				+    end_date,
			
 
				+    selected_types,
			
 
				+    selected_batches,
			
 
				+    selected_equipment,
			
 
				+    selected_seats,
			
 
				+    selected_shift="全部",
			
 
				+    selected_severity="全部",
			
 
				+):
			
 
				+    """应用页面筛选条件。"""
			
 
				+    start_ts, end_exclusive = normalize_date_bounds(start_date, end_date)
			
 
				+    mask = (
			
 
				+        (df["timestamp"] >= start_ts)
			
 
				+        & (df["timestamp"] < end_exclusive)
			
 
				+        & (df["defect_type"].isin(selected_types))
			
 
				+        & (df["batch_id"].isin(selected_batches))
			
 
				+        & (df["equipment_id"].isin(selected_equipment))
			
 
				+    )
			
 
				+    if selected_shift != "全部":
			
 
				+        mask &= df["shift"] == selected_shift
			
 
				+    if selected_severity != "全部":
			
 
				+        mask &= df["severity"] == selected_severity
			
 
				+    if selected_seats:
			
 
				+        mask &= df["seat_id"].isin(selected_seats)
			
 
				+
			
 
				+    return df[mask].copy()
			
 
				+
			
 
				+
			
 
				+def calculate_kpis(source_df, filtered_df):
			
 
				+    """基于当前筛选结果计算页面 KPI。"""
			
 
				+    total_panels_inspected = filtered_df["panel_id"].nunique()
			
 
				+    defective_panels = filtered_df["panel_id"].nunique()
			
 
				+    total_defects = len(filtered_df)
			
 
				+    critical_defects = int((filtered_df["severity"] == "严重").sum()) if total_defects else 0
			
 
				+    top_defect_type = filtered_df["defect_type"].mode().iloc[0] if total_defects else "-"
			
 
				+    yield_rate = (1 - defective_panels / max(total_panels_inspected, 1)) * 100
			
 
				+
			
 
				+    return {
			
 
				+        "total_panels_inspected": int(total_panels_inspected),
			
 
				+        "defective_panels": int(defective_panels),
			
 
				+        "yield_rate": float(yield_rate),
			
 
				+        "total_defects": int(total_defects),
			
 
				+        "critical_defects": int(critical_defects),
			
 
				+        "top_defect_type": top_defect_type,
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def calculate_spc_metrics(df):
			
 
				+    """计算 SPC 所需数据，防止模拟分母造成非法概率。"""
			
 
				+    daily = df.groupby("day").agg(
			
 
				+        total_defects=("defect_id", "count"),
			
 
				+        panels_with_defects=("panel_id", "nunique"),
			
 
				+    ).reset_index()
			
 
				+    daily["day"] = pd.to_datetime(daily["day"])
			
 
				+    daily = daily.sort_values("day").reset_index(drop=True)
			
 
				+
			
 
				+    if len(daily) < 2:
			
 
				+        return {
			
 
				+            "daily": daily,
			
 
				+            "p_bar": 0.0,
			
 
				+            "ucl": 0.0,
			
 
				+            "lcl": 0.0,
			
 
				+            "uwl": 0.0,
			
 
				+            "lwl": 0.0,
			
 
				+            "sigma_p": 0.0,
			
 
				+        }
			
 
				+
			
 
				+    total_days = (df["timestamp"].max() - df["timestamp"].min()).days + 1
			
 
				+    total_unique_panels = df["panel_id"].nunique()
			
 
				+    estimated = max(total_unique_panels // max(total_days // 7, 1), 1)
			
 
				+    daily["estimated_inspected"] = np.maximum(estimated, daily["panels_with_defects"])
			
 
				+    daily["defect_rate"] = (
			
 
				+        daily["panels_with_defects"] / daily["estimated_inspected"]
			
 
				+    ).clip(lower=0, upper=1)
			
 
				+
			
 
				+    p_bar = float(np.clip(daily["defect_rate"].mean(), 0, 1))
			
 
				+    n_avg = float(daily["estimated_inspected"].mean())
			
 
				+    sigma_p = float(np.sqrt(max(p_bar * (1 - p_bar), 0) / n_avg)) if n_avg > 0 else 0.0
			
 
				+
			
 
				+    return {
			
 
				+        "daily": daily,
			
 
				+        "p_bar": p_bar,
			
 
				+        "ucl": min(1.0, p_bar + 3 * sigma_p),
			
 
				+        "lcl": max(0.0, p_bar - 3 * sigma_p),
			
 
				+        "uwl": min(1.0, p_bar + 2 * sigma_p),
			
 
				+        "lwl": max(0.0, p_bar - 2 * sigma_p),
			
 
				+        "sigma_p": sigma_p,
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def build_diagnostic_dashboard(df):
			
 
				+    """生成诊断驾驶舱需要的摘要、根因候选和趋势数据。"""
			
 
				+    total_defects = len(df)
			
 
				+    if total_defects == 0:
			
 
				+        return {
			
 
				+            "severity_level": "正常",
			
 
				+            "top_defect_type": "-",
			
 
				+            "top_defect_share": 0.0,
			
 
				+            "serious_share": 0.0,
			
 
				+            "root_causes": pd.DataFrame(),
			
 
				+            "daily_trend": pd.DataFrame(),
			
 
				+            "pareto": pd.DataFrame(),
			
 
				+            "primary_recommendation": "当前筛选条件下没有缺陷记录。",
			
 
				+        }
			
 
				+
			
 
				+    type_counts = df["defect_type"].value_counts()
			
 
				+    top_defect_type = type_counts.index[0]
			
 
				+    top_defect_share = float(type_counts.iloc[0] / total_defects)
			
 
				+    serious_share = float((df["severity"] == "严重").sum() / total_defects)
			
 
				+
			
 
				+    root_causes = (
			
 
				+        df.groupby(["equipment_id", "seat_id"])
			
 
				+        .agg(
			
 
				+            缺陷数=("defect_id", "count"),
			
 
				+            涉及面板=("panel_id", "nunique"),
			
 
				+            主要缺陷=("defect_type", lambda s: s.mode().iloc[0]),
			
 
				+            严重数=("severity", lambda s: int((s == "严重").sum())),
			
 
				+        )
			
 
				+        .reset_index()
			
 
				+    )
			
 
				+    root_causes["根因候选"] = root_causes["equipment_id"] + " / " + root_causes["seat_id"]
			
 
				+    root_causes["占比"] = root_causes["缺陷数"] / total_defects
			
 
				+    root_causes["严重占比"] = root_causes["严重数"] / root_causes["缺陷数"].clip(lower=1)
			
 
				+    equipment_totals = df.groupby("equipment_id")["defect_id"].count()
			
 
				+    equipment_seat_counts = df.groupby("equipment_id")["seat_id"].nunique().clip(lower=1)
			
 
				+    root_causes["期望缺陷数"] = root_causes["equipment_id"].map(
			
 
				+        equipment_totals / equipment_seat_counts
			
 
				+    ).clip(lower=0.001)
			
 
				+    root_causes["异常倍数"] = (root_causes["缺陷数"] / root_causes["期望缺陷数"]).round(2)
			
 
				+    count_score = root_causes["缺陷数"] / root_causes["缺陷数"].max()
			
 
				+    panel_score = root_causes["涉及面板"] / df["panel_id"].nunique()
			
 
				+    lift_score = (root_causes["异常倍数"] / 3).clip(upper=1)
			
 
				+    root_causes["风险分"] = (
			
 
				+        count_score * 55 + lift_score * 25 + root_causes["严重占比"] * 15 + panel_score * 5
			
 
				+    ).round(1)
			
 
				+    root_causes = root_causes.sort_values(["风险分", "缺陷数"], ascending=False).head(8)
			
 
				+    root_causes = root_causes[
			
 
				+        ["根因候选", "缺陷数", "占比", "异常倍数", "涉及面板", "主要缺陷", "严重占比", "风险分"]
			
 
				+    ].reset_index(drop=True)
			
 
				+
			
 
				+    pareto = type_counts.rename_axis("缺陷类型").reset_index(name="缺陷数")
			
 
				+    pareto["占比"] = pareto["缺陷数"] / total_defects
			
 
				+    pareto["累计占比"] = pareto["占比"].cumsum()
			
 
				+
			
 
				+    daily_trend = df.groupby("day").size().rename("缺陷数").reset_index()
			
 
				+    daily_trend["day"] = pd.to_datetime(daily_trend["day"])
			
 
				+    daily_trend = daily_trend.sort_values("day")
			
 
				+
			
 
				+    if serious_share >= 0.2 or (len(root_causes) > 0 and root_causes.iloc[0]["占比"] >= 0.15):
			
 
				+        severity_level = "严重"
			
 
				+    elif serious_share >= 0.1 or top_defect_share >= 0.35:
			
 
				+        severity_level = "关注"
			
 
				+    else:
			
 
				+        severity_level = "正常"
			
 
				+
			
 
				+    if len(root_causes) > 0:
			
 
				+        top_root = root_causes.iloc[0]
			
 
				+        primary_recommendation = (
			
 
				+            f"优先排查 {top_root['根因候选']}，该组合贡献 {top_root['占比']:.1%} "
			
 
				+            f"缺陷，异常倍数 {top_root['异常倍数']:.2f}x，主要类型为 {top_root['主要缺陷']}。"
			
 
				+        )
			
 
				+    else:
			
 
				+        primary_recommendation = f"优先排查 {top_defect_type} 相关工艺参数。"
			
 
				+
			
 
				+    return {
			
 
				+        "severity_level": severity_level,
			
 
				+        "top_defect_type": top_defect_type,
			
 
				+        "top_defect_share": top_defect_share,
			
 
				+        "serious_share": serious_share,
			
 
				+        "root_causes": root_causes,
			
 
				+        "daily_trend": daily_trend,
			
 
				+        "pareto": pareto,
			
 
				+        "primary_recommendation": primary_recommendation,
			
 
				+    }
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,7 @@
 
				+streamlit
			
 
				+pandas
			
 
				+numpy
			
 
				+matplotlib
			
 
				+seaborn
			
 
				+scikit-learn
			
 
				+plotly
			
--- a/tests/test_app_utils.py
+++ b/tests/test_app_utils.py
@@ -0,0 +1,119 @@
 
				+import math
			
 
				+import os
			
 
				+import sys
			
 
				+import unittest
			
 
				+
			
 
				+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				+
			
 
				+import pandas as pd
			
 
				+
			
 
				+from app_utils import (
			
 
				+    apply_defect_filters,
			
 
				+    build_diagnostic_dashboard,
			
 
				+    calculate_kpis,
			
 
				+    calculate_spc_metrics,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class AppUtilsTest(unittest.TestCase):
			
 
				+    def setUp(self):
			
 
				+        self.df = pd.DataFrame(
			
 
				+            {
			
 
				+                "defect_id": ["D1", "D2", "D3", "D4"],
			
 
				+                "panel_id": ["P1", "P2", "P2", "P3"],
			
 
				+                "batch_id": ["B1", "B1", "B2", "B2"],
			
 
				+                "equipment_id": ["E1", "E1", "E2", "E2"],
			
 
				+                "seat_id": ["S1", "S2", "S1", "S2"],
			
 
				+                "timestamp": pd.to_datetime(
			
 
				+                    [
			
 
				+                        "2026-04-01 00:00:00",
			
 
				+                        "2026-04-01 23:59:59",
			
 
				+                        "2026-04-02 12:00:00",
			
 
				+                        "2026-04-03 00:00:01",
			
 
				+                    ]
			
 
				+                ),
			
 
				+                "defect_type": ["划痕", "亮点", "划痕", "暗点"],
			
 
				+                "severity": ["严重", "轻微", "中等", "严重"],
			
 
				+                "shift": ["白班", "夜班", "白班", "白班"],
			
 
				+                "day": ["2026-04-01", "2026-04-01", "2026-04-02", "2026-04-03"],
			
 
				+            }
			
 
				+        )
			
 
				+
			
 
				+    def test_date_filter_includes_full_end_date(self):
			
 
				+        filtered = apply_defect_filters(
			
 
				+            self.df,
			
 
				+            start_date=pd.Timestamp("2026-04-01"),
			
 
				+            end_date=pd.Timestamp("2026-04-01"),
			
 
				+            selected_types=["划痕", "亮点", "暗点"],
			
 
				+            selected_batches=["B1", "B2"],
			
 
				+            selected_equipment=["E1", "E2"],
			
 
				+            selected_seats=["S1", "S2"],
			
 
				+            selected_shift="全部",
			
 
				+            selected_severity="全部",
			
 
				+        )
			
 
				+
			
 
				+        self.assertEqual(["D1", "D2"], filtered["defect_id"].tolist())
			
 
				+
			
 
				+    def test_kpis_use_same_filter_scope_for_total_panels(self):
			
 
				+        filtered = apply_defect_filters(
			
 
				+            self.df,
			
 
				+            start_date=pd.Timestamp("2026-04-01"),
			
 
				+            end_date=pd.Timestamp("2026-04-02"),
			
 
				+            selected_types=["划痕"],
			
 
				+            selected_batches=["B1", "B2"],
			
 
				+            selected_equipment=["E1", "E2"],
			
 
				+            selected_seats=["S1"],
			
 
				+            selected_shift="全部",
			
 
				+            selected_severity="全部",
			
 
				+        )
			
 
				+
			
 
				+        kpis = calculate_kpis(self.df, filtered)
			
 
				+
			
 
				+        self.assertEqual(2, kpis["total_panels_inspected"])
			
 
				+        self.assertEqual(2, kpis["defective_panels"])
			
 
				+        self.assertEqual(0.0, kpis["yield_rate"])
			
 
				+
			
 
				+    def test_spc_metrics_clamp_estimated_rate_to_valid_probability(self):
			
 
				+        metrics = calculate_spc_metrics(self.df)
			
 
				+
			
 
				+        self.assertTrue(math.isfinite(metrics["p_bar"]))
			
 
				+        self.assertTrue(math.isfinite(metrics["ucl"]))
			
 
				+        self.assertTrue(math.isfinite(metrics["lcl"]))
			
 
				+        self.assertLessEqual(metrics["daily"]["defect_rate"].max(), 1.0)
			
 
				+
			
 
				+    def test_diagnostic_dashboard_ranks_root_cause_candidates(self):
			
 
				+        dashboard = build_diagnostic_dashboard(self.df)
			
 
				+
			
 
				+        self.assertEqual("严重", dashboard["severity_level"])
			
 
				+        self.assertEqual("E1 / S1", dashboard["root_causes"].iloc[0]["根因候选"])
			
 
				+        self.assertEqual("划痕", dashboard["top_defect_type"])
			
 
				+        self.assertIn("优先排查", dashboard["primary_recommendation"])
			
 
				+
			
 
				+    def test_diagnostic_dashboard_reports_baseline_lift(self):
			
 
				+        rows = []
			
 
				+        for i in range(10):
			
 
				+            rows.append(
			
 
				+                {
			
 
				+                    "defect_id": f"D{i}",
			
 
				+                    "panel_id": f"P{i}",
			
 
				+                    "batch_id": "B1",
			
 
				+                    "equipment_id": "E1",
			
 
				+                    "seat_id": "S-hot" if i < 8 else "S-cold",
			
 
				+                    "timestamp": pd.Timestamp("2026-04-01"),
			
 
				+                    "defect_type": "气泡",
			
 
				+                    "severity": "严重" if i < 2 else "轻微",
			
 
				+                    "shift": "白班",
			
 
				+                    "day": "2026-04-01",
			
 
				+                }
			
 
				+            )
			
 
				+        df = pd.DataFrame(rows)
			
 
				+
			
 
				+        dashboard = build_diagnostic_dashboard(df)
			
 
				+        top = dashboard["root_causes"].iloc[0]
			
 
				+
			
 
				+        self.assertEqual("E1 / S-hot", top["根因候选"])
			
 
				+        self.assertGreater(top["异常倍数"], 1.0)
			
 
				+
			
 
				+
			
 
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()