|
@@ -18,10 +18,10 @@ from sklearn.cluster import DBSCAN
|
|
|
from sklearn.decomposition import PCA
|
|
from sklearn.decomposition import PCA
|
|
|
from sklearn.preprocessing import StandardScaler
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
from defect_analysis.data_quality import build_data_quality_report
|
|
from defect_analysis.data_quality import build_data_quality_report
|
|
|
-from defect_analysis.ml.key_factors import find_key_factors
|
|
|
|
|
from app_utils import (
|
|
from app_utils import (
|
|
|
apply_defect_filters,
|
|
apply_defect_filters,
|
|
|
build_diagnostic_dashboard,
|
|
build_diagnostic_dashboard,
|
|
|
|
|
+ build_ml_factor_insights,
|
|
|
calculate_kpis,
|
|
calculate_kpis,
|
|
|
calculate_spc_metrics,
|
|
calculate_spc_metrics,
|
|
|
generate_industry_diagnosis,
|
|
generate_industry_diagnosis,
|
|
@@ -108,6 +108,17 @@ def load_data_from_csv():
|
|
|
df = pd.read_csv("defect_data.csv", parse_dates=["timestamp"])
|
|
df = pd.read_csv("defect_data.csv", parse_dates=["timestamp"])
|
|
|
return normalize_defect_schema(df)
|
|
return normalize_defect_schema(df)
|
|
|
|
|
|
|
|
|
|
+
|
|
|
|
|
+@st.cache_data(ttl=300, show_spinner=False)
|
|
|
|
|
+def build_cached_ml_factor_insights(data, target_defect_type, model_name, top_n):
|
|
|
|
|
+ """缓存 ML 训练洞察,避免页面交互时重复训练。"""
|
|
|
|
|
+ return build_ml_factor_insights(
|
|
|
|
|
+ data,
|
|
|
|
|
+ target_defect_type=target_defect_type,
|
|
|
|
|
+ model_name=model_name,
|
|
|
|
|
+ top_n=top_n,
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
if data_source == "上传CSV文件" and uploaded_df is not None:
|
|
if data_source == "上传CSV文件" and uploaded_df is not None:
|
|
|
df = uploaded_df
|
|
df = uploaded_df
|
|
|
else:
|
|
else:
|
|
@@ -605,11 +616,39 @@ _t = get_tab("🔬 ML 因子分析")
|
|
|
if _t:
|
|
if _t:
|
|
|
with _t:
|
|
with _t:
|
|
|
dashboard = build_diagnostic_dashboard(filtered_df)
|
|
dashboard = build_diagnostic_dashboard(filtered_df)
|
|
|
- key_factors = find_key_factors(filtered_df, target_defect_type=dashboard["top_defect_type"], top_n=10)
|
|
|
|
|
extended_root_causes = dashboard.get("extended_root_causes")
|
|
extended_root_causes = dashboard.get("extended_root_causes")
|
|
|
|
|
|
|
|
st.header("根因与关键因子分析")
|
|
st.header("根因与关键因子分析")
|
|
|
- st.markdown("综合规则评分、统计分析与行业维度,输出可解释的异常候选。")
|
|
|
|
|
|
|
+ st.markdown("综合规则评分、统计分析、机器学习验证与行业维度,输出可解释的异常候选。")
|
|
|
|
|
+ ml_col1, ml_col2, ml_col3 = st.columns([1, 1, 1])
|
|
|
|
|
+ with ml_col1:
|
|
|
|
|
+ ml_target_type = st.selectbox(
|
|
|
|
|
+ "目标缺陷",
|
|
|
|
|
+ options=sorted(filtered_df["defect_type"].dropna().unique()),
|
|
|
|
|
+ index=sorted(filtered_df["defect_type"].dropna().unique()).index(dashboard["top_defect_type"])
|
|
|
|
|
+ if dashboard["top_defect_type"] in sorted(filtered_df["defect_type"].dropna().unique())
|
|
|
|
|
+ else 0,
|
|
|
|
|
+ )
|
|
|
|
|
+ with ml_col2:
|
|
|
|
|
+ ml_model_name = st.selectbox(
|
|
|
|
|
+ "ML 模型",
|
|
|
|
|
+ options=["random_forest", "logistic_regression", "xgboost", "lightgbm"],
|
|
|
|
|
+ format_func=lambda name: {
|
|
|
|
|
+ "random_forest": "RandomForest",
|
|
|
|
|
+ "logistic_regression": "LogisticRegression",
|
|
|
|
|
+ "xgboost": "XGBoost",
|
|
|
|
|
+ "lightgbm": "LightGBM",
|
|
|
|
|
+ }[name],
|
|
|
|
|
+ )
|
|
|
|
|
+ with ml_col3:
|
|
|
|
|
+ ml_top_n = st.slider("候选因子数", min_value=5, max_value=30, value=10, step=5)
|
|
|
|
|
+
|
|
|
|
|
+ ml_insights = build_cached_ml_factor_insights(
|
|
|
|
|
+ filtered_df,
|
|
|
|
|
+ ml_target_type,
|
|
|
|
|
+ ml_model_name,
|
|
|
|
|
+ ml_top_n,
|
|
|
|
|
+ )
|
|
|
st.divider()
|
|
st.divider()
|
|
|
|
|
|
|
|
if extended_root_causes is not None and not extended_root_causes.empty:
|
|
if extended_root_causes is not None and not extended_root_causes.empty:
|
|
@@ -621,15 +660,36 @@ if _t:
|
|
|
st.dataframe(extended_table, use_container_width=True, hide_index=True)
|
|
st.dataframe(extended_table, use_container_width=True, hide_index=True)
|
|
|
st.caption("覆盖治具、吸嘴、材料批次、清洗/绑定等维度,用于多前制程链路追溯。")
|
|
st.caption("覆盖治具、吸嘴、材料批次、清洗/绑定等维度,用于多前制程链路追溯。")
|
|
|
|
|
|
|
|
|
|
+ if ml_insights["error"]:
|
|
|
|
|
+ st.warning(f"ML 模型暂不可用:{ml_insights['error']}")
|
|
|
|
|
+ else:
|
|
|
|
|
+ metric_train = ml_insights["metrics"]
|
|
|
|
|
+ metric_valid = ml_insights["validation_metrics"]
|
|
|
|
|
+ m1, m2, m3, m4 = st.columns(4)
|
|
|
|
|
+ m1.metric("训练准确率", f"{metric_train.get('train_accuracy', 0):.1%}")
|
|
|
|
|
+ m2.metric("训练 AUC", f"{metric_train.get('train_auc', 0):.3f}")
|
|
|
|
|
+ m3.metric("验证准确率", f"{metric_valid.get('validation_accuracy', 0):.1%}")
|
|
|
|
|
+ m4.metric("验证 AUC", f"{metric_valid.get('validation_auc', 0):.3f}")
|
|
|
|
|
+
|
|
|
|
|
+ importance_df = pd.DataFrame(ml_insights["feature_importance"])
|
|
|
|
|
+ if not importance_df.empty:
|
|
|
|
|
+ st.subheader("模型特征贡献 TOP")
|
|
|
|
|
+ importance_df["importance"] = importance_df["importance"].map(lambda v: round(v, 4))
|
|
|
|
|
+ st.dataframe(importance_df.head(15), use_container_width=True, hide_index=True)
|
|
|
|
|
+ st.caption("用于判断模型主要依赖哪些设备、座号、材料批次、坐标或缺陷几何特征。")
|
|
|
|
|
+
|
|
|
|
|
+ key_factors = ml_insights["key_factors"]
|
|
|
if not key_factors.empty:
|
|
if not key_factors.empty:
|
|
|
- st.subheader(f"关键因子分析:{dashboard['top_defect_type']}")
|
|
|
|
|
|
|
+ st.subheader(f"关键因子分析:{ml_insights['target_defect_type']}")
|
|
|
key_factor_table = key_factors.copy()
|
|
key_factor_table = key_factors.copy()
|
|
|
key_factor_table["目标占比"] = key_factor_table["目标占比"].map(lambda v: f"{v:.1%}")
|
|
key_factor_table["目标占比"] = key_factor_table["目标占比"].map(lambda v: f"{v:.1%}")
|
|
|
key_factor_table["基线占比"] = key_factor_table["基线占比"].map(lambda v: f"{v:.1%}")
|
|
key_factor_table["基线占比"] = key_factor_table["基线占比"].map(lambda v: f"{v:.1%}")
|
|
|
key_factor_table["异常倍数"] = key_factor_table["异常倍数"].map(lambda v: f"{v:.2f}x")
|
|
key_factor_table["异常倍数"] = key_factor_table["异常倍数"].map(lambda v: f"{v:.2f}x")
|
|
|
key_factor_table["支持度"] = key_factor_table["支持度"].map(lambda v: f"{v:.1%}")
|
|
key_factor_table["支持度"] = key_factor_table["支持度"].map(lambda v: f"{v:.1%}")
|
|
|
|
|
+ if "ml_probability" in key_factor_table.columns:
|
|
|
|
|
+ key_factor_table["ml_probability"] = key_factor_table["ml_probability"].map(lambda v: f"{v:.1%}")
|
|
|
st.dataframe(key_factor_table, use_container_width=True, hide_index=True)
|
|
st.dataframe(key_factor_table, use_container_width=True, hide_index=True)
|
|
|
- st.caption("关键因子按目标缺陷占比、异常倍数、样本数和支持度综合排序。")
|
|
|
|
|
|
|
+ st.caption("关键因子按目标缺陷占比、异常倍数、样本数、支持度和模型概率综合排序。")
|
|
|
else:
|
|
else:
|
|
|
st.info("当前数据未找到显著关键因子,可放宽筛选条件或增加样本量。")
|
|
st.info("当前数据未找到显著关键因子,可放宽筛选条件或增加样本量。")
|
|
|
|
|
|