|
|
@@ -33,14 +33,14 @@ def predict_key_factors(df, *, target_defect_type=None, target_severity=None, mo
|
|
|
probabilities = pd.Series(model.predict_proba(X)[:, 1], index=X.index)
|
|
|
|
|
|
scored = key_factors.copy()
|
|
|
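+ # Build the one-hot column name ("<dimension>=<value>") for every key_factors row up front with vectorized string ops; the loop below still runs per row and averages the predicted probabilities over the samples where that column equals 1.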
|
|
|
+ dimension = scored["维度"].astype(str)
|
|
|
+ value = scored["因子值"].astype(str)
|
|
|
+ column_names = dimension + "=" + value
|
|
|
ml_scores = []
|
|
|
- for _, row in scored.iterrows():
|
|
|
- dimension = row["维度"]
|
|
|
- value = row["因子值"]
|
|
|
- column = f"{dimension}={value}"
|
|
|
- if column in X.columns:
|
|
|
- mask = X[column] == 1
|
|
|
- ml_scores.append(float(probabilities.loc[mask].mean()) if mask.any() else 0.0)
|
|
|
+ for col in column_names:
|
|
|
+ if col in X.columns:
|
|
|
+ ml_scores.append(float(probabilities.loc[X[col] == 1].mean()) if X[col].any() else 0.0)
|
|
|
else:
|
|
|
ml_scores.append(0.0)
|
|
|
scored["ml_probability"] = ml_scores
|