"""Structured ML feature engineering."""
- import pandas as pd
- from defect_analysis.schemas import normalize_defect_schema
# Column names that build_feature_frame treats as categorical: each one that is
# present in the normalized frame is stringified and one-hot encoded.
# Order matters only insofar as it fixes the order of the encoded column groups.
CATEGORICAL_FEATURES = [
    "equipment_id",
    "seat_id",
    "lam_equipment_id",
    "lam_seat_id",
    "lam_fixture_id",
    "lam_nozzle_id",
    "material_lot_oca",
    "material_lot_glass",
    "clean_equipment_id",
    "bond_equipment_id",
    "shift",
    "defect_geometry_type",
]
# Column names that build_feature_frame treats as numeric: each one that is
# present in the normalized frame is coerced with pd.to_numeric, and values
# that are missing or cannot be parsed become 0.0.
NUMERIC_FEATURES = [
    "x_mm",
    "y_mm",
    "hour",
    "width_mm",
    "height_mm",
    "length_mm",
    "angle_deg",
    "area_mm2",
]
def build_feature_frame(df):
    """Convert standardized defect data into a trainable numeric feature matrix.

    The input is first passed through ``normalize_defect_schema``. From the
    normalized frame:

    * columns listed in ``NUMERIC_FEATURES`` are coerced to numbers; values
      that are missing or cannot be parsed become ``0.0``;
    * columns listed in ``CATEGORICAL_FEATURES`` have missing values replaced
      by ``""``, are converted to ``str``, and are one-hot encoded into float
      columns named ``<column>=<value>``.

    Feature columns that are absent from the normalized frame are skipped.

    NOTE: the set of one-hot columns depends on the values present in ``df``,
    so two calls on different data can return different column sets. Callers
    that train on one frame and predict on another must align the columns
    themselves.

    Args:
        df: Defect records in whatever form ``normalize_defect_schema`` accepts.

    Returns:
        A ``pandas.DataFrame`` with a fresh ``0..n-1`` index, numeric columns
        first and one-hot columns after them, containing no NaN values.
    """
    normalized = normalize_defect_schema(df)

    numeric_columns = [col for col in NUMERIC_FEATURES if col in normalized.columns]
    numeric = normalized[numeric_columns].copy()
    for column in numeric.columns:
        # errors="coerce" turns unparseable entries into NaN, which is then zeroed.
        numeric[column] = pd.to_numeric(numeric[column], errors="coerce").fillna(0.0)

    categorical_columns = [
        col for col in CATEGORICAL_FEATURES if col in normalized.columns
    ]
    if categorical_columns:
        categorical = normalized[categorical_columns].fillna("")
        encoded = pd.get_dummies(categorical.astype(str), prefix_sep="=", dtype=float)
    else:
        # With no categorical columns there is nothing to encode. Skip
        # get_dummies (a column-less frame gives it nothing to concatenate)
        # and contribute zero one-hot columns instead.
        encoded = pd.DataFrame(index=normalized.index)

    # Both halves are re-indexed positionally so concat pairs rows by position
    # rather than by whatever index the normalized frame carried.
    features = pd.concat(
        [numeric.reset_index(drop=True), encoded.reset_index(drop=True)],
        axis=1,
    )
    return features.fillna(0.0)
|