"""训练数据集构建。""" from defect_analysis.ml.features import build_feature_frame from defect_analysis.schemas import normalize_defect_schema def build_target_series(df, *, target_defect_type=None, target_severity=None): normalized = normalize_defect_schema(df) if target_defect_type: return (normalized["defect_type"] == target_defect_type).astype(int) if target_severity: return (normalized["severity"] == target_severity).astype(int) return (normalized["severity"] == "严重").astype(int) def build_supervised_dataset(df, *, target_defect_type=None, target_severity=None): """构建监督学习数据集。""" return build_feature_frame(df), build_target_series( df, target_defect_type=target_defect_type, target_severity=target_severity, )