import_to_database.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. """把缺陷 CSV 导入 SQLite 数据库。"""
  2. import argparse
  3. import pandas as pd
  4. from defect_analysis.data_quality import build_data_quality_report
  5. from defect_analysis.database import create_import_batch, init_database, insert_defects
  6. from defect_analysis.schemas import normalize_defect_schema
  7. def import_csv_to_database(csv_path, db_path, source_name=None):
  8. df = pd.read_csv(csv_path, parse_dates=["timestamp"])
  9. df = normalize_defect_schema(df)
  10. quality_report = build_data_quality_report(df)
  11. init_database(db_path)
  12. import_id = create_import_batch(
  13. db_path,
  14. source_name=source_name or str(csv_path),
  15. row_count=len(df),
  16. quality_score=quality_report["score"],
  17. )
  18. inserted = insert_defects(db_path, df, import_id=import_id)
  19. return {
  20. "import_id": import_id,
  21. "rows": len(df),
  22. "inserted": inserted,
  23. "quality_score": quality_report["score"],
  24. "issues": quality_report["issues"],
  25. }
  26. def main():
  27. parser = argparse.ArgumentParser(description="导入缺陷 CSV 到 SQLite 数据库")
  28. parser.add_argument("--csv", default="defect_data.csv", help="缺陷 CSV 文件路径")
  29. parser.add_argument("--db", default="defect_analysis.db", help="SQLite 数据库路径")
  30. args = parser.parse_args()
  31. result = import_csv_to_database(args.csv, args.db)
  32. print(
  33. f"导入批次 {result['import_id']} 完成: "
  34. f"读取 {result['rows']} 行,新增 {result['inserted']} 行,数据质量分 {result['quality_score']:.1f}"
  35. )
  36. if result["issues"] != ["数据质量良好"]:
  37. print("数据质量提示: " + ";".join(result["issues"]))
  38. if __name__ == "__main__":
  39. main()