# app.py
"""
Defect concentration analysis - interactive Streamlit visualization page.
"""
  4. import pandas as pd
  5. import numpy as np
  6. import matplotlib
  7. matplotlib.use("Agg")
  8. import matplotlib.pyplot as plt
  9. import matplotlib.font_manager as fm
  10. import seaborn as sns
  11. import streamlit as st
  12. import os
  13. from datetime import datetime
  14. from sklearn.cluster import DBSCAN
  15. from sklearn.decomposition import PCA
  16. from sklearn.preprocessing import StandardScaler
  17. # --- 中文字体设置 ---
  18. def setup_chinese_font():
  19. """设置中文字体"""
  20. font_paths = [
  21. r"C:\Windows\Fonts\msyh.ttc", # 微软雅黑
  22. r"C:\Windows\Fonts\simhei.ttf", # 黑体
  23. r"C:\Windows\Fonts\simsun.ttc", # 宋体
  24. r"C:\Windows\Fonts\malgun.ttf", # Malgun Gothic
  25. ]
  26. for fp in font_paths:
  27. if os.path.exists(fp):
  28. font_prop = fm.FontProperties(fname=fp)
  29. plt.rcParams["font.family"] = font_prop.get_name()
  30. plt.rcParams["axes.unicode_minus"] = False
  31. return font_prop
  32. # fallback
  33. plt.rcParams["font.sans-serif"] = ["SimHei", "Microsoft YaHei", "Arial Unicode MS"]
  34. plt.rcParams["axes.unicode_minus"] = False
  35. return None
  36. setup_chinese_font()
  37. # --- 页面配置 ---
  38. st.set_page_config(
  39. page_title="屏幕缺陷集中性分析",
  40. page_icon="🔍",
  41. layout="wide",
  42. initial_sidebar_state="expanded"
  43. )
  44. # --- 加载数据 ---
  45. @st.cache_data(ttl=300)
  46. def load_data():
  47. """加载并缓存数据"""
  48. if not os.path.exists("defect_data.csv"):
  49. st.error("未找到 defect_data.csv,请先运行 generate_data.py 生成数据")
  50. return None
  51. df = pd.read_csv("defect_data.csv", parse_dates=["timestamp"])
  52. df["timestamp"] = pd.to_datetime(df["timestamp"])
  53. return df
  54. df = load_data()
  55. if df is None:
  56. st.stop()
  57. # --- 侧边栏 ---
  58. st.sidebar.title("🔍 筛选条件")
  59. # --- 数据源切换 ---
  60. st.sidebar.divider()
  61. st.sidebar.subheader("📂 数据源")
  62. data_source = st.sidebar.radio("选择数据源", ["内置模拟数据", "上传CSV文件"], label_visibility="collapsed")
  63. REQUIRED_COLUMNS = [
  64. "defect_id", "panel_id", "batch_id", "equipment_id", "seat_id",
  65. "inspection_station", "timestamp", "defect_type", "severity",
  66. "x_mm", "y_mm", "panel_width_mm", "panel_height_mm",
  67. "hour", "shift", "day",
  68. ]
  69. uploaded_df = None
  70. if data_source == "上传CSV文件":
  71. uploaded_file = st.sidebar.file_uploader("上传CSV文件", type=["csv"], accept_multiple_files=False)
  72. if uploaded_file is not None:
  73. try:
  74. uploaded_df = pd.read_csv(uploaded_file, parse_dates=["timestamp"])
  75. uploaded_df["timestamp"] = pd.to_datetime(uploaded_df["timestamp"])
  76. missing = [c for c in REQUIRED_COLUMNS if c not in uploaded_df.columns]
  77. if missing:
  78. st.sidebar.error(f"缺少字段: {', '.join(missing)}")
  79. uploaded_df = None
  80. else:
  81. st.sidebar.success(f"已加载 {len(uploaded_df)} 条记录")
  82. # 下载模板
  83. template_df = pd.DataFrame(columns=REQUIRED_COLUMNS)
  84. csv_template = template_df.to_csv(index=False, encoding="utf-8-sig")
  85. st.sidebar.download_button(
  86. label="📋 下载数据格式模板",
  87. data=csv_template,
  88. file_name="defect_data_template.csv",
  89. mime="text/csv"
  90. )
  91. except Exception as e:
  92. st.sidebar.error(f"CSV解析失败: {e}")
  93. uploaded_df = None
  94. else:
  95. st.sidebar.info("请选择一个CSV文件上传")
  96. # --- 加载数据 ---
  97. @st.cache_data(ttl=300)
  98. def load_data_from_csv():
  99. """加载内置模拟数据"""
  100. if not os.path.exists("defect_data.csv"):
  101. st.error("未找到 defect_data.csv,请先运行 generate_data.py 生成数据")
  102. return None
  103. df = pd.read_csv("defect_data.csv", parse_dates=["timestamp"])
  104. df["timestamp"] = pd.to_datetime(df["timestamp"])
  105. return df
  106. if data_source == "上传CSV文件" and uploaded_df is not None:
  107. df = uploaded_df
  108. else:
  109. df = load_data_from_csv()
  110. if df is None:
  111. st.stop()
  112. # --- 角色视图 ---
  113. st.sidebar.divider()
  114. st.sidebar.subheader("👤 视图模式")
  115. view_mode = st.sidebar.selectbox(
  116. "选择视图模式",
  117. options=["操作员", "工程师", "管理者"],
  118. index=1,
  119. help="操作员: 基础分析 | 工程师: 全部功能 | 管理者: KPI+SPC+健康评分"
  120. )
  121. # 各角色可见的 Tab
  122. tab_visibility = {
  123. "操作员": {
  124. "tabs": ["🗺️ 空间集中性", "📊 类型集中性 (帕累托)", "📈 时间集中性",
  125. "🏗️ 设备座号集中性", "🔬 缺陷模式识别"],
  126. "show_kpi": True,
  127. "show_export": True,
  128. },
  129. "工程师": {
  130. "tabs": "all",
  131. "show_kpi": True,
  132. "show_export": True,
  133. },
  134. "管理者": {
  135. "tabs": ["🚨 SPC 控制图与预警", "🔬 缺陷模式识别", "💚 设备健康与共性分析",
  136. "📊 类型集中性 (帕累托)", "📈 时间集中性"],
  137. "show_kpi": True,
  138. "show_export": True,
  139. },
  140. }
  141. # 应用 Tab 可见性
  142. current_config = tab_visibility[view_mode]
  143. # --- 筛选条件 ---
  144. # 日期范围
  145. min_date = df["timestamp"].min().date()
  146. max_date = df["timestamp"].max().date()
  147. date_range = st.sidebar.date_input(
  148. "日期范围",
  149. value=[min_date, max_date],
  150. min_value=min_date,
  151. max_value=max_date
  152. )
  153. if len(date_range) == 2:
  154. start_date, end_date = pd.Timestamp(date_range[0]), pd.Timestamp(date_range[1])
  155. else:
  156. start_date, end_date = pd.Timestamp(min_date), pd.Timestamp(max_date)
  157. # 缺陷类型
  158. all_types = sorted(df["defect_type"].unique())
  159. selected_types = st.sidebar.multiselect("缺陷类型", options=all_types, default=all_types)
  160. # 班次
  161. shift_options = ["全部", "白班", "夜班"]
  162. selected_shift = st.sidebar.radio("班次", options=shift_options)
  163. # 批次
  164. all_batches = sorted(df["batch_id"].unique())
  165. selected_batches = st.sidebar.multiselect("批次", options=all_batches, default=all_batches[:5])
  166. # 严重程度
  167. all_severities = ["全部", "轻微", "中等", "严重"]
  168. selected_severity = st.sidebar.selectbox("严重程度", options=all_severities)
  169. # 设备
  170. all_equipment = sorted(df["equipment_id"].unique())
  171. selected_equipment = st.sidebar.multiselect("前贴附设备", options=all_equipment, default=all_equipment)
  172. # 座号(随设备联动)
  173. if selected_equipment:
  174. eq_seats = sorted(df[df["equipment_id"].isin(selected_equipment)]["seat_id"].unique())
  175. selected_seats = st.sidebar.multiselect("座号", options=eq_seats, default=eq_seats)
  176. else:
  177. selected_seats = []
  178. # 应用筛选
  179. mask = (
  180. (df["timestamp"] >= start_date) &
  181. (df["timestamp"] <= end_date) &
  182. (df["defect_type"].isin(selected_types)) &
  183. (df["batch_id"].isin(selected_batches)) &
  184. (df["equipment_id"].isin(selected_equipment))
  185. )
  186. if selected_shift != "全部":
  187. mask &= (df["shift"] == selected_shift)
  188. if selected_severity != "全部":
  189. mask &= (df["severity"] == selected_severity)
  190. if selected_seats:
  191. mask &= (df["seat_id"].isin(selected_seats))
  192. filtered_df = df[mask].copy()
  193. # ========== KPI 看板 ==========
  194. total_panels_inspected = df[df["timestamp"] >= start_date]["panel_id"].nunique()
  195. defective_panels = filtered_df["panel_id"].nunique()
  196. yield_rate = (1 - defective_panels / max(total_panels_inspected, 1)) * 100
  197. total_defects = len(filtered_df)
  198. critical_defects = (filtered_df["severity"] == "严重").sum()
  199. top_defect_type = filtered_df["defect_type"].mode().iloc[0] if len(filtered_df) > 0 else "-"
  200. kpi1, kpi2, kpi3, kpi4, kpi5, kpi6 = st.columns(6)
  201. kpi1.metric("检测面板数", f"{total_panels_inspected} 块")
  202. kpi2.metric("不良面板数", f"{defective_panels} 块", delta=f"{defective_panels/total_panels_inspected*100:.1f}%" if total_panels_inspected > 0 else "0%")
  203. kpi3.metric("综合良率", f"{yield_rate:.1f}%", delta=f"{yield_rate - 95:.1f}%", delta_color="normal" if yield_rate >= 95 else "inverse")
  204. kpi4.metric("缺陷总数", f"{total_defects} 个")
  205. kpi5.metric("严重缺陷", f"{critical_defects} 个", delta=f"{critical_defects/max(total_defects,1)*100:.1f}%" if total_defects > 0 else "0%")
  206. kpi6.metric("主要缺陷类型", top_defect_type)
  207. # 第二排 KPI
  208. eq_concentrated = False
  209. if "equipment_id" in filtered_df.columns:
  210. eq_stats = filtered_df.groupby("equipment_id").size()
  211. top_eq = eq_stats.idxmax() if len(eq_stats) > 0 else "-"
  212. top_eq_count = eq_stats.max() if len(eq_stats) > 0 else 0
  213. else:
  214. top_eq, top_eq_count = "-", 0
  215. seat_concentrated = False
  216. if "seat_id" in filtered_df.columns and len(filtered_df) > 0:
  217. seat_stats = filtered_df.groupby("seat_id").size()
  218. if len(seat_stats) > 0:
  219. top_seat = seat_stats.idxmax()
  220. top_seat_count = seat_stats.max()
  221. avg_seat_count = seat_stats.mean()
  222. if top_seat_count > avg_seat_count * 2:
  223. seat_concentrated = True
  224. else:
  225. top_seat, top_seat_count = "-", 0
  226. else:
  227. top_seat, top_seat_count = "-", 0
  228. kpi7, kpi8, kpi9 = st.columns(3)
  229. kpi7.metric("最高缺陷设备", str(top_eq), f"{top_eq_count} 个缺陷")
  230. kpi8.metric("最高缺陷座号", str(top_seat), f"{top_seat_count} 个缺陷")
  231. if seat_concentrated:
  232. kpi9.metric("座号集中性", "⚠️ 存在集中", delta="需关注", delta_color="inverse")
  233. else:
  234. kpi9.metric("座号集中性", "✅ 正常分布")
  235. # --- 主标题 ---
  236. st.title("📊 屏幕缺陷集中性分析系统")
  237. st.markdown(f"**数据范围**: {start_date.strftime('%Y-%m-%d')} ~ {end_date.strftime('%Y-%m-%d')} | "
  238. f"**筛选后缺陷数**: {len(filtered_df)} 条 | "
  239. f"**涉及面板**: {filtered_df['panel_id'].nunique()} 块")
  240. st.divider()
  241. # --- Tab 布局 (按角色动态) ---
  242. ALL_TABS = [
  243. "🗺️ 空间集中性",
  244. "📊 类型集中性 (帕累托)",
  245. "📈 时间集中性",
  246. "🏭 批次集中性",
  247. "🏗️ 设备座号集中性",
  248. "🔗 关联分析",
  249. "🧠 智能缺陷聚类 (DBSCAN)",
  250. "🚨 SPC 控制图与预警",
  251. "🔬 缺陷模式识别",
  252. "💚 设备健康与共性分析",
  253. "🔲 多层叠加分析"
  254. ]
  255. if current_config["tabs"] == "all":
  256. visible_tabs = ALL_TABS
  257. else:
  258. visible_tabs = [t for t in ALL_TABS if t in current_config["tabs"]]
  259. tab_containers = st.tabs(visible_tabs)
  260. tab_map = {name: container for name, container in zip(visible_tabs, tab_containers)}
  261. def get_tab(name):
  262. """获取指定 Tab 容器,如果不可见则返回 None"""
  263. return tab_map.get(name)
  264. # ========== Tab 1: 空间集中性 ==========
  265. _t = get_tab("🗺️ 空间集中性")
  266. if _t:
  267. with _t:
  268. st.header("缺陷空间分布热力图")
  269. col1, col2 = st.columns([2, 1])
  270. with col1:
  271. # 热力图分辨率
  272. grid_size = st.slider("热力图网格分辨率", min_value=5, max_value=50, value=20)
  273. fig, axes = plt.subplots(1, 2, figsize=(14, 6))
  274. # 左图:2D 热力图
  275. x_edges = np.linspace(0, df["panel_width_mm"].iloc[0], grid_size + 1)
  276. y_edges = np.linspace(0, df["panel_height_mm"].iloc[0], grid_size + 1)
  277. H, _, _ = np.histogram2d(
  278. filtered_df["x_mm"], filtered_df["y_mm"],
  279. bins=[x_edges, y_edges]
  280. )
  281. im = axes[0].imshow(
  282. H.T, origin="lower", aspect="auto",
  283. extent=[0, df["panel_width_mm"].iloc[0], 0, df["panel_height_mm"].iloc[0]],
  284. cmap="YlOrRd"
  285. )
  286. axes[0].set_title(f"缺陷密度热力图 (总 {len(filtered_df)} 个)")
  287. axes[0].set_xlabel("X (mm)")
  288. axes[0].set_ylabel("Y (mm)")
  289. plt.colorbar(im, ax=axes[0], label="缺陷数量")
  290. # 右图:散点图(叠加)
  291. axes[1].scatter(
  292. filtered_df["x_mm"], filtered_df["y_mm"],
  293. alpha=0.3, s=5, c="red", edgecolors="none"
  294. )
  295. axes[1].set_title("缺陷位置散点图")
  296. axes[1].set_xlabel("X (mm)")
  297. axes[1].set_ylabel("Y (mm)")
  298. axes[1].set_aspect("equal")
  299. st.pyplot(fig)
  300. plt.close()
  301. with col2:
  302. st.subheader("区域统计")
  303. # 将面板分为 9 宫格
  304. x_bins = pd.cut(filtered_df["x_mm"], bins=3, labels=["左", "中", "右"])
  305. y_bins = pd.cut(filtered_df["y_mm"], bins=3, labels=["上", "中", "下"])
  306. region_df = pd.DataFrame({"X区域": x_bins, "Y区域": y_bins})
  307. region_counts = region_df.groupby(["X区域", "Y区域"], observed=False).size().unstack(fill_value=0)
  308. st.dataframe(region_counts, use_container_width=True)
  309. # 高频缺陷区域 TOP5
  310. st.subheader("高频缺陷区域 TOP5")
  311. region_df["区域"] = region_df["X区域"].astype(str) + "-" + region_df["Y区域"].astype(str)
  312. top_regions = region_df["区域"].value_counts().head(5)
  313. for i, (region, count) in enumerate(top_regions.items(), 1):
  314. st.metric(f"#{i} {region}", f"{count} 个缺陷")
  315. # --- 模拟面板缺陷标注图 ---
  316. st.divider()
  317. st.subheader("🖼️ 模拟面板缺陷标注图")
  318. st.markdown("选择批次和面板,查看缺陷在面板上的实际分布标注(按缺陷类型用不同颜色/形状区分)")
  319. ann_col1, ann_col2, ann_col3 = st.columns(3)
  320. with ann_col1:
  321. ann_batch = st.selectbox("选择批次", options=sorted(filtered_df["batch_id"].unique()), key="ann_batch")
  322. with ann_col2:
  323. panels_in_batch = sorted(filtered_df[filtered_df["batch_id"] == ann_batch]["panel_id"].unique())
  324. ann_panel = st.selectbox("选择面板", options=panels_in_batch, key="ann_panel")
  325. with ann_col3:
  326. ann_show_label = st.checkbox("显示缺陷标签", value=True)
  327. panel_defects = filtered_df[(filtered_df["batch_id"] == ann_batch) & (filtered_df["panel_id"] == ann_panel)]
  328. if len(panel_defects) == 0:
  329. st.warning(f"当前面板 **{ann_panel}** (批次 {ann_batch}) 在筛选条件下无缺陷记录,请调整筛选条件或选择其他面板")
  330. else:
  331. pw = df["panel_width_mm"].iloc[0]
  332. ph = df["panel_height_mm"].iloc[0]
  333. # 缺陷类型 → 颜色/形状映射
  334. type_style = {
  335. "划痕": {"color": "red", "marker": "x", "size": 80},
  336. "亮点": {"color": "yellow", "marker": "o", "size": 60},
  337. "暗点": {"color": "black", "marker": "x", "size": 60},
  338. "气泡": {"color": "cyan", "marker": "o", "size": 100},
  339. "色差": {"color": "magenta", "marker": "s", "size": 70},
  340. "漏光": {"color": "orange", "marker": "D", "size": 80},
  341. "裂纹": {"color": "darkred", "marker": "v", "size": 90},
  342. "异物": {"color": "green", "marker": "P", "size": 80},
  343. }
  344. fig_ann, ax_ann = plt.subplots(figsize=(3.5, 5))
  345. # 面板背景(模拟屏幕灰色渐变)
  346. ax_ann.add_patch(plt.Rectangle((0, 0), pw, ph, facecolor="#1a1a2e", edgecolor="#444", linewidth=2))
  347. # 内框(模拟屏幕可视区域)
  348. margin = 8
  349. ax_ann.add_patch(plt.Rectangle((margin, margin), pw - 2*margin, ph - 2*margin,
  350. facecolor="#16213e", edgecolor="#0f3460", linewidth=1.5))
  351. # FPC绑定区域标注
  352. fpc_y = ph * 0.7
  353. ax_ann.axhline(y=fpc_y, color="#555", linestyle="--", alpha=0.4, linewidth=0.5)
  354. ax_ann.text(pw/2, fpc_y + 2, "FPC区", color="#666", fontsize=7, ha="center", alpha=0.5)
  355. # 绘制缺陷标注
  356. for _, row in panel_defects.iterrows():
  357. style = type_style.get(row["defect_type"], {"color": "white", "marker": "o", "size": 50})
  358. severity_size = {"轻微": 0.7, "中等": 1.0, "严重": 1.4}.get(row["severity"], 1.0)
  359. ax_ann.scatter(row["x_mm"], row["y_mm"],
  360. c=style["color"], marker=style["marker"],
  361. s=style["size"] * severity_size,
  362. edgecolors="white", linewidth=0.3, alpha=0.85, zorder=3)
  363. if ann_show_label:
  364. ax_ann.annotate(row["defect_type"][:2],
  365. (row["x_mm"], row["y_mm"]),
  366. fontsize=5, color="white",
  367. ha="center", va="bottom", alpha=0.7, zorder=4)
  368. # 图例
  369. legend_elements = [plt.Line2D([0], [0], marker=type_style[t]["marker"], color="w",
  370. markerfacecolor=type_style[t]["color"], markersize=8,
  371. label=t, markeredgewidth=0.5, markeredgecolor="white")
  372. for t in type_style]
  373. ax_ann.legend(handles=legend_elements, loc="upper right", fontsize=7,
  374. framealpha=0.7, facecolor="#222", edgecolor="#555")
  375. ax_ann.set_xlim(-5, pw + 5)
  376. ax_ann.set_ylim(-5, ph + 5)
  377. ax_ann.set_title(f"面板 {ann_panel} | 批次 {ann_batch} | {len(panel_defects)} 个缺陷",
  378. fontsize=11, pad=10)
  379. ax_ann.set_xlabel("X (mm)")
  380. ax_ann.set_ylabel("Y (mm)")
  381. ax_ann.set_aspect("equal")
  382. ax_ann.grid(True, alpha=0.1, color="gray")
  383. st.pyplot(fig_ann)
  384. plt.close()
  385. # ========== Tab 2: 帕累托分析 ==========
  386. _t = get_tab("📊 类型集中性 (帕累托)")
  387. if _t:
  388. with _t:
  389. st.header("缺陷类型帕累托分析")
  390. type_counts = filtered_df["defect_type"].value_counts().reset_index()
  391. type_counts.columns = ["缺陷类型", "数量"]
  392. type_counts = type_counts.sort_values("数量", ascending=False).reset_index(drop=True)
  393. type_counts["累计占比"] = type_counts["数量"].cumsum() / type_counts["数量"].sum() * 100
  394. type_counts["占比"] = type_counts["数量"] / type_counts["数量"].sum() * 100
  395. fig, ax1 = plt.subplots(figsize=(10, 5))
  396. # 柱状图
  397. bars = ax1.bar(type_counts["缺陷类型"], type_counts["数量"], color="steelblue", alpha=0.8)
  398. ax1.set_xlabel("缺陷类型")
  399. ax1.set_ylabel("数量", color="steelblue")
  400. ax1.set_title("帕累托图 - 缺陷类型分布")
  401. # 累计占比折线
  402. ax2 = ax1.twinx()
  403. ax2.plot(type_counts["缺陷类型"], type_counts["累计占比"], color="red", marker="o", linewidth=2)
  404. ax2.axhline(y=80, color="green", linestyle="--", alpha=0.5, label="80%线")
  405. ax2.set_ylabel("累计占比 (%)", color="red")
  406. ax2.set_ylim(0, 110)
  407. # 标注数值
  408. for bar, count in zip(bars, type_counts["数量"]):
  409. ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 2,
  410. str(count), ha="center", va="bottom", fontsize=9)
  411. st.pyplot(fig)
  412. plt.close()
  413. # 数据表格
  414. st.subheader("详细数据")
  415. st.dataframe(type_counts, use_container_width=True)
  416. # 严重程度分布
  417. st.subheader("按严重程度分布")
  418. sev_counts = filtered_df["severity"].value_counts()
  419. fig2, ax = plt.subplots(figsize=(6, 4))
  420. colors = {"轻微": "#4CAF50", "中等": "#FF9800", "严重": "#F44336"}
  421. sev_counts.plot(kind="bar", ax=ax, color=[colors.get(s, "gray") for s in sev_counts.index])
  422. ax.set_title("缺陷严重程度分布")
  423. ax.set_ylabel("数量")
  424. st.pyplot(fig2)
  425. plt.close()
  426. # ========== Tab 3: 时间集中性 ==========
  427. _t = get_tab("📈 时间集中性")
  428. if _t:
  429. with _t:
  430. st.header("缺陷时间分布趋势")
  431. col1, col2 = st.columns(2)
  432. with col1:
  433. # 按天趋势
  434. daily = filtered_df.groupby("day").size().reset_index(name="缺陷数")
  435. daily["day"] = pd.to_datetime(daily["day"])
  436. fig1, ax1 = plt.subplots(figsize=(10, 4))
  437. ax1.plot(daily["day"], daily["缺陷数"], marker="o", markersize=3, linewidth=1.5, color="steelblue")
  438. ax1.fill_between(daily["day"], daily["缺陷数"], alpha=0.2, color="steelblue")
  439. ax1.set_title("每日缺陷数量趋势")
  440. ax1.set_ylabel("缺陷数量")
  441. ax1.tick_params(axis="x", rotation=45)
  442. # 移动平均
  443. if len(daily) > 3:
  444. daily["移动平均(3天)"] = daily["缺陷数"].rolling(window=3, min_periods=1).mean()
  445. ax1.plot(daily["day"], daily["移动平均(3天)"], color="red", linestyle="--",
  446. linewidth=2, alpha=0.7, label="3日移动平均")
  447. ax1.legend()
  448. st.pyplot(fig1)
  449. plt.close()
  450. with col2:
  451. # 按小时分布
  452. hourly = filtered_df.groupby("hour").size().reindex(range(24), fill_value=0)
  453. fig2, ax2 = plt.subplots(figsize=(10, 4))
  454. colors = ["#FF6B6B" if (h >= 17 or h < 8) else "#4ECDC4" for h in hourly.index]
  455. ax2.bar(hourly.index, hourly.values, color=colors, alpha=0.8)
  456. ax2.set_title("每小时缺陷分布 (红色=夜班)")
  457. ax2.set_xlabel("小时")
  458. ax2.set_ylabel("缺陷数量")
  459. st.pyplot(fig2)
  460. plt.close()
  461. # 班次对比
  462. st.subheader("班次对比")
  463. shift_stats = filtered_df.groupby("shift").agg({
  464. "defect_id": "count",
  465. "panel_id": "nunique"
  466. }).rename(columns={"defect_id": "缺陷数", "panel_id": "涉及面板数"})
  467. st.dataframe(shift_stats, use_container_width=True)
  468. # 每周分布
  469. st.subheader("按星期分布")
  470. filtered_df_copy = filtered_df.copy()
  471. filtered_df_copy["weekday"] = filtered_df_copy["timestamp"].dt.day_name()
  472. weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
  473. weekday_cn = {"Monday": "周一", "Tuesday": "周二", "Wednesday": "周三",
  474. "Thursday": "周四", "Friday": "周五", "Saturday": "周六", "Sunday": "周日"}
  475. filtered_df_copy["星期"] = filtered_df_copy["weekday"].map(weekday_cn)
  476. weekday_counts = filtered_df_copy.groupby("星期").size().reindex(
  477. [weekday_cn[d] for d in weekday_order], fill_value=0
  478. )
  479. fig3, ax3 = plt.subplots(figsize=(8, 4))
  480. ax3.bar(range(7), weekday_counts.values, color="steelblue", alpha=0.8)
  481. ax3.set_xticks(range(7))
  482. ax3.set_xticklabels(weekday_counts.index)
  483. ax3.set_title("按星期分布")
  484. ax3.set_ylabel("缺陷数量")
  485. st.pyplot(fig3)
  486. plt.close()
  487. # ========== Tab 4: 批次集中性 ==========
  488. _t = get_tab("🏭 批次集中性")
  489. if _t:
  490. with _t:
  491. st.header("批次缺陷集中性分析")
  492. batch_stats = filtered_df.groupby("batch_id").agg({
  493. "defect_id": "count",
  494. "panel_id": "nunique",
  495. "severity": lambda x: (x == "严重").sum()
  496. }).rename(columns={"defect_id": "缺陷数", "panel_id": "面板数", "severity": "严重缺陷数"})
  497. batch_stats["缺陷率"] = batch_stats["缺陷数"] / batch_stats["面板数"]
  498. batch_stats = batch_stats.sort_index()
  499. col1, col2 = st.columns(2)
  500. with col1:
  501. fig1, ax1 = plt.subplots(figsize=(10, 4))
  502. ax1.bar(range(len(batch_stats)), batch_stats["缺陷数"], color="steelblue", alpha=0.8)
  503. ax1.set_title("各批次缺陷数量")
  504. ax1.set_xlabel("批次")
  505. ax1.set_ylabel("缺陷数")
  506. ax1.set_xticks(range(len(batch_stats)))
  507. ax1.set_xticklabels(batch_stats.index, rotation=90, fontsize=7)
  508. st.pyplot(fig1)
  509. plt.close()
  510. with col2:
  511. fig2, ax2 = plt.subplots(figsize=(10, 4))
  512. ax2.plot(range(len(batch_stats)), batch_stats["缺陷率"], marker="o", markersize=3,
  513. color="red", linewidth=1.5)
  514. ax2.axhline(y=batch_stats["缺陷率"].mean(), color="green", linestyle="--",
  515. label=f"平均缺陷率: {batch_stats['缺陷率'].mean():.2%}")
  516. ax2.set_title("各批次缺陷率趋势")
  517. ax2.set_xlabel("批次")
  518. ax2.set_ylabel("缺陷率")
  519. ax2.set_xticks(range(len(batch_stats)))
  520. ax2.set_xticklabels(batch_stats.index, rotation=90, fontsize=7)
  521. ax2.legend()
  522. st.pyplot(fig2)
  523. plt.close()
  524. # 异常批次
  525. st.subheader("异常批次 (缺陷率 > 平均值 + 1倍标准差)")
  526. threshold = batch_stats["缺陷率"].mean() + batch_stats["缺陷率"].std()
  527. abnormal = batch_stats[batch_stats["缺陷率"] > threshold].sort_values("缺陷率", ascending=False)
  528. if len(abnormal) > 0:
  529. st.dataframe(abnormal, use_container_width=True)
  530. else:
  531. st.success("未发现异常批次")
  532. # ========== Tab 5: 设备座号集中性 ==========
  533. _t = get_tab("🏗️ 设备座号集中性")
  534. if _t:
  535. with _t:
  536. st.header("🏗️ 前贴附制程设备座号集中性分析")
  537. st.markdown(
  538. "分析缺陷是否集中在特定设备的特定座号(工位)。"
  539. "如果某个座号缺陷明显多于其他座号,说明该座号对应的设备局部存在问题(如吸嘴老化、加热不均、压力异常等)。"
  540. )
  541. # --- 设备对比 ---
  542. st.subheader("设备级别对比")
  543. eq_stats = filtered_df.groupby("equipment_id").agg({
  544. "defect_id": "count",
  545. "panel_id": "nunique",
  546. "severity": lambda x: (x == "严重").sum()
  547. }).rename(columns={"defect_id": "缺陷数", "panel_id": "面板数", "severity": "严重缺陷"})
  548. eq_stats["缺陷率"] = eq_stats["缺陷数"] / eq_stats["面板数"]
  549. eq_stats = eq_stats.sort_values("缺陷数", ascending=False)
  550. col_eq1, col_eq2 = st.columns(2)
  551. with col_eq1:
  552. fig_eq1, ax_eq1 = plt.subplots(figsize=(8, 4))
  553. bars1 = ax_eq1.bar(range(len(eq_stats)), eq_stats["缺陷数"], color=["#FF6B6B", "#4ECDC4", "#45B7D1"][:len(eq_stats)], alpha=0.8)
  554. ax_eq1.set_xticks(range(len(eq_stats)))
  555. ax_eq1.set_xticklabels(eq_stats.index, fontsize=10)
  556. ax_eq1.set_ylabel("缺陷数量")
  557. ax_eq1.set_title("各设备缺陷总数")
  558. for bar, count in zip(bars1, eq_stats["缺陷数"]):
  559. ax_eq1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 3,
  560. str(count), ha="center", va="bottom", fontsize=10, fontweight="bold")
  561. st.pyplot(fig_eq1)
  562. plt.close()
  563. with col_eq2:
  564. fig_eq2, ax_eq2 = plt.subplots(figsize=(8, 4))
  565. bars2 = ax_eq2.bar(range(len(eq_stats)), eq_stats["缺陷率"] * 100,
  566. color=["#FF6B6B", "#4ECDC4", "#45B7D1"][:len(eq_stats)], alpha=0.8)
  567. ax_eq2.set_xticks(range(len(eq_stats)))
  568. ax_eq2.set_xticklabels(eq_stats.index, fontsize=10)
  569. ax_eq2.set_ylabel("缺陷率 (%)")
  570. ax_eq2.set_title("各设备缺陷率")
  571. for bar, rate in zip(bars2, eq_stats["缺陷率"] * 100):
  572. ax_eq2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.3,
  573. f"{rate:.1f}%", ha="center", va="bottom", fontsize=10, fontweight="bold")
  574. st.pyplot(fig_eq2)
  575. plt.close()
  576. st.dataframe(eq_stats, use_container_width=True)
  577. # --- 座号级别分析 ---
  578. st.divider()
  579. st.subheader("座号级别缺陷分布")
  580. # 选择设备查看座号
  581. eq_for_seat = st.selectbox("选择设备查看座号分布", options=sorted(filtered_df["equipment_id"].unique()), key="eq_seat")
  582. eq_data = filtered_df[filtered_df["equipment_id"] == eq_for_seat]
  583. eq_info = None
  584. for eq_name, info in [("LAM-A01", {"rows": 4, "cols": 5}), ("LAM-A02", {"rows": 4, "cols": 5}), ("LAM-B01", {"rows": 5, "cols": 4})]:
  585. if eq_name == eq_for_seat:
  586. eq_info = info
  587. break
  588. seat_counts = eq_data.groupby("seat_id").size().reset_index(name="缺陷数")
  589. seat_counts = seat_counts.sort_values("缺陷数", ascending=False)
  590. if eq_info:
  591. # 网格热力图
  592. grid = np.zeros((eq_info["rows"], eq_info["cols"]))
  593. seat_to_defects = eq_data.groupby("seat_id").size().to_dict()
  594. for r in range(1, eq_info["rows"] + 1):
  595. for c in range(1, eq_info["cols"] + 1):
  596. seat_name = f"R{r}C{c}"
  597. grid[r - 1, c - 1] = seat_to_defects.get(seat_name, 0)
  598. fig_grid, ax_grid = plt.subplots(figsize=(8, 6))
  599. im = ax_grid.imshow(grid, cmap="YlOrRd", aspect="equal")
  600. ax_grid.set_title(f"{eq_for_seat} 座号缺陷热力图")
  601. ax_grid.set_xlabel("列号")
  602. ax_grid.set_ylabel("行号")
  603. ax_grid.set_xticks(range(eq_info["cols"]))
  604. ax_grid.set_xticklabels([f"C{i+1}" for i in range(eq_info["cols"])])
  605. ax_grid.set_yticks(range(eq_info["rows"]))
  606. ax_grid.set_yticklabels([f"R{i+1}" for i in range(eq_info["rows"])])
  607. # 标注数值
  608. for r in range(eq_info["rows"]):
  609. for c in range(eq_info["cols"]):
  610. val = int(grid[r, c])
  611. color = "white" if val > grid.max() * 0.7 else "black"
  612. ax_grid.text(c, r, str(val), ha="center", va="center", fontsize=10,
  613. color=color, fontweight="bold")
  614. plt.colorbar(im, ax=ax_grid, label="缺陷数量")
  615. st.pyplot(fig_grid)
  616. plt.close()
  617. else:
  618. fig_bar, ax_bar = plt.subplots(figsize=(10, 4))
  619. ax_bar.bar(range(len(seat_counts)), seat_counts["缺陷数"], color="steelblue", alpha=0.8)
  620. ax_bar.set_xticks(range(len(seat_counts)))
  621. ax_bar.set_xticklabels(seat_counts["seat_id"], rotation=45, fontsize=8)
  622. ax_bar.set_ylabel("缺陷数量")
  623. ax_bar.set_title("座号缺陷分布")
  624. st.pyplot(fig_bar)
  625. plt.close()
  626. # 座号数据表格
  627. st.dataframe(seat_counts, use_container_width=True)
  628. # --- 异常座号检测 ---
  629. st.divider()
  630. st.subheader("异常座号检测")
  631. all_seat_stats = filtered_df.groupby(["equipment_id", "seat_id"]).size().reset_index(name="缺陷数")
  632. overall_mean = all_seat_stats["缺陷数"].mean()
  633. overall_std = all_seat_stats["缺陷数"].std()
  634. threshold_1x = overall_mean + overall_std
  635. threshold_2x = overall_mean + 2 * overall_std
  636. st.info(f"📊 全局统计: 平均每个座号 **{overall_mean:.1f}** 个缺陷 | 标准差 **{overall_std:.1f}**")
  637. col_anom1, col_anom2 = st.columns(2)
  638. with col_anom1:
  639. st.markdown(f"**⚠️ 1σ 预警座号** (缺陷数 > {threshold_1x:.0f})")
  640. warning_seats = all_seat_stats[all_seat_stats["缺陷数"] > threshold_1x].sort_values("缺陷数", ascending=False)
  641. if len(warning_seats) > 0:
  642. st.dataframe(warning_seats.reset_index(drop=True), use_container_width=True)
  643. else:
  644. st.success("无预警座号")
  645. with col_anom2:
  646. st.markdown(f"**🔴 2σ 异常座号** (缺陷数 > {threshold_2x:.0f})")
  647. critical_seats = all_seat_stats[all_seat_stats["缺陷数"] > threshold_2x].sort_values("缺陷数", ascending=False)
  648. if len(critical_seats) > 0:
  649. st.dataframe(critical_seats.reset_index(drop=True), use_container_width=True)
  650. else:
  651. st.success("无异常座号")
  652. # --- 座号 × 缺陷类型 交叉分析 ---
  653. st.divider()
  654. st.subheader("座号 × 缺陷类型 交叉分析")
  655. st.markdown("识别哪些座号偏向产生特定类型的缺陷(如 R2C3 座号主要产生气泡 → 吸嘴问题)")
  656. if eq_info:
  657. eq_seat_type = eq_data.groupby(["seat_id", "defect_type"]).size().unstack(fill_value=0)
  658. fig_ct, ax_ct = plt.subplots(figsize=(10, 6))
  659. sns.heatmap(eq_seat_type, annot=True, fmt="d", cmap="YlOrRd", ax=ax_ct,
  660. linewidths=0.5, linecolor="white")
  661. ax_ct.set_title(f"{eq_for_seat} 座号 × 缺陷类型 热力图")
  662. st.pyplot(fig_ct)
  663. plt.close()
  664. # ========== Tab 6: 关联分析 ==========
  665. _t = get_tab("🔗 关联分析")
  666. if _t:
  667. with _t:
  668. st.header("缺陷关联分析")
  669. col1, col2 = st.columns(2)
  670. with col1:
  671. # 缺陷类型 x 严重程度 交叉表
  672. ct = pd.crosstab(filtered_df["defect_type"], filtered_df["severity"])
  673. fig1, ax1 = plt.subplots(figsize=(8, 5))
  674. sns.heatmap(ct, annot=True, fmt="d", cmap="YlOrRd", ax=ax1,
  675. linewidths=0.5, linecolor="white")
  676. ax1.set_title("缺陷类型 × 严重程度 热力图")
  677. st.pyplot(fig1)
  678. plt.close()
  679. with col2:
  680. # 缺陷类型 x 班次 交叉表
  681. ct2 = pd.crosstab(filtered_df["defect_type"], filtered_df["shift"])
  682. fig2, ax2 = plt.subplots(figsize=(8, 5))
  683. sns.heatmap(ct2, annot=True, fmt="d", cmap="Blues", ax=ax2,
  684. linewidths=0.5, linecolor="white")
  685. ax2.set_title("缺陷类型 × 班次 热力图")
  686. st.pyplot(fig2)
  687. plt.close()
  688. # 面板缺陷 TOP10
  689. st.subheader("缺陷最多的面板 TOP10")
  690. panel_defects = filtered_df.groupby("panel_id").agg({
  691. "defect_id": "count",
  692. "defect_type": lambda x: x.mode().iloc[0] if len(x) > 0 else "N/A"
  693. }).rename(columns={"defect_id": "缺陷数", "defect_type": "主要缺陷类型"})
  694. panel_defects = panel_defects.sort_values("缺陷数", ascending=False).head(10)
  695. st.dataframe(panel_defects, use_container_width=True)
  696. # 面板缺陷分布
  697. fig3, ax3 = plt.subplots(figsize=(8, 4))
  698. panel_counts = filtered_df.groupby("panel_id").size()
  699. ax3.hist(panel_counts, bins=20, color="steelblue", alpha=0.8, edgecolor="white")
  700. ax3.set_title("单面板缺陷数量分布")
  701. ax3.set_xlabel("缺陷数/面板")
  702. ax3.set_ylabel("面板数量")
  703. ax3.axvline(x=panel_counts.mean(), color="red", linestyle="--", label=f"平均: {panel_counts.mean():.1f}")
  704. ax3.legend()
  705. st.pyplot(fig3)
  706. plt.close()
  707. # --- 智能缺陷聚类 (DBSCAN + PCA) ---
  708. _t = get_tab("🧠 智能缺陷聚类 (DBSCAN)")
  709. if _t:
  710. with _t:
  711. st.header("🧠 DBSCAN 智能缺陷空间聚类")
  712. st.markdown(
  713. "**原理**: DBSCAN 是基于密度的空间聚类算法,能自动识别任意形状的缺陷聚集区域,"
  714. "无需预设聚类数量,自动过滤随机散落的噪声缺陷。"
  715. "行业标准:半导体晶圆/面板缺陷模式识别首选算法。"
  716. )
  717. col1, col2 = st.columns([2, 1])
  718. with col1:
  719. # --- 参数控制 ---
  720. st.subheader("参数设置")
  721. p_col1, p_col2 = st.columns(2)
  722. with p_col1:
  723. eps = st.slider(
  724. "eps (邻域半径 mm)",
  725. min_value=5.0, max_value=100.0, value=25.0, step=5.0,
  726. help="两个点被视为'邻居'的最大距离。值越大,簇越大。"
  727. )
  728. with p_col2:
  729. min_samples = st.slider(
  730. "min_samples (最小簇点数)",
  731. min_value=3, max_value=50, value=10,
  732. help="形成一个簇所需的最小点数。值越大,越严格的聚集才算簇。"
  733. )
  734. # --- 执行聚类 ---
  735. coords = filtered_df[["x_mm", "y_mm"]].values
  736. scaler = StandardScaler()
  737. coords_scaled = scaler.fit_transform(coords)
  738. dbscan = DBSCAN(eps=eps / scaler.scale_[0], min_samples=min_samples)
  739. filtered_df["cluster"] = dbscan.fit_predict(coords_scaled)
  740. # 统计聚类结果
  741. n_clusters = len(set(dbscan.labels_)) - (1 if -1 in dbscan.labels_ else 0)
  742. n_noise = list(dbscan.labels_).count(-1)
  743. st.info(f"📊 **聚类结果**: 发现 **{n_clusters}** 个缺陷聚集区域,**{n_noise}** 个噪声点(随机散落缺陷)")
  744. # --- 可视化 ---
  745. fig, axes = plt.subplots(1, 2, figsize=(14, 6))
  746. # 左图:聚类结果(空间位置)
  747. labels = filtered_df["cluster"].values
  748. unique_labels = set(labels)
  749. colors = plt.cm.get_cmap("tab20", len(unique_labels) if len(unique_labels) > 0 else 1)
  750. for k in unique_labels:
  751. if k == -1:
  752. # 噪声点
  753. xy = filtered_df[labels == k][["x_mm", "y_mm"]].values
  754. axes[0].scatter(xy[:, 0], xy[:, 1], c="lightgray", s=3, alpha=0.3, label="噪声")
  755. else:
  756. xy = filtered_df[labels == k][["x_mm", "y_mm"]].values
  757. axes[0].scatter(xy[:, 0], xy[:, 1], c=[colors(k)], s=15, alpha=0.7,
  758. label=f"簇 {k+1} ({len(xy)} 点)")
  759. axes[0].set_title(f"DBSCAN 空间聚类结果 (eps={eps}, min_samples={min_samples})")
  760. axes[0].set_xlabel("X (mm)")
  761. axes[0].set_ylabel("Y (mm)")
  762. axes[0].set_aspect("equal")
  763. axes[0].legend(fontsize=7, loc="upper right", ncol=2)
  764. # 右图:PCA 降维可视化(加入更多特征维度)
  765. if len(filtered_df) > 2:
  766. # 构建多维特征:x, y, hour, defect_type编码, severity编码
  767. feature_df = filtered_df[["x_mm", "y_mm", "hour"]].copy()
  768. # 缺陷类型编码
  769. type_map = {t: i for i, t in enumerate(filtered_df["defect_type"].unique())}
  770. feature_df["type_code"] = filtered_df["defect_type"].map(type_map).astype(float)
  771. # 严重程度编码
  772. sev_map = {"轻微": 0, "中等": 1, "严重": 2}
  773. feature_df["sev_code"] = filtered_df["severity"].map(sev_map).astype(float)
  774. features = feature_df.values
  775. features_scaled = StandardScaler().fit_transform(features)
  776. # PCA 降维到 2D
  777. n_components = min(2, features_scaled.shape[1])
  778. pca = PCA(n_components=n_components)
  779. pca_result = pca.fit_transform(features_scaled)
  780. explained_var = pca.explained_variance_ratio_
  781. for k in unique_labels:
  782. mask_k = labels == k
  783. if k == -1:
  784. axes[1].scatter(pca_result[mask_k, 0], pca_result[mask_k, 1],
  785. c="lightgray", s=3, alpha=0.3, label="噪声")
  786. else:
  787. axes[1].scatter(pca_result[mask_k, 0], pca_result[mask_k, 1],
  788. c=[colors(k)], s=15, alpha=0.7, label=f"簇 {k+1}")
  789. axes[1].set_title(
  790. f"PCA 多维特征降维\n"
  791. f"PC1: {explained_var[0]*100:.1f}% | PC2: {explained_var[1]*100:.1f}%"
  792. )
  793. axes[1].set_xlabel("主成分 1")
  794. axes[1].set_ylabel("主成分 2")
  795. axes[1].legend(fontsize=7, loc="upper right")
  796. st.pyplot(fig)
  797. plt.close()
  798. # --- 簇特征统计 ---
  799. if n_clusters > 0:
  800. st.divider()
  801. st.subheader("各簇特征分析")
  802. cluster_data = []
  803. for k in sorted([c for c in unique_labels if c != -1]):
  804. cluster_df = filtered_df[labels == k]
  805. cluster_data.append({
  806. "簇编号": k + 1,
  807. "缺陷数量": len(cluster_df),
  808. "占比": f"{len(cluster_df)/len(filtered_df)*100:.1f}%",
  809. "中心X(mm)": round(cluster_df["x_mm"].mean(), 1),
  810. "中心Y(mm)": round(cluster_df["y_mm"].mean(), 1),
  811. "X范围": f"{cluster_df['x_mm'].min():.0f}~{cluster_df['x_mm'].max():.0f}",
  812. "Y范围": f"{cluster_df['y_mm'].min():.0f}~{cluster_df['y_mm'].max():.0f}",
  813. "主要缺陷": cluster_df["defect_type"].mode().iloc[0] if len(cluster_df) > 0 else "-",
  814. "主要严重度": cluster_df["severity"].mode().iloc[0] if len(cluster_df) > 0 else "-",
  815. "涉及批次": cluster_df["batch_id"].nunique(),
  816. "涉及面板": cluster_df["panel_id"].nunique(),
  817. })
  818. st.dataframe(pd.DataFrame(cluster_data), use_container_width=True)
  819. with col2:
  820. # --- 聚类结果说明 ---
  821. st.subheader("📖 结果解读")
  822. st.markdown(
  823. f"""
  824. **当前参数**: eps={eps}mm, min_samples={min_samples}
  825. **聚类统计**:
  826. - 缺陷聚集区域: {n_clusters} 个
  827. - 随机散落噪声: {n_noise} 个
  828. - 噪声占比: {n_noise/len(filtered_df)*100:.1f}%
  829. **参数调优建议**:
  830. - **eps 调大** → 簇数量减少,簇变大
  831. - **eps 调小** → 簇数量增加,更精细
  832. - **min_samples 调大** → 只有高度密集区域才算簇
  833. - **min_samples 调小** → 更多区域被识别为簇
  834. **工业应用**:
  835. - 每个"簇"代表一个**系统性缺陷源**
  836. (如某台设备、某道工序、某个物料批次)
  837. - "噪声"点是随机缺陷,通常无需特别关注
  838. - 重点关注**缺陷数量多、涉及批次集中**的簇
  839. """
  840. )
  841. # --- 簇分布饼图 ---
  842. if n_clusters > 0:
  843. st.subheader("簇规模分布")
  844. cluster_counts = filtered_df[labels >= 0]["cluster"].value_counts().sort_index()
  845. fig_pie, ax_pie = plt.subplots(figsize=(5, 5))
  846. pie_labels = [f"簇{i+1}" for i in cluster_counts.index]
  847. ax_pie.pie(cluster_counts.values, labels=pie_labels, autopct="%1.1f%%",
  848. colors=plt.cm.tab20.colors[:len(cluster_counts)], startangle=90)
  849. ax_pie.set_title("各簇缺陷占比")
  850. st.pyplot(fig_pie)
  851. plt.close()
  852. # --- DBSCAN vs K-Means 对比 ---
  853. st.subheader("为什么选 DBSCAN?")
  854. st.markdown(
  855. """
  856. | 维度 | DBSCAN | K-Means |
  857. |------|--------|---------|
  858. | 形状适应 | ✅ 任意形状 | ❌ 仅球形 |
  859. | 预设K值 | ❌ 不需要 | ✅ 必须 |
  860. | 噪声处理 | ✅ 自动过滤 | ❌ 干扰聚类 |
  861. | 环形/线形缺陷 | ✅ 能识别 | ❌ 识别不了 |
  862. """
  863. )
  864. # ========== Tab 8: SPC 控制图与预警 ==========
  865. _t = get_tab("🚨 SPC 控制图与预警")
  866. if _t:
  867. with _t:
  868. st.header("🚨 SPC 统计过程控制")
  869. st.markdown(
  870. "基于统计过程控制(SPC)方法,监控每日缺陷率是否在控制限内,"
  871. "自动检测异常趋势并给出改善/恶化结论。"
  872. )
  873. # --- 数据准备:按天计算缺陷率 ---
  874. # 需要知道每天检测了多少面板才能算缺陷率
  875. # 用 batch_id 近似日期
  876. daily_all = df.groupby("day").agg(
  877. total_defects=("defect_id", "count"),
  878. panels_with_defects=("panel_id", "nunique")
  879. ).reset_index()
  880. daily_all["day"] = pd.to_datetime(daily_all["day"])
  881. daily_all = daily_all.sort_values("day").reset_index(drop=True)
  882. if len(daily_all) < 2:
  883. st.warning("数据天数不足,无法生成控制图")
  884. else:
  885. # 估算每天检测总数:用总面板数 / 总天数近似
  886. total_days = (df["timestamp"].max() - df["timestamp"].min()).days + 1
  887. total_unique_panels = df["panel_id"].nunique()
  888. daily_all["estimated_inspected"] = max(total_unique_panels // max(total_days // 7, 1), 1) # 按工作日估算
  889. daily_all["defect_rate"] = daily_all["panels_with_defects"] / daily_all["estimated_inspected"]
  890. # 控制限计算
  891. p_bar = daily_all["defect_rate"].mean()
  892. n_avg = daily_all["estimated_inspected"].mean()
  893. sigma_p = np.sqrt(p_bar * (1 - p_bar) / n_avg) if n_avg > 0 and p_bar > 0 else 0
  894. UCL = p_bar + 3 * sigma_p # 上控制限
  895. LCL = max(0, p_bar - 3 * sigma_p) # 下控制限
  896. UWL = p_bar + 2 * sigma_p # 上警告限
  897. LWL = max(0, p_bar - 2 * sigma_p) # 下警告限
  898. # --- Western Electric 规则检测 ---
  899. we_violations = []
  900. # 规则1: 单点超出 3σ 控制限
  901. for i, row in daily_all.iterrows():
  902. if row["defect_rate"] > UCL or row["defect_rate"] < LCL:
  903. we_violations.append({
  904. "日期": row["day"].strftime("%Y-%m-%d"),
  905. "规则": "Rule 1: 超出3σ控制限",
  906. "值": f"{row['defect_rate']:.2%}"
  907. })
  908. # 规则2: 连续7点上升或下降
  909. rates = daily_all["defect_rate"].values
  910. if len(rates) >= 7:
  911. for i in range(len(rates) - 6):
  912. window = rates[i:i+7]
  913. if all(window[j] < window[j+1] for j in range(6)):
  914. we_violations.append({
  915. "日期": daily_all.loc[i+6, "day"].strftime("%Y-%m-%d"),
  916. "规则": "Rule 2: 连续7点上升",
  917. "值": f"{rates[i]:.2%} → {rates[i+6]:.2%}"
  918. })
  919. elif all(window[j] > window[j+1] for j in range(6)):
  920. we_violations.append({
  921. "日期": daily_all.loc[i+6, "day"].strftime("%Y-%m-%d"),
  922. "规则": "Rule 2: 连续7点下降",
  923. "值": f"{rates[i]:.2%} → {rates[i+6]:.2%}"
  924. })
  925. # 规则3: 连续7点在中心线同一侧
  926. for i in range(len(rates) - 6):
  927. window = rates[i:i+7]
  928. if all(v > p_bar for v in window):
  929. we_violations.append({
  930. "日期": daily_all.loc[i+6, "day"].strftime("%Y-%m-%d"),
  931. "规则": "Rule 3: 连续7点在CL上方",
  932. "值": f"持续偏高"
  933. })
  934. elif all(v < p_bar for v in window):
  935. we_violations.append({
  936. "日期": daily_all.loc[i+6, "day"].strftime("%Y-%m-%d"),
  937. "规则": "Rule 3: 连续7点在CL下方",
  938. "值": f"持续偏低"
  939. })
  940. # --- 趋势分析 ---
  941. from numpy.polynomial import polynomial as P
  942. x = np.arange(len(daily_all))
  943. coeffs = np.polyfit(x, rates, 1)
  944. slope = coeffs[0]
  945. daily_all["trend"] = np.polyval(coeffs, x)
  946. if abs(slope) < sigma_p * 0.1:
  947. trend_status = "稳定"
  948. trend_icon = "➡️"
  949. trend_color = "normal"
  950. elif slope > 0:
  951. trend_status = "恶化中"
  952. trend_icon = "📈"
  953. trend_color = "inverse"
  954. else:
  955. trend_status = "改善中"
  956. trend_icon = "📉"
  957. trend_color = "normal"
  958. # --- KPI 行 ---
  959. kpi_spc1, kpi_spc2, kpi_spc3, kpi_spc4 = st.columns(4)
  960. kpi_spc1.metric("平均缺陷率", f"{p_bar:.2%}")
  961. kpi_spc2.metric("控制限 (UCL/LCL)", f"{UCL:.2%} / {LCL:.2%}")
  962. kpi_spc3.metric("趋势判断", f"{trend_icon} {trend_status}", delta=f"斜率: {slope*100:.3f}%/天", delta_color=trend_color)
  963. kpi_spc4.metric("Western Electric 告警", f"{len(we_violations)} 次", delta="需关注" if len(we_violations) > 0 else "正常")
  964. # --- 控制图 ---
  965. st.divider()
  966. st.subheader("X-bar 控制图 (每日缺陷率)")
  967. fig_spc, ax_spc = plt.subplots(figsize=(14, 5))
  968. # 数据点
  969. ax_spc.plot(daily_all["day"], daily_all["defect_rate"],
  970. marker="o", markersize=4, linewidth=1.5, color="steelblue", label="日缺陷率")
  971. ax_spc.fill_between(daily_all["day"], daily_all["defect_rate"], alpha=0.15, color="steelblue")
  972. # 控制限线
  973. ax_spc.axhline(y=p_bar, color="green", linestyle="-", linewidth=1.5, label=f"CL (中心线): {p_bar:.2%}")
  974. ax_spc.axhline(y=UCL, color="red", linestyle="--", linewidth=1, label=f"UCL: {UCL:.2%}")
  975. ax_spc.axhline(y=LCL, color="red", linestyle="--", linewidth=1, label=f"LCL: {LCL:.2%}")
  976. ax_spc.axhline(y=UWL, color="orange", linestyle=":", linewidth=1, alpha=0.6, label=f"UWL (2σ): {UWL:.2%}")
  977. ax_spc.axhline(y=LWL, color="orange", linestyle=":", linewidth=1, alpha=0.6, label=f"LWL (2σ): {LWL:.2%}")
  978. # 标注异常点
  979. for v in we_violations:
  980. if "Rule 1" in v["规则"]:
  981. anomaly_date = pd.Timestamp(v["日期"])
  982. val = float(v["值"].rstrip("%")) / 100
  983. ax_spc.annotate("⚠️", (anomaly_date, val), fontsize=12,
  984. ha="center", va="bottom", color="red")
  985. ax_spc.set_title("SPC 控制图 - 每日缺陷率")
  986. ax_spc.set_ylabel("缺陷率")
  987. ax_spc.tick_params(axis="x", rotation=45)
  988. ax_spc.legend(fontsize=8, loc="upper right")
  989. ax_spc.grid(True, alpha=0.3)
  990. st.pyplot(fig_spc)
  991. plt.close()
  992. # --- 趋势图 ---
  993. st.subheader("缺陷率趋势 (含线性回归)")
  994. fig_trend, ax_trend = plt.subplots(figsize=(14, 4))
  995. ax_trend.plot(daily_all["day"], daily_all["defect_rate"],
  996. marker="o", markersize=3, linewidth=1.5, color="steelblue", label="日缺陷率")
  997. ax_trend.plot(daily_all["day"], daily_all["trend"],
  998. color="red", linestyle="--", linewidth=2, label=f"趋势线 (斜率: {slope*100:.3f}%/天)")
  999. ax_trend.fill_between(daily_all["day"], daily_all["defect_rate"], alpha=0.1, color="steelblue")
  1000. ax_trend.axhline(y=p_bar, color="green", linestyle="--", alpha=0.5, label=f"平均: {p_bar:.2%}")
  1001. ax_trend.set_ylabel("缺陷率")
  1002. ax_trend.tick_params(axis="x", rotation=45)
  1003. ax_trend.legend(fontsize=8)
  1004. ax_trend.grid(True, alpha=0.3)
  1005. st.pyplot(fig_trend)
  1006. plt.close()
  1007. # --- 告警清单 ---
  1008. st.divider()
  1009. st.subheader("⚠️ Western Electric 规则告警清单")
  1010. if we_violations:
  1011. we_df = pd.DataFrame(we_violations)
  1012. st.dataframe(we_df, use_container_width=True)
  1013. st.warning(f"共发现 **{len(we_violations)}** 次统计异常,建议关注对应日期的工艺参数和人员排班")
  1014. else:
  1015. st.success("✅ 未触发 Western Electric 规则告警,过程处于统计控制状态")
  1016. # --- 结论 ---
  1017. st.divider()
  1018. st.subheader("📋 过程能力结论")
  1019. if trend_status == "改善中":
  1020. st.success(
  1021. f"**趋势改善中** 📉\n\n"
  1022. f"每日缺陷率以平均 {abs(slope)*100:.3f}%/天 的速度下降。\n"
  1023. f"当前平均缺陷率为 {p_bar:.2%},控制上限 {UCL:.2%}。\n"
  1024. f"{'已触发' if we_violations else '未触发'} Western Electric 规则告警。"
  1025. )
  1026. elif trend_status == "恶化中":
  1027. st.error(
  1028. f"**趋势恶化中** 📈\n\n"
  1029. f"每日缺陷率以平均 {slope*100:.3f}%/天 的速度上升。\n"
  1030. f"当前平均缺陷率为 {p_bar:.2%},控制上限 {UCL:.2%}。\n"
  1031. f"{'已触发' if we_violations else '未触发'} Western Electric 规则告警。\n\n"
  1032. f"建议:检查近期工艺参数变化、设备状态和原材料批次。"
  1033. )
  1034. else:
  1035. st.info(
  1036. f"**过程稳定** ➡️\n\n"
  1037. f"缺陷率趋势平稳,斜率 {slope*100:.3f}%/天,无显著上升或下降。\n"
  1038. f"当前平均缺陷率为 {p_bar:.2%},控制限 [{LCL:.2%}, {UCL:.2%}]。\n"
  1039. f"{'已触发' if we_violations else '未触发'} Western Electric 规则告警。"
  1040. )
  1041. # ========== 重复缺陷坐标检测 ==========
  1042. _t = get_tab("🗺️ 空间集中性")
  1043. if _t:
  1044. with _t:
  1045. st.divider()
  1046. st.subheader("🎯 重复缺陷坐标检测")
  1047. st.markdown(
  1048. "检测在不同面板上重复出现的缺陷坐标。随机缺陷不会在同一位置反复出现,"
  1049. "而设备硬伤(如吸嘴划伤、夹具压痕)会在相同位置持续产生缺陷。"
  1050. "这是从'描述分析'跨入'根因诊断'的关键一步。"
  1051. )
  1052. # 坐标分桶:将面板划分为网格,找出跨面板重复的缺陷桶
  1053. repeat_bin_size = st.slider("坐标分桶大小 (mm)", min_value=5, max_value=50, value=15, step=5,
  1054. help="将坐标按此大小分桶,同一桶内出现于不同面板的缺陷视为'重复'")
  1055. pw = df["panel_width_mm"].iloc[0]
  1056. ph = df["panel_height_mm"].iloc[0]
  1057. # 计算桶ID
  1058. df_copy = filtered_df.copy()
  1059. df_copy["x_bin"] = (df_copy["x_mm"] // repeat_bin_size).astype(int)
  1060. df_copy["y_bin"] = (df_copy["y_mm"] // repeat_bin_size).astype(int)
  1061. df_copy["bin_key"] = df_copy["x_bin"].astype(str) + "_" + df_copy["y_bin"].astype(str)
  1062. # 统计每个桶出现在多少不同面板上
  1063. bin_panels = df_copy.groupby("bin_key").agg(
  1064. panel_count=("panel_id", "nunique"),
  1065. defect_count=("defect_id", "count"),
  1066. x_center=("x_mm", "mean"),
  1067. y_center=("y_mm", "mean"),
  1068. dominant_type=("defect_type", lambda x: x.mode().iloc[0] if len(x) > 0 else "-"),
  1069. dominant_severity=("severity", lambda x: x.mode().iloc[0] if len(x) > 0 else "-"),
  1070. ).reset_index()
  1071. repeat_threshold = st.slider("重复判定阈值 (跨面板数)", min_value=2, max_value=10, value=3)
  1072. repeated_bins = bin_panels[bin_panels["panel_count"] >= repeat_threshold].sort_values("panel_count", ascending=False)
  1073. col_repeat1, col_repeat2 = st.columns([1, 2])
  1074. with col_repeat1:
  1075. st.metric("重复缺陷桶数", f"{len(repeated_bins)}",
  1076. delta=f"阈值: ≥{repeat_threshold} 块面板")
  1077. if len(repeated_bins) > 0:
  1078. st.dataframe(
  1079. repeated_bins[["panel_count", "defect_count", "x_center", "y_center", "dominant_type", "dominant_severity"]]
  1080. .rename(columns={"panel_count": "涉及面板", "defect_count": "缺陷总数",
  1081. "x_center": "中心X", "y_center": "中心Y",
  1082. "dominant_type": "主要类型", "dominant_severity": "主要严重度"}),
  1083. use_container_width=True, height=400
  1084. )
  1085. else:
  1086. st.info(f"未发现跨 {repeat_threshold}+ 块面板的重复缺陷坐标")
  1087. with col_repeat2:
  1088. if len(repeated_bins) > 0:
  1089. # 在面板图上标注重复缺陷桶
  1090. fig_repeat, ax_repeat = plt.subplots(figsize=(4, 6))
  1091. # 面板背景
  1092. ax_repeat.add_patch(plt.Rectangle((0, 0), pw, ph, facecolor="#1a1a2e", edgecolor="#444", linewidth=2))
  1093. ax_repeat.add_patch(plt.Rectangle((8, 8), pw-16, ph-16, facecolor="#16213e", edgecolor="#0f3460", linewidth=1.5))
  1094. # 所有缺陷散点(淡)
  1095. ax_repeat.scatter(filtered_df["x_mm"], filtered_df["y_mm"],
  1096. alpha=0.1, s=2, c="gray", edgecolors="none", zorder=1)
  1097. # 重复缺陷桶标注重叠圈
  1098. max_count = repeated_bins["panel_count"].max()
  1099. for _, row in repeated_bins.iterrows():
  1100. size = 100 + (row["panel_count"] / max_count) * 400
  1101. ax_repeat.scatter(row["x_center"], row["y_center"],
  1102. s=size, c="red", alpha=0.3, edgecolors="red",
  1103. linewidth=2, zorder=3)
  1104. ax_repeat.text(row["x_center"], row["y_center"],
  1105. str(row["panel_count"]), ha="center", va="center",
  1106. fontsize=8, color="white", fontweight="bold", zorder=4)
  1107. ax_repeat.set_xlim(-5, pw + 5)
  1108. ax_repeat.set_ylim(-5, ph + 5)
  1109. ax_repeat.set_title(f"重复缺陷坐标 (≥{repeat_threshold} 块面板)", fontsize=11)
  1110. ax_repeat.set_xlabel("X (mm)")
  1111. ax_repeat.set_ylabel("Y (mm)")
  1112. ax_repeat.set_aspect("equal")
  1113. ax_repeat.grid(True, alpha=0.1, color="gray")
  1114. st.pyplot(fig_repeat)
  1115. plt.close()
  1116. else:
  1117. st.info("调整分桶大小或阈值以检测重复缺陷")
  1118. # ========== Tab 9: 缺陷模式识别 ==========
  1119. _t = get_tab("🔬 缺陷模式识别")
  1120. if _t:
  1121. with _t:
  1122. st.header("🔬 缺陷空间模式自动识别")
  1123. st.markdown(
  1124. "参考 WM811K 晶圆缺陷图谱分类标准,对每块面板的缺陷分布进行模式评分。"
  1125. "不同模式对应不同的根因机制(如边缘型→贴合工艺,角落型→夹具应力,"
  1126. "中心型→压力不均,线条型→机械刮伤,随机型→来料污染)。"
  1127. )
  1128. from scipy.spatial import ConvexHull
  1129. from scipy.spatial.distance import cdist
  1130. pw = df["panel_width_mm"].iloc[0]
  1131. ph = df["panel_height_mm"].iloc[0]
  1132. # 按面板分组,逐块分析模式
  1133. panel_groups = filtered_df.groupby("panel_id")
  1134. patterns_results = []
  1135. for panel_id, panel_data in panel_groups:
  1136. if len(panel_data) < 3:
  1137. continue
  1138. coords = panel_data[["x_mm", "y_mm"]].values
  1139. # 归一化坐标到 [0,1]
  1140. x_norm = panel_data["x_mm"].values / pw
  1141. y_norm = panel_data["y_mm"].values / ph
  1142. # --- 模式1: 边缘型 (缺陷靠近面板四边) ---
  1143. # 计算每个点到最近边缘的距离比例
  1144. edge_dist = np.minimum(np.minimum(x_norm, 1 - x_norm),
  1145. np.minimum(y_norm, 1 - y_norm))
  1146. edge_ratio = (edge_dist < 0.12).mean() # 12% 以内的点视为边缘点
  1147. edge_score = edge_ratio
  1148. # --- 模式2: 角落型 (缺陷集中在四个角落) ---
  1149. corner_threshold = 0.15 # 15% 范围
  1150. in_corner = (
  1151. ((x_norm < corner_threshold) & (y_norm < corner_threshold)) | # 左下
  1152. ((x_norm < corner_threshold) & (y_norm > 1 - corner_threshold)) | # 左上
  1153. ((x_norm > 1 - corner_threshold) & (y_norm < corner_threshold)) | # 右下
  1154. ((x_norm > 1 - corner_threshold) & (y_norm > 1 - corner_threshold)) # 右上
  1155. )
  1156. corner_score = in_corner.mean()
  1157. # --- 模式3: 中心型 (缺陷集中在面板中心区域) ---
  1158. center_x, center_y = 0.5, 0.5
  1159. dist_to_center = np.sqrt((x_norm - center_x)**2 + (y_norm - center_y)**2)
  1160. center_radius = 0.18 # 18% 半径
  1161. center_score = (dist_to_center < center_radius).mean()
  1162. # --- 模式4: 线条型 (缺陷沿一条线分布) ---
  1163. # 用 PCA 第一主成分占比来判断线性程度
  1164. if len(coords) >= 3:
  1165. from sklearn.decomposition import PCA
  1166. pca = PCA(n_components=2)
  1167. pca.fit(coords)
  1168. linearity = pca.explained_variance_ratio_[0] # 第一主成分占比
  1169. line_score = linearity
  1170. else:
  1171. line_score = 0
  1172. # --- 模式5: 随机型 (均匀分布,无明显模式) ---
  1173. # 用空间变异系数:将面板分为网格,计算各格缺陷数的变异系数
  1174. grid_n = 5
  1175. x_edges = np.linspace(0, pw, grid_n + 1)
  1176. y_edges = np.linspace(0, ph, grid_n + 1)
  1177. H, _, _ = np.histogram2d(panel_data["x_mm"].values, panel_data["y_mm"].values,
  1178. bins=[x_edges, y_edges])
  1179. if H.sum() > 0 and H.std() > 0:
  1180. cv = H.std() / H.mean() if H.mean() > 0 else 999
  1181. # cv 越小越均匀(随机)
  1182. randomness_score = max(0, 1 - cv / 3) # 归一化到 [0,1]
  1183. else:
  1184. randomness_score = 0
  1185. # --- 主导模式判定 ---
  1186. scores = {
  1187. "边缘型": edge_score,
  1188. "角落型": corner_score,
  1189. "中心型": center_score,
  1190. "线条型": line_score,
  1191. "随机型": randomness_score,
  1192. }
  1193. dominant_pattern = max(scores, key=scores.get)
  1194. patterns_results.append({
  1195. "面板ID": panel_id,
  1196. "缺陷数": len(panel_data),
  1197. "主导模式": dominant_pattern,
  1198. "边缘型": round(edge_score, 2),
  1199. "角落型": round(corner_score, 2),
  1200. "中心型": round(center_score, 2),
  1201. "线条型": round(line_score, 2),
  1202. "随机型": round(randomness_score, 2),
  1203. })
  1204. if patterns_results:
  1205. pattern_df = pd.DataFrame(patterns_results)
  1206. # --- 模式统计 ---
  1207. col_pat1, col_pat2, col_pat3 = st.columns([1, 1, 2])
  1208. with col_pat1:
  1209. pattern_counts = pattern_df["主导模式"].value_counts()
  1210. fig_pat, ax_pat = plt.subplots(figsize=(8, 5))
  1211. colors_pat = {"边缘型": "#FF6B6B", "角落型": "#FFA500", "中心型": "#4ECDC4",
  1212. "线条型": "#9B59B6", "随机型": "#95A5A6"}
  1213. bars = ax_pat.bar(pattern_counts.index, pattern_counts.values,
  1214. color=[colors_pat.get(p, "#888") for p in pattern_counts.index],
  1215. alpha=0.8)
  1216. for bar, count in zip(bars, pattern_counts.values):
  1217. ax_pat.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
  1218. str(count), ha="center", va="bottom", fontsize=11, fontweight="bold")
  1219. ax_pat.set_title("缺陷模式分布")
  1220. ax_pat.set_ylabel("面板数量")
  1221. st.pyplot(fig_pat)
  1222. plt.close()
  1223. with col_pat2:
  1224. st.subheader("模式占比")
  1225. total_panels = len(pattern_df)
  1226. for pattern in ["边缘型", "角落型", "中心型", "线条型", "随机型"]:
  1227. count = (pattern_df["主导模式"] == pattern).sum()
  1228. pct = count / total_panels * 100
  1229. st.metric(pattern, f"{count} 块", f"{pct:.1f}%")
  1230. with col_pat3:
  1231. # --- 模式-根因映射 ---
  1232. st.subheader("模式 → 可能根因")
  1233. root_cause_map = {
  1234. "边缘型": {
  1235. "可能原因": "贴合工艺参数异常、边缘夹具压力不均、涂胶厚度不均",
  1236. "建议排查": "检查贴合压力、边缘密封工艺、涂胶均匀性"
  1237. },
  1238. "角落型": {
  1239. "可能原因": "夹具应力集中、面板放置定位偏差、角落散热不良",
  1240. "建议排查": "检查夹具对齐、面板定位精度、角落温度分布"
  1241. },
  1242. "中心型": {
  1243. "可能原因": "压力中心不均、FPC绑定区域工艺异常、中心温度过高",
  1244. "建议排查": "检查压力分布曲线、FPC绑定参数、加热板温度"
  1245. },
  1246. "线条型": {
  1247. "可能原因": "机械刮伤、传送带划痕、清洗刷毛磨损、吸嘴移动轨迹",
  1248. "建议排查": "检查传送带状态、清洗设备、吸嘴运动轨迹"
  1249. },
  1250. "随机型": {
  1251. "可能原因": "来料污染、环境尘埃、化学药液杂质",
  1252. "建议排查": "检查洁净室等级、来料检验记录、药液过滤状态"
  1253. },
  1254. }
  1255. for pattern in ["边缘型", "角落型", "中心型", "线条型", "随机型"]:
  1256. count = (pattern_df["主导模式"] == pattern).sum()
  1257. if count == 0:
  1258. continue
  1259. rc = root_cause_map[pattern]
  1260. with st.expander(f"{pattern} ({count} 块面板)"):
  1261. st.markdown(f"**可能原因**: {rc['可能原因']}")
  1262. st.markdown(f"**建议排查**: {rc['建议排查']}")
  1263. # --- 详细数据表 ---
  1264. st.divider()
  1265. st.subheader("面板模式评分明细")
  1266. st.dataframe(pattern_df, use_container_width=True, height=400)
  1267. else:
  1268. st.warning("当前筛选条件下无足够面板数据进行模式分析(需至少 3 个缺陷/面板)")
  1269. # ========== Tab 10: 设备健康与共性分析 ==========
  1270. _t = get_tab("💚 设备健康与共性分析")
  1271. if _t:
  1272. with _t:
  1273. st.header("💚 设备健康评分 & 共性分析")
  1274. st.markdown(
  1275. "综合评估各台设备的健康状态,并在发现异常批次时自动分析其共性特征。"
  1276. )
  1277. # --- 设备健康评分 ---
  1278. st.subheader("设备健康评分 (0-100)")
  1279. st.markdown("评分维度:缺陷率(40%) + 座号集中度(30%) + 严重度分布(30%)")
  1280. health_data = []
  1281. for eq_id in sorted(df["equipment_id"].unique()):
  1282. eq_all = df[df["equipment_id"] == eq_id]
  1283. eq_filtered = filtered_df[filtered_df["equipment_id"] == eq_id]
  1284. # 维度1: 缺陷率评分 (40%)
  1285. eq_panels = eq_all["panel_id"].nunique()
  1286. eq_defects = len(eq_all)
  1287. eq_defect_rate = eq_defects / max(eq_panels, 1)
  1288. # 缺陷率越低分越高,线性归一化
  1289. # 以 5 个缺陷/面板为最差(0分),0 为最好(100分)
  1290. rate_score = max(0, 100 * (1 - eq_defect_rate / 5))
  1291. # 维度2: 座号集中度评分 (30%)
  1292. # 座号分布越均匀分越高,集中分越低
  1293. eq_seat_counts = eq_all.groupby("seat_id").size()
  1294. if len(eq_seat_counts) > 1:
  1295. seat_cv = eq_seat_counts.std() / max(eq_seat_counts.mean(), 0.001)
  1296. # cv 越小越均匀,得分越高
  1297. seat_score = max(0, 100 * (1 - seat_cv / 3))
  1298. else:
  1299. seat_score = 50
  1300. # 维度3: 严重度评分 (30%)
  1301. eq_sev = eq_all["severity"].value_counts()
  1302. severe_ratio = eq_sev.get("严重", 0) / max(len(eq_all), 1)
  1303. sev_score = max(0, 100 * (1 - severe_ratio * 3)) # 严重占比 33% 时为 0 分
  1304. # 综合得分
  1305. total_score = rate_score * 0.4 + seat_score * 0.3 + sev_score * 0.3
  1306. health_data.append({
  1307. "设备ID": eq_id,
  1308. "缺陷总数": eq_defects,
  1309. "缺陷率": f"{eq_defect_rate:.2f}",
  1310. "座号集中度(CV)": f"{seat_cv:.2f}" if len(eq_seat_counts) > 1 else "N/A",
  1311. "严重占比": f"{severe_ratio:.1%}",
  1312. "缺陷率分(40%)": round(rate_score, 1),
  1313. "座号分(30%)": round(seat_score, 1),
  1314. "严重度分(30%)": round(sev_score, 1),
  1315. "健康总分": round(total_score, 1),
  1316. })
  1317. health_df = pd.DataFrame(health_data).sort_values("健康总分", ascending=False)
  1318. # 显示健康评分
  1319. col_h1, col_h2 = st.columns([3, 2])
  1320. with col_h1:
  1321. st.dataframe(health_df, use_container_width=True, hide_index=True)
  1322. with col_h2:
  1323. # 可视化排名
  1324. fig_health, ax_health = plt.subplots(figsize=(6, 4))
  1325. health_sorted = health_df.sort_values("健康总分", ascending=True)
  1326. colors_health = ["#4CAF50" if s >= 70 else "#FF9800" if s >= 40 else "#F44336"
  1327. for s in health_sorted["健康总分"]]
  1328. bars = ax_health.barh(health_sorted["设备ID"], health_sorted["健康总分"],
  1329. color=colors_health, alpha=0.8, height=0.5)
  1330. for bar, score in zip(bars, health_sorted["健康总分"]):
  1331. ax_health.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
  1332. f"{score:.0f}", ha="left", va="center", fontsize=12, fontweight="bold")
  1333. ax_health.set_xlabel("健康评分 (0-100)")
  1334. ax_health.set_title("设备健康排名")
  1335. ax_health.set_xlim(0, 110)
  1336. st.pyplot(fig_health)
  1337. plt.close()
  1338. # --- 共性分析 ---
  1339. st.divider()
  1340. st.subheader("🔍 异常批次共性分析")
  1341. st.markdown("选中异常批次后,自动分析这些批次的共同特征(设备/时段/座号/缺陷类型)。")
  1342. # 自动检测异常批次(基于缺陷率)
  1343. batch_stats = df.groupby("batch_id").agg(
  1344. defects=("defect_id", "count"),
  1345. panels=("panel_id", "nunique")
  1346. )
  1347. batch_stats["defect_rate"] = batch_stats["defects"] / batch_stats["panels"]
  1348. threshold = batch_stats["defect_rate"].mean() + batch_stats["defect_rate"].std()
  1349. abnormal_batches = batch_stats[batch_stats["defect_rate"] > threshold].index.tolist()
  1350. st.info(f"自动检测到的异常批次 (缺陷率 > {threshold:.2%}): **{len(abnormal_batches)}** 个")
  1351. st.write(", ".join(abnormal_batches[:10]))
  1352. if abnormal_batches:
  1353. col_c1, col_c2 = st.columns(2)
  1354. with col_c1:
  1355. # 选择要分析的批次
  1356. selected_abnormal = st.multiselect(
  1357. "选择要分析的异常批次",
  1358. options=abnormal_batches,
  1359. default=abnormal_batches[:3] if len(abnormal_batches) >= 3 else abnormal_batches,
  1360. key="commonality_batch"
  1361. )
  1362. if selected_abnormal:
  1363. abnormal_df = df[df["batch_id"].isin(selected_abnormal)]
  1364. normal_df = df[~df["batch_id"].isin(selected_abnormal)]
  1365. st.divider()
  1366. st.markdown(f"**分析对象**: {len(selected_abnormal)} 个异常批次, "
  1367. f"{len(abnormal_df)} 条缺陷记录")
  1368. # 共性分析:设备
  1369. st.subheader("共性特征 TOP3")
  1370. col_common1, col_common2, col_common3 = st.columns(3)
  1371. with col_common1:
  1372. # 设备共性
  1373. abnormal_eq_rate = abnormal_df.groupby("equipment_id").size() / len(abnormal_df)
  1374. normal_eq_rate = normal_df.groupby("equipment_id").size() / len(normal_df)
  1375. eq_boost = {}
  1376. for eq in abnormal_df["equipment_id"].unique():
  1377. a_rate = abnormal_eq_rate.get(eq, 0)
  1378. n_rate = normal_eq_rate.get(eq, 0)
  1379. if n_rate > 0:
  1380. eq_boost[eq] = (a_rate - n_rate) / n_rate * 100
  1381. else:
  1382. eq_boost[eq] = 999
  1383. eq_top = sorted(eq_boost.items(), key=lambda x: x[1], reverse=True)[:3]
  1384. st.markdown("**设备共用性**")
  1385. for eq, boost in eq_top:
  1386. st.markdown(f"- {eq}: 异常占比 {abnormal_eq_rate.get(eq, 0):.1%}, "
  1387. f"相对正常 **+{boost:.0f}%**")
  1388. with col_common2:
  1389. # 时段共性
  1390. abnormal_hour = abnormal_df.groupby("hour").size() / len(abnormal_df)
  1391. normal_hour = normal_df.groupby("hour").size() / len(normal_df)
  1392. # 按班次聚合
  1393. abnormal_shift = abnormal_df.groupby("shift").size() / len(abnormal_df)
  1394. normal_shift = normal_df.groupby("shift").size() / len(normal_df)
  1395. st.markdown("**时段共性**")
  1396. for shift in ["白班", "夜班"]:
  1397. a_rate = abnormal_shift.get(shift, 0)
  1398. n_rate = normal_shift.get(shift, 0)
  1399. if n_rate > 0:
  1400. boost = (a_rate - n_rate) / n_rate * 100
  1401. else:
  1402. boost = 999
  1403. st.markdown(f"- {shift}: 异常占比 {a_rate:.1%}, "
  1404. f"相对正常 **{'+' if boost > 0 else ''}{boost:.0f}%**")
  1405. with col_common3:
  1406. # 座号共性
  1407. abnormal_seat = abnormal_df.groupby("seat_id").size() / len(abnormal_df)
  1408. normal_seat = normal_df.groupby("seat_id").size() / len(normal_df)
  1409. seat_boost = {}
  1410. for seat in abnormal_df["seat_id"].unique():
  1411. a_rate = abnormal_seat.get(seat, 0)
  1412. n_rate = normal_seat.get(seat, 0)
  1413. if n_rate > 0:
  1414. seat_boost[seat] = (a_rate - n_rate) / n_rate * 100
  1415. else:
  1416. seat_boost[seat] = 999
  1417. seat_top = sorted(seat_boost.items(), key=lambda x: x[1], reverse=True)[:3]
  1418. st.markdown("**座号共性**")
  1419. for seat, boost in seat_top:
  1420. st.markdown(f"- {seat}: 异常占比 {abnormal_seat.get(seat, 0):.1%}, "
  1421. f"相对正常 **+{boost:.0f}%**")
  1422. # --- 缺陷类型偏差 ---
  1423. st.subheader("异常批次缺陷类型偏差")
  1424. abnormal_type = abnormal_df.groupby("defect_type").size() / len(abnormal_df)
  1425. normal_type = normal_df.groupby("defect_type").size() / len(normal_df)
  1426. type_diff = []
  1427. for t in set(list(abnormal_type.index) + list(normal_type.index)):
  1428. a_rate = abnormal_type.get(t, 0)
  1429. n_rate = normal_type.get(t, 0)
  1430. type_diff.append({
  1431. "缺陷类型": t,
  1432. "异常占比": f"{a_rate:.1%}",
  1433. "正常占比": f"{n_rate:.1%}",
  1434. "偏差": f"{'+' if a_rate > n_rate else ''}{(a_rate - n_rate) / max(n_rate, 0.001) * 100:.0f}%",
  1435. })
  1436. st.dataframe(pd.DataFrame(type_diff).sort_values("偏差", key=lambda x: x.str.rstrip("%").astype(float), ascending=False),
  1437. use_container_width=True, hide_index=True)
# ========== Tab 11: Multi-layer overlay analysis ==========
# The tab body only runs when get_tab returns a truthy container for this label.
_t = get_tab("🔲 多层叠加分析")
if _t:
    with _t:
        st.header("🔲 多层叠加分析")
        st.markdown(
            "将缺陷数据与面板物理区域、设备座号、时间维度叠加在同一视图上,"
            "揭示单一维度看不到的深层关联。"
        )
        # Panel size is read from the first row only.
        # NOTE(review): assumes every row shares the same panel dimensions — confirm.
        pw = df["panel_width_mm"].iloc[0]
        ph = df["panel_height_mm"].iloc[0]
        # --- Custom zone definitions ---
        st.subheader("📐 自定义区域缺陷统计")
        st.markdown("将面板划分为不同功能区域,统计各区域缺陷分布")
        # Zone definitions: (name, membership rule), on coordinates normalised to 0..1
        # Edge zone: closer than 15% to any of the four sides
        # Center zone: distance to the center < 20% radius
        # Corner zone: the 15% range around each of the four corners
        # FPC zone: Y > 70% of the height
        # Upper half / lower half
  1458. def classify_zone(x_norm, y_norm):
  1459. """将每个缺陷点分类到区域"""
  1460. zones = []
  1461. for i in range(len(x_norm)):
  1462. zx, zy = x_norm[i], y_norm[i]
  1463. zone_list = []
  1464. # 边缘区
  1465. if min(zx, 1 - zx, zy, 1 - zy) < 0.15:
  1466. zone_list.append("边缘区")
  1467. # 中心区
  1468. if np.sqrt((zx - 0.5)**2 + (zy - 0.5)**2) < 0.20:
  1469. zone_list.append("中心区")
  1470. # 角落区
  1471. if (zx < 0.15 or zx > 0.85) and (zy < 0.15 or zy > 0.85):
  1472. zone_list.append("角落区")
  1473. # FPC区
  1474. if zy > 0.70:
  1475. zone_list.append("FPC区")
  1476. # 上半区
  1477. if zy < 0.50:
  1478. zone_list.append("上半区")
  1479. # 下半区
  1480. if zy > 0.50:
  1481. zone_list.append("下半区")
  1482. if not zone_list:
  1483. zone_list.append("其他区域")
  1484. zones.append(", ".join(zone_list))
  1485. return zones
  1486. # 计算每个缺陷的区域归属
  1487. x_norm_arr = filtered_df["x_mm"].values / pw
  1488. y_norm_arr = filtered_df["y_mm"].values / ph
  1489. filtered_df_copy = filtered_df.copy()
  1490. filtered_df_copy["zone"] = classify_zone(x_norm_arr, y_norm_arr)
  1491. # 统计各区域缺陷数
  1492. zone_counts = {}
  1493. zone_types = ["边缘区", "中心区", "角落区", "FPC区", "上半区", "下半区", "其他区域"]
  1494. for z in zone_types:
  1495. count = filtered_df_copy["zone"].str.contains(z).sum()
  1496. zone_counts[z] = count
  1497. col_z1, col_z2 = st.columns([1, 2])
  1498. with col_z1:
  1499. st.subheader("区域缺陷统计")
  1500. for z in zone_types:
  1501. count = zone_counts.get(z, 0)
  1502. pct = count / max(len(filtered_df_copy), 1) * 100
  1503. bar_len = int(pct / 100 * 200)
  1504. bar = "█" * max(bar_len, 0)
  1505. st.markdown(f"{z} | {bar} **{count}** ({pct:.1f}%)")
  1506. with col_z2:
  1507. # 区域可视化
  1508. fig_zone, ax_zone = plt.subplots(figsize=(4, 6))
  1509. # 面板背景
  1510. ax_zone.add_patch(plt.Rectangle((0, 0), pw, ph, facecolor="#1a1a2e", edgecolor="#444", linewidth=2))
  1511. # 区域边界
  1512. # 边缘区 (15% 边界)
  1513. margin_x = pw * 0.15
  1514. margin_y = ph * 0.15
  1515. ax_zone.add_patch(plt.Rectangle((0, 0), margin_x, ph, fill=False, edgecolor="yellow", linewidth=1, alpha=0.4, linestyle="--"))
  1516. ax_zone.add_patch(plt.Rectangle((pw - margin_x, 0), margin_x, ph, fill=False, edgecolor="yellow", linewidth=1, alpha=0.4, linestyle="--"))
  1517. ax_zone.add_patch(plt.Rectangle((0, 0), pw, margin_y, fill=False, edgecolor="yellow", linewidth=1, alpha=0.4, linestyle="--"))
  1518. ax_zone.add_patch(plt.Rectangle((0, ph - margin_y), pw, margin_y, fill=False, edgecolor="yellow", linewidth=1, alpha=0.4, linestyle="--"))
  1519. # 中心区 (20% 半径)
  1520. center_r = 0.20 * max(pw, ph) / 2
  1521. circle = plt.Circle((pw/2, ph/2), center_r, fill=False, edgecolor="cyan", linewidth=1.5, alpha=0.5, linestyle="--")
  1522. ax_zone.add_patch(circle)
  1523. # FPC区
  1524. fpc_y = ph * 0.70
  1525. ax_zone.add_patch(plt.Rectangle((0, fpc_y), pw, ph - fpc_y, fill=False, edgecolor="magenta", linewidth=1.5, alpha=0.5, linestyle="--"))
  1526. # 缺陷散点
  1527. scatter_colors = {"边缘区": "yellow", "中心区": "cyan", "角落区": "orange",
  1528. "FPC区": "magenta", "上半区": "#4ECDC4", "下半区": "#45B7D1", "其他区域": "gray"}
  1529. for z_name in zone_types:
  1530. z_mask = filtered_df_copy["zone"].str.contains(z_name)
  1531. if z_mask.sum() > 0:
  1532. z_data = filtered_df_copy[z_mask]
  1533. ax_zone.scatter(z_data["x_mm"], z_data["y_mm"],
  1534. c=scatter_colors.get(z_name, "gray"), s=5, alpha=0.3,
  1535. label=f"{z_name} ({z_mask.sum()})", edgecolors="none", zorder=2)
  1536. ax_zone.set_xlim(-5, pw + 5)
  1537. ax_zone.set_ylim(-5, ph + 5)
  1538. ax_zone.set_title("缺陷区域叠加图 (虚线=区域边界)")
  1539. ax_zone.set_xlabel("X (mm)")
  1540. ax_zone.set_ylabel("Y (mm)")
  1541. ax_zone.set_aspect("equal")
  1542. ax_zone.legend(fontsize=7, loc="upper right", ncol=1, framealpha=0.7)
  1543. st.pyplot(fig_zone)
  1544. plt.close()
  1545. # --- 跨批次同座号面板对比 ---
  1546. st.divider()
  1547. st.subheader("🔀 跨批次同座号面板对比")
  1548. st.markdown(
  1549. "选择一台设备和一个座号,查看该座号在不同批次生产的面板上缺陷分布的对比。"
  1550. "如果同一座号持续在相同位置产生缺陷 → 该座号存在系统性问题。"
  1551. )
  1552. col_cmp1, col_cmp2, col_cmp3 = st.columns(3)
  1553. with col_cmp1:
  1554. cmp_eq = st.selectbox("选择设备", options=sorted(df["equipment_id"].unique()), key="cmp_eq")
  1555. with col_cmp2:
  1556. eq_seats = sorted(df[(df["equipment_id"] == cmp_eq)]["seat_id"].unique())
  1557. cmp_seat = st.selectbox("选择座号", options=eq_seats, key="cmp_seat")
  1558. with col_cmp3:
  1559. # 找出有该设备座号缺陷的批次
  1560. eq_seat_batches = sorted(df[(df["equipment_id"] == cmp_eq) & (df["seat_id"] == cmp_seat)]["batch_id"].unique())
  1561. cmp_batches = st.multiselect("选择对比批次", options=eq_seat_batches, default=eq_seat_batches[:3] if len(eq_seat_batches) >= 3 else eq_seat_batches)
  1562. if cmp_batches and len(cmp_batches) >= 2:
  1563. n_cols = min(len(cmp_batches), 3)
  1564. n_rows = (len(cmp_batches) + n_cols - 1) // n_cols
  1565. fig_cmp, axes_cmp = plt.subplots(n_rows, n_cols, figsize=(3.5 * n_cols, 5 * n_rows))
  1566. axes_cmp = axes_cmp.flatten() if n_cols * n_rows > 1 else [axes_cmp]
  1567. for i, batch in enumerate(cmp_batches):
  1568. ax = axes_cmp[i]
  1569. batch_data = df[(df["equipment_id"] == cmp_eq) & (df["seat_id"] == cmp_seat) & (df["batch_id"] == batch)]
  1570. # 面板背景
  1571. ax.add_patch(plt.Rectangle((0, 0), pw, ph, facecolor="#1a1a2e", edgecolor="#444", linewidth=1))
  1572. if len(batch_data) > 0:
  1573. # 按缺陷类型着色
  1574. type_colors = {"划痕": "red", "亮点": "yellow", "暗点": "black", "气泡": "cyan",
  1575. "色差": "magenta", "漏光": "orange", "裂纹": "darkred", "异物": "green"}
  1576. for _, row in batch_data.iterrows():
  1577. c = type_colors.get(row["defect_type"], "white")
  1578. ax.scatter(row["x_mm"], row["y_mm"], c=c, s=30, alpha=0.7, edgecolors="white", linewidth=0.3, zorder=3)
  1579. ax.set_xlim(-3, pw + 3)
  1580. ax.set_ylim(-3, ph + 3)
  1581. ax.set_title(f"{batch}\n{len(batch_data)} 缺陷", fontsize=9)
  1582. ax.set_aspect("equal")
  1583. ax.grid(True, alpha=0.1, color="gray")
  1584. ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)
  1585. # 隐藏多余子图
  1586. for j in range(len(cmp_batches), len(axes_cmp)):
  1587. axes_cmp[j].set_visible(False)
  1588. fig_cmp.suptitle(f"{cmp_eq} / {cmp_seat} 跨批次对比", fontsize=12, y=1.01)
  1589. plt.tight_layout()
  1590. st.pyplot(fig_cmp)
  1591. plt.close()
  1592. # 对比统计
  1593. st.subheader("对比统计")
  1594. comp_stats = []
  1595. for batch in cmp_batches:
  1596. batch_data = df[(df["equipment_id"] == cmp_eq) & (df["seat_id"] == cmp_seat) & (df["batch_id"] == batch)]
  1597. comp_stats.append({
  1598. "批次": batch,
  1599. "缺陷数": len(batch_data),
  1600. "主要类型": batch_data["defect_type"].mode().iloc[0] if len(batch_data) > 0 else "-",
  1601. "严重占比": f"{(batch_data['severity']=='严重').sum() / max(len(batch_data), 1):.0%}",
  1602. "中心X": round(batch_data["x_mm"].mean(), 1) if len(batch_data) > 0 else "-",
  1603. "中心Y": round(batch_data["y_mm"].mean(), 1) if len(batch_data) > 0 else "-",
  1604. })
  1605. st.dataframe(pd.DataFrame(comp_stats), use_container_width=True, hide_index=True)
  1606. # 趋势判断
  1607. if len(cmp_batches) >= 3:
  1608. defect_counts = [len(df[(df["equipment_id"] == cmp_eq) & (df["seat_id"] == cmp_seat) & (df["batch_id"] == b)]) for b in cmp_batches]
  1609. x_trend = np.arange(len(cmp_batches))
  1610. coeffs = np.polyfit(x_trend, defect_counts, 1)
  1611. slope = coeffs[0]
  1612. if slope > 0.5:
  1613. st.warning(f"⚠️ **{cmp_eq}/{cmp_seat}** 缺陷数呈**上升趋势** (斜率: {slope:.1f}/批次),建议安排设备检修")
  1614. elif slope < -0.5:
  1615. st.success(f"✅ **{cmp_eq}/{cmp_seat}** 缺陷数呈**改善趋势** (斜率: {slope:.1f}/批次)")
  1616. else:
  1617. st.info(f"➡️ **{cmp_eq}/{cmp_seat}** 缺陷数**平稳** (斜率: {slope:.1f}/批次)")
  1618. else:
  1619. st.info("请选择至少 2 个批次进行对比")
  1620. # --- 缺陷传播追踪 ---
  1621. st.divider()
  1622. st.subheader("📡 缺陷坐标传播追踪")
  1623. st.markdown(
  1624. "追踪同一坐标区域在时间轴上的缺陷演变,识别持续恶化的位置。"
  1625. "如果某坐标的缺陷数量随时间递增 → 该位置存在渐进性损伤(如吸嘴持续磨损)。"
  1626. )
  1627. # 坐标分桶 + 时间维度
  1628. prop_bin = st.slider("传播追踪分桶大小 (mm)", min_value=10, max_value=50, value=20, step=10)
  1629. df_time = df.copy()
  1630. df_time["x_bin"] = (df_time["x_mm"] // prop_bin).astype(int)
  1631. df_time["y_bin"] = (df_time["y_mm"] // prop_bin).astype(int)
  1632. # 按桶 + 日期聚合
  1633. prop_df = df_time.groupby(["x_bin", "y_bin", "day"]).size().reset_index(name="defect_count")
  1634. # 找出至少有 3 天数据的桶
  1635. bucket_days = prop_df.groupby(["x_bin", "y_bin"])["day"].nunique()
  1636. active_buckets = bucket_days[bucket_days >= 3].index.tolist()
  1637. if active_buckets:
  1638. # 选择要追踪的桶
  1639. bucket_options = [f"({bx},{by})" for bx, by in active_buckets]
  1640. bucket_counts = prop_df.groupby(["x_bin", "y_bin"])["defect_count"].sum().sort_values(ascending=False)
  1641. # 默认选缺陷最多的桶
  1642. default_top = bucket_counts.index[0]
  1643. selected_bucket = st.selectbox(
  1644. "选择要追踪的坐标桶",
  1645. options=bucket_options,
  1646. index=0,
  1647. format_func=lambda x: f"{x} (总缺陷: {bucket_counts.loc[tuple(map(int, x.strip('()').split(',')))]:.0f})"
  1648. )
  1649. bx, by = map(int, selected_bucket.strip("()").split(","))
  1650. bucket_timeline = prop_df[(prop_df["x_bin"] == bx) & (prop_df["y_bin"] == by)].sort_values("day")
  1651. bucket_timeline["day"] = pd.to_datetime(bucket_timeline["day"])
  1652. # 传播趋势图
  1653. fig_prop, ax_prop = plt.subplots(figsize=(12, 4))
  1654. ax_prop.bar(bucket_timeline["day"], bucket_timeline["defect_count"],
  1655. color="steelblue", alpha=0.7, width=0.8)
  1656. # 趋势线
  1657. if len(bucket_timeline) >= 2:
  1658. x_t = np.arange(len(bucket_timeline))
  1659. coeffs_p = np.polyfit(x_t, bucket_timeline["defect_count"].values, 1)
  1660. slope_p = coeffs_p[0]
  1661. trend_y = np.polyval(coeffs_p, x_t)
  1662. ax_prop.plot(bucket_timeline["day"], trend_y, color="red", linestyle="--",
  1663. linewidth=2, label=f"趋势 (斜率: {slope_p:.2f}/天)")
  1664. if slope_p > 0.3:
  1665. ax_prop.set_title(f"坐标桶 ({bx},{by}) — 缺陷数上升 (恶化趋势)")
  1666. elif slope_p < -0.3:
  1667. ax_prop.set_title(f"坐标桶 ({bx},{by}) — 缺陷数下降 (改善趋势)")
  1668. else:
  1669. ax_prop.set_title(f"坐标桶 ({bx},{by}) — 缺陷数平稳")
  1670. else:
  1671. ax_prop.set_title(f"坐标桶 ({bx},{by})")
  1672. ax_prop.set_ylabel("缺陷数量")
  1673. ax_prop.tick_params(axis="x", rotation=45)
  1674. ax_prop.legend()
  1675. ax_prop.grid(True, alpha=0.3, axis="y")
  1676. st.pyplot(fig_prop)
  1677. plt.close()
  1678. # 该桶的缺陷类型演变
  1679. bucket_data = df_time[(df_time["x_bin"] == bx) & (df_time["y_bin"] == by)]
  1680. st.markdown(f"**坐标桶 ({bx},{by}) 缺陷类型演变** (对应面板区域: X {bx*prop_bin}-{(bx+1)*prop_bin}mm, Y {by*prop_bin}-{(by+1)*prop_bin}mm)")
  1681. bucket_type_timeline = bucket_data.groupby(["day", "defect_type"]).size().unstack(fill_value=0)
  1682. bucket_type_timeline.index = pd.to_datetime(bucket_type_timeline.index)
  1683. st.dataframe(bucket_type_timeline, use_container_width=True, height=300)
  1684. else:
  1685. st.info("当前数据中无足够多天数的连续缺陷坐标桶 (需 ≥3 天)")
  1686. # --- 底部:数据导出 ---
  1687. st.divider()
  1688. if current_config["show_export"]:
  1689. st.subheader("📥 数据导出")
  1690. # 综合报告导出
  1691. st.subheader("📋 一键导出综合报告")
  1692. st.markdown("包含所有分析模块的关键结论,适合汇报和存档。")
  1693. report_parts = []
  1694. report_parts.append("# 缺陷集中性分析综合报告\n")
  1695. report_parts.append(f"**生成时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
  1696. report_parts.append(f"**数据范围**: {start_date.strftime('%Y-%m-%d')} ~ {end_date.strftime('%Y-%m-%d')}")
  1697. report_parts.append(f"**筛选后缺陷数**: {len(filtered_df)} 条")
  1698. report_parts.append(f"**涉及面板**: {filtered_df['panel_id'].nunique()} 块")
  1699. report_parts.append(f"**视图模式**: {view_mode}\n")
  1700. # 1. KPI 摘要
  1701. report_parts.append("## 1. KPI 摘要\n")
  1702. total_panels_inspected_r = df[df["timestamp"] >= start_date]["panel_id"].nunique()
  1703. defective_panels_r = filtered_df["panel_id"].nunique()
  1704. yield_rate_r = (1 - defective_panels_r / max(total_panels_inspected_r, 1)) * 100
  1705. report_parts.append(f"- 检测面板数: {total_panels_inspected_r} 块")
  1706. report_parts.append(f"- 不良面板数: {defective_panels_r} 块 ({defective_panels_r/total_panels_inspected_r*100:.1f}%)")
  1707. report_parts.append(f"- 综合良率: {yield_rate_r:.1f}%")
  1708. report_parts.append(f"- 缺陷总数: {len(filtered_df)} 个")
  1709. report_parts.append(f"- 严重缺陷: {(filtered_df['severity']=='严重').sum()} 个\n")
  1710. # 2. 缺陷类型
  1711. report_parts.append("## 2. 缺陷类型分布\n")
  1712. type_counts_r = filtered_df["defect_type"].value_counts()
  1713. for t, c in type_counts_r.items():
  1714. report_parts.append(f"- {t}: {c} ({c/len(filtered_df)*100:.1f}%)")
  1715. report_parts.append("")
  1716. # 3. 设备/座号
  1717. if "equipment_id" in filtered_df.columns:
  1718. report_parts.append("## 3. 设备与座号分布\n")
  1719. eq_counts = filtered_df["equipment_id"].value_counts()
  1720. for e, c in eq_counts.items():
  1721. report_parts.append(f"- {e}: {c} 个缺陷")
  1722. seat_top = filtered_df["seat_id"].value_counts().head(5)
  1723. report_parts.append(f"\n**缺陷座号 TOP5**:")
  1724. for i, (s, c) in enumerate(seat_top.items(), 1):
  1725. report_parts.append(f" {i}. {s}: {c} 个")
  1726. report_parts.append("")
  1727. # 4. 趋势
  1728. report_parts.append("## 4. 趋势分析\n")
  1729. daily_r = filtered_df.groupby("day").size()
  1730. if len(daily_r) >= 2:
  1731. x_r = np.arange(len(daily_r))
  1732. coeffs_r = np.polyfit(x_r, daily_r.values.astype(float), 1)
  1733. slope_r = coeffs_r[0]
  1734. if slope_r > 0:
  1735. report_parts.append(f"- 缺陷数趋势: **上升** (斜率 {slope_r:.1f}/天)")
  1736. else:
  1737. report_parts.append(f"- 缺陷数趋势: **下降** (斜率 {slope_r:.1f}/天)")
  1738. report_parts.append("")
  1739. # 5. 异常座号
  1740. report_parts.append("## 5. 异常检测\n")
  1741. if "seat_id" in filtered_df.columns:
  1742. all_seat_stats_r = filtered_df.groupby(["equipment_id", "seat_id"]).size()
  1743. mean_r = all_seat_stats_r.mean()
  1744. std_r = all_seat_stats_r.std()
  1745. threshold_2x_r = mean_r + 2 * std_r
  1746. critical_r = all_seat_stats_r[all_seat_stats_r > threshold_2x_r]
  1747. if len(critical_r) > 0:
  1748. report_parts.append(f"- ⚠️ 2σ 异常座号: {len(critical_r)} 个")
  1749. for (eq, seat), count in critical_r.items():
  1750. report_parts.append(f" - {eq}/{seat}: {count} 个缺陷")
  1751. else:
  1752. report_parts.append("- ✅ 无 2σ 异常座号")
  1753. report_parts.append("")
  1754. # 6. 建议
  1755. report_parts.append("## 6. 建议\n")
  1756. top_type = type_counts_r.index[0] if len(type_counts_r) > 0 else "-"
  1757. top_eq = eq_counts.index[0] if len(eq_counts) > 0 else "-"
  1758. report_parts.append(f"- 重点关注缺陷类型: **{top_type}**")
  1759. report_parts.append(f"- 重点关注设备: **{top_eq}**")
  1760. report_parts.append("- 建议查看 SPC 控制图确认趋势状态")
  1761. report_parts.append("- 建议检查设备健康评分\n")
  1762. report_parts.append("---\n*本报告由缺陷集中性分析系统自动生成*")
  1763. full_report = "\n".join(report_parts)
  1764. col_exp1, col_exp2, col_exp3 = st.columns(3)
  1765. with col_exp1:
  1766. st.download_button(
  1767. label="📥 综合报告 (MD)",
  1768. data=full_report.encode("utf-8"),
  1769. file_name=f"defect_report_{datetime.now().strftime('%Y%m%d')}.md",
  1770. mime="text/markdown",
  1771. use_container_width=True
  1772. )
  1773. with col_exp2:
  1774. csv_data = filtered_df.to_csv(index=False).encode("utf-8-sig")
  1775. st.download_button(
  1776. label="📥 筛选数据 (CSV)",
  1777. data=csv_data,
  1778. file_name=f"defect_data_{datetime.now().strftime('%Y%m%d')}.csv",
  1779. mime="text/csv",
  1780. use_container_width=True
  1781. )
  1782. with col_exp3:
  1783. # 精简版 TXT 报告
  1784. txt_lines = ["缺陷集中性分析报告", f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
  1785. f"缺陷数: {len(filtered_df)} | 面板: {filtered_df['panel_id'].nunique()}",
  1786. f"良率: {yield_rate_r:.1f}%"]
  1787. for t, c in type_counts_r.head(3).items():
  1788. txt_lines.append(f" TOP: {t} {c}个")
  1789. txt_content = "\n".join(txt_lines)
  1790. st.download_button(
  1791. label="📥 精简报告 (TXT)",
  1792. data=txt_content.encode("utf-8"),
  1793. file_name=f"defect_summary_{datetime.now().strftime('%Y%m%d')}.txt",
  1794. mime="text/plain",
  1795. use_container_width=True
  1796. )