app.py 120 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657
  1. """
  2. 缺陷集中性分析 - Streamlit 交互式可视化页面
  3. """
  4. import pandas as pd
  5. import numpy as np
  6. import matplotlib
  7. matplotlib.use("Agg")
  8. import matplotlib.pyplot as plt
  9. import matplotlib.font_manager as fm
  10. import seaborn as sns
  11. import streamlit as st
  12. import plotly.express as px
  13. import plotly.graph_objects as go
  14. import os
  15. from datetime import datetime
  16. from sklearn.cluster import DBSCAN
  17. from sklearn.decomposition import PCA
  18. from sklearn.preprocessing import StandardScaler
  19. from defect_analysis.data_quality import build_data_quality_report
  20. from defect_analysis.cases import (
  21. VALID_CASE_STATUSES,
  22. VALID_CASE_TRANSITIONS,
  23. create_root_cause_case,
  24. get_audit_logs,
  25. list_cases,
  26. update_case_status,
  27. )
  28. from app_utils import (
  29. apply_defect_filters,
  30. build_diagnostic_dashboard,
  31. build_ml_factor_insights,
  32. calculate_kpis,
  33. calculate_spc_metrics,
  34. generate_industry_diagnosis,
  35. get_missing_required_columns,
  36. normalize_defect_schema,
  37. TEMPLATE_COLUMNS,
  38. )
  39. # --- 中文字体设置 ---
  def setup_chinese_font():
      """Configure matplotlib so CJK labels and minus signs render correctly.

      The first font file found among a list of well-known Windows font paths
      is registered with matplotlib's font manager and selected as the default
      family. When none of the files exist, a list of common CJK family names
      is installed as the sans-serif fallback chain instead.

      Returns:
          The ``FontProperties`` of the selected font file, or ``None`` when
          only the name-based fallback could be configured.
      """
      font_paths = [
          r"C:\Windows\Fonts\msyh.ttc",    # Microsoft YaHei
          r"C:\Windows\Fonts\simhei.ttf",  # SimHei
          r"C:\Windows\Fonts\simsun.ttc",  # SimSun
          r"C:\Windows\Fonts\malgun.ttf",  # Malgun Gothic
      ]
      for fp in font_paths:
          if not os.path.exists(fp):
              continue
          # Register the file explicitly: setting ``font.family`` to a name only
          # works when the font manager already knows a font with that name,
          # which is not guaranteed with a stale or partial font cache.
          if hasattr(fm.fontManager, "addfont"):
              fm.fontManager.addfont(fp)
          font_prop = fm.FontProperties(fname=fp)
          plt.rcParams["font.family"] = font_prop.get_name()
          plt.rcParams["axes.unicode_minus"] = False
          return font_prop
      # Fallback: rely on whichever of these families is installed.
      plt.rcParams["font.sans-serif"] = ["SimHei", "Microsoft YaHei", "Arial Unicode MS"]
      plt.rcParams["axes.unicode_minus"] = False
      return None
  58. setup_chinese_font()
  # --- Page configuration ---
  st.set_page_config(
      page_title="屏幕缺陷集中性分析",
      page_icon="🔍",
      layout="wide",
      initial_sidebar_state="expanded",
  )

  # --- Sidebar ---
  st.sidebar.title("🔍 筛选条件")

  # --- Data source selection ---
  st.sidebar.divider()
  st.sidebar.subheader("📂 数据源")
  data_source = st.sidebar.radio("选择数据源", ["内置模拟数据", "上传CSV文件"], label_visibility="collapsed")
  uploaded_df = None
  if data_source == "上传CSV文件":
      uploaded_file = st.sidebar.file_uploader("上传CSV文件", type=["csv"], accept_multiple_files=False)

      # Offer the template as soon as upload mode is chosen, so it is available
      # before (and regardless of whether) an upload succeeds.
      template_df = pd.DataFrame(columns=TEMPLATE_COLUMNS)
      csv_template = template_df.to_csv(index=False)
      st.sidebar.download_button(
          label="📋 下载数据格式模板",
          # ``to_csv`` ignores ``encoding`` when it returns a str, so encode
          # here to actually emit the UTF-8 BOM that spreadsheet tools expect.
          data=csv_template.encode("utf-8-sig"),
          file_name="defect_data_template.csv",
          mime="text/csv",
      )

      if uploaded_file is None:
          st.sidebar.info("请选择一个CSV文件上传")
      else:
          # Broad catch on purpose: this is the UI boundary and any parser
          # failure should surface as a sidebar message, not a crashed page.
          try:
              # Read without ``parse_dates`` first: asking pandas to parse a
              # column that is absent raises before the friendlier
              # missing-column check below gets a chance to run.
              candidate_df = pd.read_csv(uploaded_file)
              missing = list(get_missing_required_columns(candidate_df))
              if "timestamp" not in candidate_df.columns and "timestamp" not in missing:
                  missing.append("timestamp")
              if missing:
                  st.sidebar.error(f"缺少字段: {', '.join(missing)}")
                  uploaded_df = None
              else:
                  candidate_df["timestamp"] = pd.to_datetime(candidate_df["timestamp"])
                  uploaded_df = normalize_defect_schema(candidate_df)
                  st.sidebar.success(f"已加载 {len(uploaded_df)} 条记录")
                  st.sidebar.caption("已自动补齐缺陷几何、多工序机台、治具和材料批次等可选行业字段")
          except Exception as e:
              st.sidebar.error(f"CSV解析失败: {e}")
              uploaded_df = None
  101. # --- 加载数据 ---
  @st.cache_data(ttl=300)
  def load_data_from_csv():
      """Load the bundled sample data set from ``defect_data.csv``.

      Returns:
          The CSV contents passed through ``normalize_defect_schema``, or
          ``None`` (after showing an error on the page) when the file does
          not exist in the working directory.
      """
      csv_path = "defect_data.csv"
      if os.path.exists(csv_path):
          raw_frame = pd.read_csv(csv_path, parse_dates=["timestamp"])
          return normalize_defect_schema(raw_frame)
      st.error("未找到 defect_data.csv,请先运行 generate_data.py 生成数据")
      return None
  @st.cache_data(ttl=300, show_spinner=False)
  def build_cached_ml_factor_insights(data, target_defect_type, model_name, top_n):
      """Cached wrapper around ``build_ml_factor_insights``.

      Caching keeps the model from being retrained on every page interaction
      while the arguments stay the same.
      """
      options = {
          "target_defect_type": target_defect_type,
          "model_name": model_name,
          "top_n": top_n,
      }
      return build_ml_factor_insights(data, **options)
  # Prefer a successfully uploaded data set; otherwise use the bundled sample.
  _use_upload = uploaded_df is not None and data_source == "上传CSV文件"
  df = uploaded_df if _use_upload else load_data_from_csv()
  if df is None:
      st.stop()

  # --- Database path ---
  st.sidebar.divider()
  st.sidebar.subheader("🗄️ 数据库")
  db_path = st.sidebar.text_input(
      "数据库路径",
      value="defect_analysis.db",
      help="Case 管理和数据持久化使用的 SQLite 数据库路径",
  )

  # --- Role-based view ---
  st.sidebar.divider()
  st.sidebar.subheader("👤 视图模式")
  view_mode = st.sidebar.selectbox(
      "选择视图模式",
      options=["操作员", "工程师", "管理者"],
      index=1,
      help="操作员: 基础分析 | 工程师: 全部功能 | 管理者: KPI+SPC+健康评分"
  )

  # Tabs visible to each role; "all" means every tab is shown.
  _role_flags = {"show_kpi": True, "show_export": True}
  tab_visibility = {
      "操作员": {
          "tabs": [
              "🗺️ 空间集中性",
              "📊 类型集中性 (帕累托)",
              "📈 时间集中性",
              "🏗️ 设备座号集中性",
              "🔬 缺陷模式识别",
              "🧭 诊断驾驶舱",
          ],
          **_role_flags,
      },
      "工程师": {
          "tabs": "all",
          **_role_flags,
      },
      "管理者": {
          "tabs": [
              "🚨 SPC 控制图与预警",
              "🔬 缺陷模式识别",
              "💚 设备健康与共性分析",
              "📊 类型集中性 (帕累托)",
              "📈 时间集中性",
              "🧭 诊断驾驶舱",
              "📋 Case 管理",
          ],
          **_role_flags,
      },
  }

  # Configuration of the currently selected role.
  current_config = tab_visibility[view_mode]
  # --- Filters ---
  # Option lists are built from non-null values only: ``sorted`` raises a
  # TypeError when NaN is mixed with strings, which would crash the page for
  # any data set that has a blank cell in one of these columns.

  # Date range
  min_date = df["timestamp"].min().date()
  max_date = df["timestamp"].max().date()
  date_range = st.sidebar.date_input(
      "日期范围",
      value=[min_date, max_date],
      min_value=min_date,
      max_value=max_date,
  )
  if len(date_range) == 2:
      start_date, end_date = pd.Timestamp(date_range[0]), pd.Timestamp(date_range[1])
  else:
      # Only one end of the range has been picked so far: use the full span.
      start_date, end_date = pd.Timestamp(min_date), pd.Timestamp(max_date)

  # Defect type
  all_types = sorted(df["defect_type"].dropna().unique())
  selected_types = st.sidebar.multiselect("缺陷类型", options=all_types, default=all_types)

  # Shift
  shift_options = ["全部", "白班", "夜班"]
  selected_shift = st.sidebar.radio("班次", options=shift_options)

  # Batch
  all_batches = sorted(df["batch_id"].dropna().unique())
  selected_batches = st.sidebar.multiselect("批次", options=all_batches, default=all_batches)

  # Severity
  all_severities = ["全部", "轻微", "中等", "严重"]
  selected_severity = st.sidebar.selectbox("严重程度", options=all_severities)

  # Equipment
  all_equipment = sorted(df["equipment_id"].dropna().unique())
  selected_equipment = st.sidebar.multiselect("前贴附设备", options=all_equipment, default=all_equipment)

  # Seat (options follow the selected equipment)
  if selected_equipment:
      eq_seats = sorted(
          df.loc[df["equipment_id"].isin(selected_equipment), "seat_id"].dropna().unique()
      )
      selected_seats = st.sidebar.multiselect("座号", options=eq_seats, default=eq_seats)
  else:
      selected_seats = []

  filtered_df = apply_defect_filters(
      df,
      start_date=start_date,
      end_date=end_date,
      selected_types=selected_types,
      selected_batches=selected_batches,
      selected_equipment=selected_equipment,
      selected_seats=selected_seats,
      selected_shift=selected_shift,
      selected_severity=selected_severity,
  )
  # ========== KPI board ==========
  kpis = calculate_kpis(df, filtered_df)
  total_panels_inspected = kpis["total_panels_inspected"]
  defective_panels = kpis["defective_panels"]
  yield_rate = kpis["yield_rate"]
  total_defects = kpis["total_defects"]
  critical_defects = kpis["critical_defects"]
  top_defect_type = kpis["top_defect_type"]

  kpi1, kpi2, kpi3, kpi4, kpi5, kpi6 = st.columns(6)
  kpi1.metric("检测面板数", f"{total_panels_inspected} 块")
  kpi2.metric(
      "不良面板数",
      f"{defective_panels} 块",
      delta=f"{defective_panels/total_panels_inspected*100:.1f}%" if total_panels_inspected > 0 else "0%",
  )
  # The delta is the signed gap to the 95% target, so the default colouring
  # (positive green, negative red) is already correct. Switching to "inverse"
  # below target, as the code previously did, painted a shortfall green.
  kpi3.metric("综合良率", f"{yield_rate:.1f}%", delta=f"{yield_rate - 95:.1f}%", delta_color="normal")
  kpi4.metric("缺陷总数", f"{total_defects} 个")
  kpi5.metric(
      "严重缺陷",
      f"{critical_defects} 个",
      delta=f"{critical_defects/max(total_defects,1)*100:.1f}%" if total_defects > 0 else "0%",
  )
  kpi6.metric("主要缺陷类型", top_defect_type)

  # Second KPI row
  eq_concentrated = False
  if "equipment_id" in filtered_df.columns:
      eq_stats = filtered_df.groupby("equipment_id").size()
      top_eq = eq_stats.idxmax() if len(eq_stats) > 0 else "-"
      top_eq_count = eq_stats.max() if len(eq_stats) > 0 else 0
  else:
      top_eq, top_eq_count = "-", 0

  # Defaults cover both "no seat column / no rows" and "no seat groups".
  seat_concentrated = False
  top_seat, top_seat_count = "-", 0
  if "seat_id" in filtered_df.columns and len(filtered_df) > 0:
      seat_stats = filtered_df.groupby("seat_id").size()
      if len(seat_stats) > 0:
          top_seat = seat_stats.idxmax()
          top_seat_count = seat_stats.max()
          avg_seat_count = seat_stats.mean()
          # A seat counts as concentrated when it holds more than twice the
          # average number of defects per seat.
          if top_seat_count > avg_seat_count * 2:
              seat_concentrated = True

  kpi7, kpi8, kpi9 = st.columns(3)
  kpi7.metric("最高缺陷设备", str(top_eq), f"{top_eq_count} 个缺陷")
  kpi8.metric("最高缺陷座号", str(top_seat), f"{top_seat_count} 个缺陷")
  if seat_concentrated:
      kpi9.metric("座号集中性", "⚠️ 存在集中", delta="需关注", delta_color="inverse")
  else:
      kpi9.metric("座号集中性", "✅ 正常分布")
  # --- Main title ---
  st.title("📊 屏幕缺陷集中性分析系统")
  _summary_parts = [
      f"**数据范围**: {start_date.strftime('%Y-%m-%d')} ~ {end_date.strftime('%Y-%m-%d')}",
      f"**筛选后缺陷数**: {len(filtered_df)} 条",
      f"**涉及面板**: {filtered_df['panel_id'].nunique()} 块",
  ]
  st.markdown(" | ".join(_summary_parts))
  st.divider()

  # Nothing to analyse: tell the user and halt the script run here.
  if filtered_df.empty:
      st.warning("当前筛选条件下没有缺陷记录,请放宽日期、批次、设备或缺陷类型筛选。")
      st.stop()

  # --- Tab layout (depends on the selected role) ---
  ALL_TABS = [
      "🧭 诊断驾驶舱",
      "🔬 ML 因子分析",
      "📋 Case 管理",
      "🗺️ 空间集中性",
      "📊 类型集中性 (帕累托)",
      "📈 时间集中性",
      "🏭 批次集中性",
      "🏗️ 设备座号集中性",
      "🔗 关联分析",
      "🧠 智能缺陷聚类 (DBSCAN)",
      "🚨 SPC 控制图与预警",
      "🔬 缺陷模式识别",
      "💚 设备健康与共性分析",
      "🔲 多层叠加分析",
  ]
  _allowed_tabs = current_config["tabs"]
  visible_tabs = ALL_TABS if _allowed_tabs == "all" else [t for t in ALL_TABS if t in _allowed_tabs]
  tab_containers = st.tabs(visible_tabs)
  tab_map = dict(zip(visible_tabs, tab_containers))


  def get_tab(name):
      """Return the container of tab *name*, or None when the tab is hidden."""
      return tab_map[name] if name in tab_map else None
  # ========== Tab 0: Diagnostic cockpit ==========
  # Renders the headline diagnosis, summary cards, data-quality metrics, the
  # panel defect map, the root-cause ranking, the daily trend and a Pareto
  # chart for the currently filtered defects.
  #
  # Text taken from the data set (defect type names, root-cause labels,
  # generated recommendations) can originate from an uploaded CSV, so it is
  # HTML-escaped before being placed into ``unsafe_allow_html`` markup.
  # The HTML snippets are kept flush-left inside their string literals so
  # Markdown does not treat indented lines as a code block.
  from html import escape as _html_escape

  _t = get_tab("🧭 诊断驾驶舱")
  if _t is not None:
      with _t:
          dashboard = build_diagnostic_dashboard(filtered_df)
          industry_diagnosis = generate_industry_diagnosis(filtered_df, dashboard)
          quality_report = build_data_quality_report(filtered_df)

          # (foreground, background) badge colours per severity level.
          level_colors = {
              "严重": ("#7f1d1d", "#fee2e2"),
              "关注": ("#92400e", "#fef3c7"),
              "正常": ("#14532d", "#dcfce7"),
          }
          level_fg, level_bg = level_colors.get(dashboard["severity_level"], ("#334155", "#e2e8f0"))

          # Escaped copies of every data-derived string used in raw HTML below.
          safe_severity_level = _html_escape(str(dashboard["severity_level"]))
          safe_recommendation = _html_escape(str(dashboard["primary_recommendation"]))
          safe_top_defect_type = _html_escape(str(dashboard["top_defect_type"]))
          safe_headline = _html_escape(str(industry_diagnosis["headline"]))

          st.markdown(
              """
  <style>
  .diag-hero {
  padding: 24px 28px;
  border-radius: 24px;
  background:
  radial-gradient(circle at 15% 15%, rgba(20, 184, 166, .18), transparent 28%),
  linear-gradient(135deg, #0f172a 0%, #12343b 52%, #294936 100%);
  color: #f8fafc;
  box-shadow: 0 18px 45px rgba(15, 23, 42, .18);
  margin-bottom: 18px;
  }
  .diag-hero h2 { margin: 0 0 8px 0; font-size: 30px; letter-spacing: .02em; }
  .diag-hero p { margin: 0; color: #cbd5e1; font-size: 15px; }
  .diag-badge {
  display: inline-flex;
  align-items: center;
  padding: 6px 12px;
  border-radius: 999px;
  font-weight: 700;
  margin-bottom: 12px;
  }
  .diag-card {
  padding: 18px 18px;
  border-radius: 18px;
  border: 1px solid #dbe4e7;
  background: linear-gradient(180deg, #ffffff 0%, #f8fafc 100%);
  min-height: 128px;
  }
  .diag-card .label { color: #64748b; font-size: 13px; margin-bottom: 8px; }
  .diag-card .value { color: #0f172a; font-size: 26px; font-weight: 800; line-height: 1.1; }
  .diag-card .hint { color: #475569; font-size: 13px; margin-top: 10px; }
  </style>
  """,
              unsafe_allow_html=True,
          )
          st.markdown(
              f"""
  <div class="diag-hero">
  <div class="diag-badge" style="color:{level_fg}; background:{level_bg};">
  当前诊断等级:{safe_severity_level}
  </div>
  <h2>缺陷诊断驾驶舱</h2>
  <p>{safe_recommendation}</p>
  </div>
  """,
              unsafe_allow_html=True,
          )

          card1, card2, card3, card4 = st.columns(4)
          with card1:
              st.markdown(
                  f"""
  <div class="diag-card">
  <div class="label">筛选后缺陷</div>
  <div class="value">{len(filtered_df)}</div>
  <div class="hint">涉及 {filtered_df["panel_id"].nunique()} 块面板</div>
  </div>
  """,
                  unsafe_allow_html=True,
              )
          with card2:
              st.markdown(
                  f"""
  <div class="diag-card">
  <div class="label">主导缺陷类型</div>
  <div class="value">{safe_top_defect_type}</div>
  <div class="hint">占全部缺陷 {dashboard["top_defect_share"]:.1%}</div>
  </div>
  """,
                  unsafe_allow_html=True,
              )
          with card3:
              st.markdown(
                  f"""
  <div class="diag-card">
  <div class="label">严重缺陷占比</div>
  <div class="value">{dashboard["serious_share"]:.1%}</div>
  <div class="hint">高于 20% 建议立即复盘</div>
  </div>
  """,
                  unsafe_allow_html=True,
              )
          with card4:
              # First row of the root-cause table, or placeholders when the
              # table is empty.
              top_root = dashboard["root_causes"].iloc[0] if len(dashboard["root_causes"]) else None
              root_name = top_root["根因候选"] if top_root is not None else "-"
              root_share = top_root["占比"] if top_root is not None else 0
              root_lift = top_root["异常倍数"] if top_root is not None else 0
              safe_root_name = _html_escape(str(root_name))
              st.markdown(
                  f"""
  <div class="diag-card">
  <div class="label">首要根因候选</div>
  <div class="value" style="font-size:22px;">{safe_root_name}</div>
  <div class="hint">贡献 {root_share:.1%} 缺陷,异常 {root_lift:.2f}x</div>
  </div>
  """,
                  unsafe_allow_html=True,
              )

          st.markdown(
              f"""
  <div style="
  margin-top: 16px;
  padding: 18px 20px;
  border-radius: 18px;
  border: 1px solid #c7d2fe;
  background: linear-gradient(135deg, #eef2ff 0%, #f8fafc 55%, #ecfeff 100%);
  ">
  <div style="font-size: 13px; color: #475569; font-weight: 700; margin-bottom: 6px;">
  3C 面板行业诊断结论
  </div>
  <div style="font-size: 18px; color: #0f172a; font-weight: 800;">
  {safe_headline}
  </div>
  </div>
  """,
              unsafe_allow_html=True,
          )

          diag_col1, diag_col2 = st.columns([1, 1])
          with diag_col1:
              st.subheader("识别到的缺陷模式")
              for pattern in industry_diagnosis["patterns"]:
                  st.markdown(f"- {pattern}")
          with diag_col2:
              st.subheader("行业化排查建议")
              for idx, recommendation in enumerate(industry_diagnosis["recommendations"], 1):
                  st.markdown(f"{idx}. {recommendation}")

          quality_cols = st.columns(5)
          quality_cols[0].metric("数据质量分", f"{quality_report['score']:.1f}")
          quality_cols[1].metric("必填完整率", f"{quality_report['required_complete_rate']:.1%}")
          quality_cols[2].metric("坐标合法率", f"{quality_report['coordinate_valid_rate']:.1%}")
          quality_cols[3].metric("枚举合法率", f"{quality_report['enum_valid_rate']:.1%}")
          quality_cols[4].metric("追溯覆盖率", f"{quality_report['traceability_rate']:.1%}")
          # The report uses this exact single-item list to signal "no issues".
          if quality_report["issues"] != ["数据质量良好"]:
              st.warning("数据质量提示:" + ";".join(quality_report["issues"]))

          st.divider()
          left, right = st.columns([1.25, 1])
          with left:
              st.subheader("交互式面板数字孪生")
              # Panel dimensions are read from the first row of the full data set.
              panel_w = float(df["panel_width_mm"].iloc[0])
              panel_h = float(df["panel_height_mm"].iloc[0])
              fig_map = go.Figure()
              fig_map.add_shape(
                  type="rect",
                  x0=0,
                  y0=0,
                  x1=panel_w,
                  y1=panel_h,
                  line=dict(color="#0f172a", width=2),
                  fillcolor="#f8fafc",
                  layer="below",
              )
              fig_map.add_trace(
                  go.Scatter(
                      x=filtered_df["x_mm"],
                      y=filtered_df["y_mm"],
                      mode="markers",
                      marker=dict(
                          size=7,
                          color=filtered_df["severity"].map({"轻微": 1, "中等": 2, "严重": 3}),
                          colorscale=[[0, "#38bdf8"], [0.5, "#f59e0b"], [1, "#dc2626"]],
                          # Pin the colour range so each severity keeps the same
                          # colour even when only some levels are present.
                          cmin=1,
                          cmax=3,
                          showscale=True,
                          colorbar=dict(
                              title="严重度",
                              tickvals=[1, 2, 3],
                              ticktext=["轻微", "中等", "严重"],
                          ),
                          opacity=0.72,
                          line=dict(width=0.4, color="#ffffff"),
                      ),
                      text=filtered_df["defect_id"],
                      customdata=filtered_df[["defect_type", "severity", "equipment_id", "seat_id", "batch_id"]],
                      hovertemplate=(
                          "缺陷ID: %{text}<br>"
                          "坐标: (%{x:.1f}, %{y:.1f}) mm<br>"
                          "类型: %{customdata[0]}<br>"
                          "严重度: %{customdata[1]}<br>"
                          "设备/座号: %{customdata[2]} / %{customdata[3]}<br>"
                          "批次: %{customdata[4]}<extra></extra>"
                      ),
                      name="缺陷点",
                  )
              )
              # Shaded bands: outer 10% on the left/right edges, plus a
              # horizontal band between 72% and 88% of the panel height.
              fig_map.add_vrect(x0=0, x1=panel_w * 0.1, fillcolor="#f97316", opacity=0.08, line_width=0)
              fig_map.add_vrect(x0=panel_w * 0.9, x1=panel_w, fillcolor="#f97316", opacity=0.08, line_width=0)
              fig_map.add_hrect(y0=panel_h * 0.72, y1=panel_h * 0.88, fillcolor="#14b8a6", opacity=0.09, line_width=0)
              fig_map.update_layout(
                  height=560,
                  margin=dict(l=18, r=18, t=30, b=18),
                  plot_bgcolor="#ffffff",
                  paper_bgcolor="#ffffff",
                  xaxis=dict(title="X (mm)", range=[0, panel_w], showgrid=True, gridcolor="#e2e8f0"),
                  yaxis=dict(title="Y (mm)", range=[0, panel_h], scaleanchor="x", scaleratio=1, showgrid=True, gridcolor="#e2e8f0"),
                  title="按真实屏幕比例定位缺陷,橙色为边缘敏感区,青色为 FPC 关注区",
              )
              st.plotly_chart(fig_map, use_container_width=True)

              fig_density = px.density_heatmap(
                  filtered_df,
                  x="x_mm",
                  y="y_mm",
                  nbinsx=28,
                  nbinsy=42,
                  color_continuous_scale="YlOrRd",
                  title="密度热区视图",
                  labels={"x_mm": "X (mm)", "y_mm": "Y (mm)"},
              )
              fig_density.update_layout(height=300, margin=dict(l=18, r=18, t=42, b=18))
              st.plotly_chart(fig_density, use_container_width=True)

          with right:
              st.subheader("根因候选榜")
              root_causes = dashboard["root_causes"].copy()
              fig_root = px.bar(
                  # Ascending sort puts the highest score at the top of a
                  # horizontal bar chart.
                  root_causes.sort_values("风险分", ascending=True),
                  x="风险分",
                  y="根因候选",
                  orientation="h",
                  color="异常倍数",
                  color_continuous_scale="Tealrose",
                  text="风险分",
                  hover_data={
                      "缺陷数": True,
                      "占比": ":.1%",
                      "异常倍数": ":.2f",
                      "涉及面板": True,
                      "主要缺陷": True,
                      "严重占比": ":.1%",
                      "风险分": ":.1f",
                  },
                  labels={"风险分": "风险分", "根因候选": ""},
              )
              fig_root.update_traces(texttemplate="%{text:.1f}", textposition="outside")
              fig_root.update_layout(height=360, margin=dict(l=8, r=20, t=20, b=20))
              st.plotly_chart(fig_root, use_container_width=True)

              # Display copy with ratios rendered as text.
              root_table = root_causes.copy()
              root_table["占比"] = root_table["占比"].map(lambda v: f"{v:.1%}")
              root_table["异常倍数"] = root_table["异常倍数"].map(lambda v: f"{v:.2f}x")
              root_table["严重占比"] = root_table["严重占比"].map(lambda v: f"{v:.1%}")
              st.dataframe(root_table, use_container_width=True, hide_index=True)
              st.caption("风险分 = 贡献规模 + 异常倍数 + 严重占比 + 涉及面板数。先查高贡献且高偏离的组合。")

          trend_col, pareto_col = st.columns([1, 1])
          with trend_col:
              st.subheader("每日缺陷走势")
              daily_trend = dashboard["daily_trend"]
              fig_trend_dash = px.area(
                  daily_trend,
                  x="day",
                  y="缺陷数",
                  markers=True,
                  color_discrete_sequence=["#0f766e"],
                  labels={"day": "日期", "缺陷数": "缺陷数"},
              )
              fig_trend_dash.update_traces(line=dict(width=3), fillcolor="rgba(20, 184, 166, .22)")
              fig_trend_dash.update_layout(height=350, margin=dict(l=18, r=18, t=20, b=18))
              st.plotly_chart(fig_trend_dash, use_container_width=True)

          with pareto_col:
              st.subheader("缺陷类型 Pareto")
              pareto = dashboard["pareto"].head(8)
              fig_pareto_dash = go.Figure()
              fig_pareto_dash.add_trace(
                  go.Bar(
                      x=pareto["缺陷类型"],
                      y=pareto["缺陷数"],
                      marker_color="#334155",
                      name="缺陷数",
                      hovertemplate="%{x}<br>缺陷数: %{y}<extra></extra>",
                  )
              )
              # Cumulative share is drawn on a secondary axis on the right.
              fig_pareto_dash.add_trace(
                  go.Scatter(
                      x=pareto["缺陷类型"],
                      y=pareto["累计占比"],
                      yaxis="y2",
                      mode="lines+markers",
                      line=dict(color="#dc2626", width=3),
                      name="累计占比",
                      hovertemplate="%{x}<br>累计占比: %{y:.1%}<extra></extra>",
                  )
              )
              fig_pareto_dash.update_layout(
                  height=350,
                  margin=dict(l=18, r=18, t=20, b=18),
                  yaxis=dict(title="缺陷数"),
                  yaxis2=dict(title="累计占比", overlaying="y", side="right", tickformat=".0%"),
                  legend=dict(orientation="h", y=1.12),
              )
              st.plotly_chart(fig_pareto_dash, use_container_width=True)
  # ========== Tab 0.5: ML factor analysis ==========
  # Renders rule-based root-cause candidates, model metrics, feature
  # importances and the key-factor table for one target defect type.
  # NOTE(review): the extracted listing carried no indentation; the nesting
  # below is reconstructed from statement order — confirm against the real file.
  _t = get_tab("🔬 ML 因子分析")
  if _t:
      with _t:
          dashboard = build_diagnostic_dashboard(filtered_df)
          extended_root_causes = dashboard.get("extended_root_causes")
          st.header("根因与关键因子分析")
          st.markdown("综合规则评分、统计分析、机器学习验证与行业维度,输出可解释的异常候选。")

          # Model identifier -> label shown in the dropdown. Dict insertion
          # order is the order in which the options are listed.
          model_labels = {
              "random_forest": "RandomForest",
              "logistic_regression": "LogisticRegression",
              "xgboost": "XGBoost",
              "lightgbm": "LightGBM",
          }
          # Cell formatters shared by the tables below.
          as_percent = "{:.1%}".format
          as_multiple = "{:.2f}x".format

          ml_col1, ml_col2, ml_col3 = st.columns([1, 1, 1])
          with ml_col1:
              # Build the option list once; it was previously recomputed three
              # times (options, membership test, index lookup).
              defect_type_options = sorted(filtered_df["defect_type"].dropna().unique())
              # .get() mirrors the lookup of "extended_root_causes" above and
              # falls back to the first option when the key is absent.
              top_defect_type = dashboard.get("top_defect_type")
              if top_defect_type in defect_type_options:
                  default_index = defect_type_options.index(top_defect_type)
              else:
                  default_index = 0
              ml_target_type = st.selectbox(
                  "目标缺陷",
                  options=defect_type_options,
                  index=default_index,
              )
          with ml_col2:
              ml_model_name = st.selectbox(
                  "ML 模型",
                  options=list(model_labels),
                  format_func=model_labels.__getitem__,
              )
          with ml_col3:
              ml_top_n = st.slider("候选因子数", min_value=5, max_value=30, value=10, step=5)

          ml_insights = build_cached_ml_factor_insights(
              filtered_df,
              ml_target_type,
              ml_model_name,
              ml_top_n,
          )
          st.divider()

          # Rule-based candidates are shown regardless of the model outcome.
          if extended_root_causes is not None and not extended_root_causes.empty:
              st.subheader("扩展根因候选")
              extended_table = extended_root_causes.copy()
              extended_table["占比"] = extended_table["占比"].map(as_percent)
              extended_table["异常倍数"] = extended_table["异常倍数"].map(as_multiple)
              extended_table["严重占比"] = extended_table["严重占比"].map(as_percent)
              st.dataframe(extended_table, use_container_width=True, hide_index=True)
              st.caption("覆盖治具、吸嘴、材料批次、清洗/绑定等维度,用于多前制程链路追溯。")

          if ml_insights["error"]:
              st.warning(f"ML 模型暂不可用:{ml_insights['error']}")
          else:
              metric_train = ml_insights["metrics"]
              metric_valid = ml_insights["validation_metrics"]
              m1, m2, m3, m4 = st.columns(4)
              m1.metric("训练准确率", f"{metric_train.get('train_accuracy', 0):.1%}")
              m2.metric("训练 AUC", f"{metric_train.get('train_auc', 0):.3f}")
              m3.metric("验证准确率", f"{metric_valid.get('validation_accuracy', 0):.1%}")
              m4.metric("验证 AUC", f"{metric_valid.get('validation_auc', 0):.3f}")

              importance_df = pd.DataFrame(ml_insights["feature_importance"])
              if not importance_df.empty:
                  st.subheader("模型特征贡献 TOP")
                  importance_df["importance"] = importance_df["importance"].map(lambda v: round(v, 4))
                  st.dataframe(importance_df.head(15), use_container_width=True, hide_index=True)
                  st.caption("用于判断模型主要依赖哪些设备、座号、材料批次、坐标或缺陷几何特征。")

              # NOTE(review): it is ambiguous whether the key-factor table
              # belonged inside this `else` or at tab level — confirm.
              key_factors = ml_insights["key_factors"]
              if not key_factors.empty:
                  st.subheader(f"关键因子分析:{ml_insights['target_defect_type']}")
                  key_factor_table = key_factors.copy()
                  key_factor_table["目标占比"] = key_factor_table["目标占比"].map(as_percent)
                  key_factor_table["基线占比"] = key_factor_table["基线占比"].map(as_percent)
                  key_factor_table["异常倍数"] = key_factor_table["异常倍数"].map(as_multiple)
                  key_factor_table["支持度"] = key_factor_table["支持度"].map(as_percent)
                  # The probability column is only formatted when present.
                  if "ml_probability" in key_factor_table.columns:
                      key_factor_table["ml_probability"] = key_factor_table["ml_probability"].map(as_percent)
                  st.dataframe(key_factor_table, use_container_width=True, hide_index=True)
                  st.caption("关键因子按目标缺陷占比、异常倍数、样本数、支持度和模型概率综合排序。")
              else:
                  st.info("当前数据未找到显著关键因子,可放宽筛选条件或增加样本量。")
  # ========== Tab: Case management ==========
  # Lists root-cause cases, lets the user move a case to an allowed next
  # status, create new cases, and browse the audit log.
  # NOTE(review): the extracted listing carried no indentation; the nesting
  # below is reconstructed from statement order — confirm against the real file.
  _t = get_tab("📋 Case 管理")
  if _t:
      with _t:
          st.header("异常 Case 闭环管理")
          st.markdown("从根因分析发现异常,创建 Case 追踪改善过程,直至关闭并审计。")
          from defect_analysis.database import init_database
          init_database(db_path)

          # st.rerun() aborts the current script run, so a message drawn right
          # before it is never visible. Success messages are therefore parked
          # in session state and rendered here, on the run that follows.
          case_flash = st.session_state.pop("case_flash_message", None)
          if case_flash:
              st.success(case_flash)

          # Sub-tabs
          case_list_tab, case_create_tab, case_audit_tab = st.tabs(["Case 列表", "创建 Case", "审计日志"])

          # ---- Case list ----
          with case_list_tab:
              status_filter = st.selectbox(
                  "状态筛选",
                  options=["全部"] + sorted(VALID_CASE_STATUSES),
                  index=0,
                  label_visibility="collapsed",
              )
              all_cases = list_cases(
                  db_path,
                  status=None if status_filter == "全部" else status_filter,
              )
              if all_cases.empty:
                  st.info("暂无 Case 记录,请先在「创建 Case」中新建异常追踪。")
              else:
                  # One metric tile per status present in the current listing.
                  status_counts = all_cases["status"].value_counts()
                  st_cols = st.columns(len(status_counts))
                  for tile, (status, count) in zip(st_cols, status_counts.items()):
                      tile.metric(status, count)

                  display = all_cases.copy()
                  for ts_column in ("created_at", "updated_at"):
                      display[ts_column] = pd.to_datetime(display[ts_column]).dt.strftime("%Y-%m-%d %H:%M")
                  st.dataframe(
                      display[["case_id", "title", "status", "candidate_type", "candidate_value",
                               "defect_type", "panel_zone", "owner", "created_by", "created_at", "updated_at"]],
                      use_container_width=True,
                      hide_index=True,
                  )

                  # Status update
                  st.subheader("更新 Case 状态")
                  upd_col1, upd_col2, upd_col3, upd_col4 = st.columns([1, 2, 1, 2])
                  with upd_col1:
                      sel_case_id = st.number_input("Case ID", min_value=1, step=1, key="upd_case_id")

                  # Resolve the selected case and the statuses it may move to.
                  current_row = all_cases[all_cases["case_id"] == int(sel_case_id)]
                  current_status = None
                  allowed = set()
                  if not current_row.empty:
                      current_status = current_row.iloc[0]["status"]
                      allowed = VALID_CASE_TRANSITIONS.get(current_status, set())

                  # Take the target from the widget's return value rather than
                  # from session state, so it can never be a stale entry.
                  target_status = ""
                  with upd_col2:
                      if current_row.empty:
                          st.warning("请选择有效的 Case ID")
                      elif allowed:
                          target_status = st.selectbox(
                              f"当前: {current_status} → 目标状态",
                              options=sorted(allowed),
                              key="upd_target",
                          )
                      else:
                          st.warning(f"当前状态 {current_status} 不可流转,Case 已终态。")
                  with upd_col3:
                      actor = st.text_input("操作人", value="engineer", key="upd_actor")
                  with upd_col4:
                      note = st.text_input("备注", value="", key="upd_note")

                  # The button is enabled only for an existing case that still
                  # has at least one allowed transition.
                  can_update = bool(allowed)
                  if st.button("确认更新状态", key="upd_submit", disabled=not can_update):
                      if target_status:
                          try:
                              update_case_status(
                                  db_path,
                                  case_id=int(sel_case_id),
                                  status=target_status,
                                  actor=actor or "system",
                                  note=note,
                              )
                          except ValueError as e:
                              st.error(str(e))
                          else:
                              st.session_state["case_flash_message"] = (
                                  f"Case {sel_case_id} 已更新至 {target_status}"
                              )
                              st.rerun()

          # ---- Create case ----
          with case_create_tab:
              st.subheader("新建异常 Case")
              cr_col1, cr_col2 = st.columns(2)
              with cr_col1:
                  cr_title = st.text_input("Case 标题", key="cr_title")
                  cr_candidate_type = st.selectbox(
                      "候选维度",
                      options=[
                          "lam_fixture_id", "lam_jig_id", "lam_nozzle_id",
                          "material_lot_oca", "material_lot_glass", "material_lot_polarizer",
                          "clean_equipment_id", "clean_slot_id", "bond_equipment_id", "bond_head_id",
                          "equipment_id", "seat_id", "shift", "recipe_id",
                      ],
                      key="cr_type",
                  )
                  cr_candidate_value = st.text_input("候选值", key="cr_value")
              with cr_col2:
                  cr_defect_type = st.selectbox(
                      "缺陷类型",
                      options=sorted(df["defect_type"].dropna().unique()),
                      key="cr_defect",
                  )
                  cr_panel_zone = st.text_input("面板区域", value="", key="cr_zone")
                  cr_owner = st.text_input("责任人", value="", key="cr_owner")
                  cr_created_by = st.text_input("创建人", value="engineer", key="cr_creator")
              cr_recommendation = st.text_area("改善建议", value="", key="cr_recommendation", height=80)

              if st.button("创建 Case", key="cr_submit"):
                  # Whitespace-only input counts as empty.
                  title = cr_title.strip()
                  candidate_value = cr_candidate_value.strip()
                  if not title or not candidate_value:
                      st.error("标题和候选值不能为空")
                  else:
                      case_id = create_root_cause_case(
                          db_path,
                          title=title,
                          candidate_type=cr_candidate_type,
                          candidate_value=candidate_value,
                          defect_type=cr_defect_type,
                          panel_zone=cr_panel_zone or "未指定",
                          owner=cr_owner or cr_created_by,
                          created_by=cr_created_by,
                          recommendation=cr_recommendation or "待分析",
                      )
                      st.session_state["case_flash_message"] = f"Case #{case_id} 已创建"
                      st.rerun()

          # ---- Audit log ----
          with case_audit_tab:
              st.subheader("操作审计日志")
              audit_filter = st.selectbox(
                  "实体筛选",
                  options=["全部", "case"],
                  index=1,
                  key="audit_entity_filter",
              )
              # An entity id of 0 means "no id filter".
              audit_entity_id = st.number_input(
                  "实体 ID(留空查全部)",
                  min_value=0,
                  value=0,
                  step=1,
                  key="audit_entity_id_input",
              )
              logs = get_audit_logs(
                  db_path,
                  entity_type="case" if audit_filter == "case" else None,
                  entity_id=audit_entity_id if audit_entity_id > 0 else None,
              )
              if logs.empty:
                  st.info("暂无审计日志")
              else:
                  # Format on a derived frame so the query result is not mutated.
                  audit_view = logs.assign(
                      created_at=pd.to_datetime(logs["created_at"]).dt.strftime("%Y-%m-%d %H:%M:%S")
                  )
                  st.dataframe(
                      audit_view[["audit_id", "entity_type", "entity_id", "action", "actor", "details", "created_at"]],
                      use_container_width=True,
                      hide_index=True,
                  )
  # ========== Tab 1: Spatial concentration ==========
  # Density heat map + scatter of defect positions, a 3x3 region summary,
  # and an annotated mock-up of a single panel.
  # NOTE(review): the extracted listing carried no indentation; the nesting
  # below is reconstructed from statement order — confirm against the real file.
  _t = get_tab("🗺️ 空间集中性")
  if _t:
      with _t:
          st.header("缺陷空间分布热力图")
          if filtered_df.empty:
              # pd.cut(..., bins=3) raises ValueError on an empty series, so
              # the whole tab is skipped when the filters leave no rows.
              st.info("当前筛选条件下无缺陷数据")
          else:
              # Panel dimensions are taken from the first row of the unfiltered
              # data and reused by every plot in this tab.
              panel_w = df["panel_width_mm"].iloc[0]
              panel_h = df["panel_height_mm"].iloc[0]

              col1, col2 = st.columns([2, 1])
              with col1:
                  # Heat-map resolution (bins per axis)
                  grid_size = st.slider("热力图网格分辨率", min_value=5, max_value=50, value=20)
                  fig, axes = plt.subplots(1, 2, figsize=(14, 6))

                  # Left: 2-D density heat map
                  x_edges = np.linspace(0, panel_w, grid_size + 1)
                  y_edges = np.linspace(0, panel_h, grid_size + 1)
                  H, _, _ = np.histogram2d(
                      filtered_df["x_mm"], filtered_df["y_mm"],
                      bins=[x_edges, y_edges],
                  )
                  # histogram2d returns an (x, y)-indexed array; transpose so
                  # that rows correspond to y for imshow.
                  im = axes[0].imshow(
                      H.T, origin="lower", aspect="auto",
                      extent=[0, panel_w, 0, panel_h],
                      cmap="YlOrRd",
                  )
                  axes[0].set_title(f"缺陷密度热力图 (总 {len(filtered_df)} 个)")
                  axes[0].set_xlabel("X (mm)")
                  axes[0].set_ylabel("Y (mm)")
                  plt.colorbar(im, ax=axes[0], label="缺陷数量")

                  # Right: raw scatter of defect positions
                  axes[1].scatter(
                      filtered_df["x_mm"], filtered_df["y_mm"],
                      alpha=0.3, s=5, c="red", edgecolors="none",
                  )
                  axes[1].set_title("缺陷位置散点图")
                  axes[1].set_xlabel("X (mm)")
                  axes[1].set_ylabel("Y (mm)")
                  axes[1].set_aspect("equal")
                  st.pyplot(fig)
                  plt.close(fig)

              with col2:
                  st.subheader("区域统计")
                  # Split into a 3x3 grid.
                  # NOTE(review): with an integer `bins`, pd.cut spans the
                  # observed min..max of the data, not 0..panel size — confirm
                  # that is intended. The lowest-y third is labelled "上" while
                  # the plots draw y increasing upward — confirm orientation.
                  x_bins = pd.cut(filtered_df["x_mm"], bins=3, labels=["左", "中", "右"])
                  y_bins = pd.cut(filtered_df["y_mm"], bins=3, labels=["上", "中", "下"])
                  region_df = pd.DataFrame({"X区域": x_bins, "Y区域": y_bins})
                  region_counts = region_df.groupby(["X区域", "Y区域"], observed=False).size().unstack(fill_value=0)
                  st.dataframe(region_counts, use_container_width=True)

                  # Top-5 most populated regions
                  st.subheader("高频缺陷区域 TOP5")
                  region_df["区域"] = region_df["X区域"].astype(str) + "-" + region_df["Y区域"].astype(str)
                  top_regions = region_df["区域"].value_counts().head(5)
                  for i, (region, count) in enumerate(top_regions.items(), 1):
                      st.metric(f"#{i} {region}", f"{count} 个缺陷")

              # --- Annotated mock-up of one panel ---
              st.divider()
              st.subheader("🖼️ 模拟面板缺陷标注图")
              st.markdown("选择批次和面板,查看缺陷在面板上的实际分布标注(按缺陷类型用不同颜色/形状区分)")
              ann_col1, ann_col2, ann_col3 = st.columns(3)
              with ann_col1:
                  ann_batch = st.selectbox("选择批次", options=sorted(filtered_df["batch_id"].unique()), key="ann_batch")
              with ann_col2:
                  panels_in_batch = sorted(filtered_df[filtered_df["batch_id"] == ann_batch]["panel_id"].unique())
                  ann_panel = st.selectbox("选择面板", options=panels_in_batch, key="ann_panel")
              with ann_col3:
                  ann_show_label = st.checkbox("显示缺陷标签", value=True)

              panel_defects = filtered_df[(filtered_df["batch_id"] == ann_batch) & (filtered_df["panel_id"] == ann_panel)]
              if len(panel_defects) == 0:
                  st.warning(f"当前面板 **{ann_panel}** (批次 {ann_batch}) 在筛选条件下无缺陷记录,请调整筛选条件或选择其他面板")
              else:
                  pw = panel_w
                  ph = panel_h
                  # Defect type -> marker colour / shape / base size
                  type_style = {
                      "划痕": {"color": "red", "marker": "x", "size": 80},
                      "亮点": {"color": "yellow", "marker": "o", "size": 60},
                      "暗点": {"color": "black", "marker": "x", "size": 60},
                      "气泡": {"color": "cyan", "marker": "o", "size": 100},
                      "色差": {"color": "magenta", "marker": "s", "size": 70},
                      "漏光": {"color": "orange", "marker": "D", "size": 80},
                      "裂纹": {"color": "darkred", "marker": "v", "size": 90},
                      "异物": {"color": "green", "marker": "P", "size": 80},
                  }
                  # Fallback style for defect types missing from the table.
                  default_style = {"color": "white", "marker": "o", "size": 50}
                  # Marker size multiplier per severity; unknown values use 1.0.
                  severity_scale = {"轻微": 0.7, "中等": 1.0, "严重": 1.4}

                  fig_ann, ax_ann = plt.subplots(figsize=(3.5, 5))
                  # Panel outline
                  ax_ann.add_patch(plt.Rectangle((0, 0), pw, ph, facecolor="#1a1a2e", edgecolor="#444", linewidth=2))
                  # Inner frame drawn `margin` mm inside the outline
                  margin = 8
                  ax_ann.add_patch(plt.Rectangle((margin, margin), pw - 2*margin, ph - 2*margin,
                                                 facecolor="#16213e", edgecolor="#0f3460", linewidth=1.5))
                  # FPC bonding area marker at 70 % of the panel height
                  fpc_y = ph * 0.7
                  ax_ann.axhline(y=fpc_y, color="#555", linestyle="--", alpha=0.4, linewidth=0.5)
                  ax_ann.text(pw/2, fpc_y + 2, "FPC区", color="#666", fontsize=7, ha="center", alpha=0.5)

                  # One marker (and optional two-character label) per defect
                  for _, row in panel_defects.iterrows():
                      style = type_style.get(row["defect_type"], default_style)
                      severity_size = severity_scale.get(row["severity"], 1.0)
                      ax_ann.scatter(row["x_mm"], row["y_mm"],
                                     c=style["color"], marker=style["marker"],
                                     s=style["size"] * severity_size,
                                     edgecolors="white", linewidth=0.3, alpha=0.85, zorder=3)
                      if ann_show_label:
                          ax_ann.annotate(row["defect_type"][:2],
                                          (row["x_mm"], row["y_mm"]),
                                          fontsize=5, color="white",
                                          ha="center", va="bottom", alpha=0.7, zorder=4)

                  # Legend lists every styled defect type, present or not
                  legend_elements = [plt.Line2D([0], [0], marker=type_style[t]["marker"], color="w",
                                                markerfacecolor=type_style[t]["color"], markersize=8,
                                                label=t, markeredgewidth=0.5, markeredgecolor="white")
                                     for t in type_style]
                  ax_ann.legend(handles=legend_elements, loc="upper right", fontsize=7,
                                framealpha=0.7, facecolor="#222", edgecolor="#555")
                  ax_ann.set_xlim(-5, pw + 5)
                  ax_ann.set_ylim(-5, ph + 5)
                  ax_ann.set_title(f"面板 {ann_panel} | 批次 {ann_batch} | {len(panel_defects)} 个缺陷",
                                   fontsize=11, pad=10)
                  ax_ann.set_xlabel("X (mm)")
                  ax_ann.set_ylabel("Y (mm)")
                  ax_ann.set_aspect("equal")
                  ax_ann.grid(True, alpha=0.1, color="gray")
                  st.pyplot(fig_ann)
                  plt.close(fig_ann)
  # ========== Tab 2: Pareto analysis ==========
  # Bar chart of defect counts per type with a cumulative-share line,
  # the backing table, and a severity breakdown.
  # NOTE(review): the extracted listing carried no indentation; the nesting
  # below is reconstructed from statement order — confirm against the real file.
  _t = get_tab("📊 类型集中性 (帕累托)")
  if _t:
      with _t:
          st.header("缺陷类型帕累托分析")

          type_counts = filtered_df["defect_type"].value_counts().reset_index()
          type_counts.columns = ["缺陷类型", "数量"]
          type_counts = type_counts.sort_values("数量", ascending=False).reset_index(drop=True)
          total_defects = type_counts["数量"].sum()
          # Both shares are expressed in percent (0-100).
          type_counts["累计占比"] = type_counts["数量"].cumsum() / total_defects * 100
          type_counts["占比"] = type_counts["数量"] / total_defects * 100

          fig, ax1 = plt.subplots(figsize=(10, 5))
          # Bars: count per defect type
          bars = ax1.bar(type_counts["缺陷类型"], type_counts["数量"], color="steelblue", alpha=0.8)
          ax1.set_xlabel("缺陷类型")
          ax1.set_ylabel("数量", color="steelblue")
          ax1.set_title("帕累托图 - 缺陷类型分布")

          # Line: cumulative share on a secondary axis, with the 80 % guide
          ax2 = ax1.twinx()
          ax2.plot(type_counts["缺陷类型"], type_counts["累计占比"], color="red", marker="o", linewidth=2)
          ax2.axhline(y=80, color="green", linestyle="--", alpha=0.5, label="80%线")
          ax2.set_ylabel("累计占比 (%)", color="red")
          ax2.set_ylim(0, 110)

          # Write each count just above its bar
          for bar, count in zip(bars, type_counts["数量"]):
              label_x = bar.get_x() + bar.get_width()/2
              label_y = bar.get_height() + 2
              ax1.text(label_x, label_y, str(count), ha="center", va="bottom", fontsize=9)
          st.pyplot(fig)
          plt.close(fig)

          # Backing table
          st.subheader("详细数据")
          st.dataframe(type_counts, use_container_width=True)

          # Severity breakdown; severities outside the palette are drawn gray
          st.subheader("按严重程度分布")
          sev_counts = filtered_df["severity"].value_counts()
          colors = {"轻微": "#4CAF50", "中等": "#FF9800", "严重": "#F44336"}
          bar_colors = [colors.get(level, "gray") for level in sev_counts.index]
          fig2, ax = plt.subplots(figsize=(6, 4))
          sev_counts.plot(kind="bar", ax=ax, color=bar_colors)
          ax.set_title("缺陷严重程度分布")
          ax.set_ylabel("数量")
          st.pyplot(fig2)
          plt.close(fig2)
  # ========== Tab 3: Time concentration ==========
  # Daily trend with a 3-day moving average, hourly distribution,
  # per-shift summary and a weekday distribution.
  # NOTE(review): the extracted listing carried no indentation; the nesting
  # below is reconstructed from statement order — confirm against the real file.
  _t = get_tab("📈 时间集中性")
  if _t:
      with _t:
          st.header("缺陷时间分布趋势")
          col1, col2 = st.columns(2)

          with col1:
              # Defects per day
              daily = filtered_df.groupby("day").size().reset_index(name="缺陷数")
              daily["day"] = pd.to_datetime(daily["day"])
              fig1, ax1 = plt.subplots(figsize=(10, 4))
              ax1.plot(daily["day"], daily["缺陷数"], marker="o", markersize=3, linewidth=1.5, color="steelblue")
              ax1.fill_between(daily["day"], daily["缺陷数"], alpha=0.2, color="steelblue")
              ax1.set_title("每日缺陷数量趋势")
              ax1.set_ylabel("缺陷数量")
              ax1.tick_params(axis="x", rotation=45)
              # The moving average is only overlaid when there are more than 3 days
              if len(daily) > 3:
                  daily["移动平均(3天)"] = daily["缺陷数"].rolling(window=3, min_periods=1).mean()
                  ax1.plot(daily["day"], daily["移动平均(3天)"], color="red", linestyle="--",
                           linewidth=2, alpha=0.7, label="3日移动平均")
                  ax1.legend()
              st.pyplot(fig1)
              plt.close(fig1)

          with col2:
              # Defects per hour of day; hours without defects are shown as 0
              hourly = filtered_df.groupby("hour").size().reindex(range(24), fill_value=0)
              # Hours >= 17 or < 8 get the highlight colour (titled as night shift)
              colors = ["#FF6B6B" if (h >= 17 or h < 8) else "#4ECDC4" for h in hourly.index]
              fig2, ax2 = plt.subplots(figsize=(10, 4))
              ax2.bar(hourly.index, hourly.values, color=colors, alpha=0.8)
              ax2.set_title("每小时缺陷分布 (红色=夜班)")
              ax2.set_xlabel("小时")
              ax2.set_ylabel("缺陷数量")
              st.pyplot(fig2)
              plt.close(fig2)

          # Per-shift summary
          st.subheader("班次对比")
          shift_stats = (
              filtered_df.groupby("shift")
              .agg({"defect_id": "count", "panel_id": "nunique"})
              .rename(columns={"defect_id": "缺陷数", "panel_id": "涉及面板数"})
          )
          st.dataframe(shift_stats, use_container_width=True)

          # Weekday distribution, Monday first
          st.subheader("按星期分布")
          weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
          weekday_cn = {"Monday": "周一", "Tuesday": "周二", "Wednesday": "周三",
                        "Thursday": "周四", "Friday": "周五", "Saturday": "周六", "Sunday": "周日"}
          weekday_labels = [weekday_cn[d] for d in weekday_order]
          # Map each timestamp to its Chinese weekday label and count per label;
          # weekdays with no defects are filled with 0.
          weekday_of_defect = filtered_df["timestamp"].dt.day_name().map(weekday_cn)
          weekday_counts = weekday_of_defect.value_counts().reindex(weekday_labels, fill_value=0)

          fig3, ax3 = plt.subplots(figsize=(8, 4))
          ax3.bar(range(7), weekday_counts.values, color="steelblue", alpha=0.8)
          ax3.set_xticks(range(7))
          ax3.set_xticklabels(weekday_counts.index)
          ax3.set_title("按星期分布")
          ax3.set_ylabel("缺陷数量")
          st.pyplot(fig3)
          plt.close(fig3)
  # ========== Tab 4: Batch concentration ==========
  # Per-batch defect counts and defects-per-panel rate, plus a list of
  # batches whose rate exceeds mean + 1 standard deviation.
  # NOTE(review): the extracted listing carried no indentation; the nesting
  # below is reconstructed from statement order — confirm against the real file.
  _t = get_tab("🏭 批次集中性")
  if _t:
      with _t:
          st.header("批次缺陷集中性分析")

          batch_stats = (
              filtered_df.groupby("batch_id")
              .agg({
                  "defect_id": "count",
                  "panel_id": "nunique",
                  "severity": lambda x: (x == "严重").sum(),
              })
              .rename(columns={"defect_id": "缺陷数", "panel_id": "面板数", "severity": "严重缺陷数"})
          )
          # "Rate" here is defects divided by distinct panels in the batch.
          batch_stats["缺陷率"] = batch_stats["缺陷数"] / batch_stats["面板数"]
          batch_stats = batch_stats.sort_index()

          batch_positions = range(len(batch_stats))
          mean_rate = batch_stats["缺陷率"].mean()

          col1, col2 = st.columns(2)
          with col1:
              # Defect count per batch
              fig1, ax1 = plt.subplots(figsize=(10, 4))
              ax1.bar(batch_positions, batch_stats["缺陷数"], color="steelblue", alpha=0.8)
              ax1.set_title("各批次缺陷数量")
              ax1.set_xlabel("批次")
              ax1.set_ylabel("缺陷数")
              ax1.set_xticks(batch_positions)
              ax1.set_xticklabels(batch_stats.index, rotation=90, fontsize=7)
              st.pyplot(fig1)
              plt.close(fig1)
          with col2:
              # Rate per batch with the overall mean as a reference line
              fig2, ax2 = plt.subplots(figsize=(10, 4))
              ax2.plot(batch_positions, batch_stats["缺陷率"], marker="o", markersize=3,
                       color="red", linewidth=1.5)
              ax2.axhline(y=mean_rate, color="green", linestyle="--",
                          label=f"平均缺陷率: {mean_rate:.2%}")
              ax2.set_title("各批次缺陷率趋势")
              ax2.set_xlabel("批次")
              ax2.set_ylabel("缺陷率")
              ax2.set_xticks(batch_positions)
              ax2.set_xticklabels(batch_stats.index, rotation=90, fontsize=7)
              ax2.legend()
              st.pyplot(fig2)
              plt.close(fig2)

          # Abnormal batches: rate above mean + 1 standard deviation
          st.subheader("异常批次 (缺陷率 > 平均值 + 1倍标准差)")
          threshold = mean_rate + batch_stats["缺陷率"].std()
          is_abnormal = batch_stats["缺陷率"] > threshold
          abnormal = batch_stats[is_abnormal].sort_values("缺陷率", ascending=False)
          if len(abnormal) > 0:
              st.dataframe(abnormal, use_container_width=True)
          else:
              st.success("未发现异常批次")
  # ========== Tab 5: Equipment / seat concentration ==========
  # Compares equipment, shows per-seat defect counts for one selected
  # equipment, flags seats above 1σ / 2σ, and cross-tabulates seat x type.
  # NOTE(review): the extracted listing carried no indentation; the nesting
  # below is reconstructed from statement order — confirm against the real file.
  _t = get_tab("🏗️ 设备座号集中性")
  if _t:
      with _t:
          st.header("🏗️ 前贴附制程设备座号集中性分析")
          st.markdown(
              "分析缺陷是否集中在特定设备的特定座号(工位)。"
              "如果某个座号缺陷明显多于其他座号,说明该座号对应的设备局部存在问题(如吸嘴老化、加热不均、压力异常等)。"
          )

          # --- Equipment-level comparison ---
          st.subheader("设备级别对比")
          eq_stats = (
              filtered_df.groupby("equipment_id")
              .agg({
                  "defect_id": "count",
                  "panel_id": "nunique",
                  "severity": lambda x: (x == "严重").sum(),
              })
              .rename(columns={"defect_id": "缺陷数", "panel_id": "面板数", "severity": "严重缺陷"})
          )
          eq_stats["缺陷率"] = eq_stats["缺陷数"] / eq_stats["面板数"]
          eq_stats = eq_stats.sort_values("缺陷数", ascending=False)

          eq_positions = range(len(eq_stats))
          eq_palette = ["#FF6B6B", "#4ECDC4", "#45B7D1"][:len(eq_stats)]

          col_eq1, col_eq2 = st.columns(2)
          with col_eq1:
              # Total defects per equipment, labelled above each bar
              fig_eq1, ax_eq1 = plt.subplots(figsize=(8, 4))
              bars1 = ax_eq1.bar(eq_positions, eq_stats["缺陷数"], color=eq_palette, alpha=0.8)
              ax_eq1.set_xticks(eq_positions)
              ax_eq1.set_xticklabels(eq_stats.index, fontsize=10)
              ax_eq1.set_ylabel("缺陷数量")
              ax_eq1.set_title("各设备缺陷总数")
              for bar, count in zip(bars1, eq_stats["缺陷数"]):
                  ax_eq1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 3,
                              str(count), ha="center", va="bottom", fontsize=10, fontweight="bold")
              st.pyplot(fig_eq1)
              plt.close(fig_eq1)
          with col_eq2:
              # Defects per panel, shown in percent
              rate_percent = eq_stats["缺陷率"] * 100
              fig_eq2, ax_eq2 = plt.subplots(figsize=(8, 4))
              bars2 = ax_eq2.bar(eq_positions, rate_percent, color=eq_palette, alpha=0.8)
              ax_eq2.set_xticks(eq_positions)
              ax_eq2.set_xticklabels(eq_stats.index, fontsize=10)
              ax_eq2.set_ylabel("缺陷率 (%)")
              ax_eq2.set_title("各设备缺陷率")
              for bar, rate in zip(bars2, rate_percent):
                  ax_eq2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.3,
                              f"{rate:.1f}%", ha="center", va="bottom", fontsize=10, fontweight="bold")
              st.pyplot(fig_eq2)
              plt.close(fig_eq2)
          st.dataframe(eq_stats, use_container_width=True)

          # --- Seat-level analysis ---
          st.divider()
          st.subheader("座号级别缺陷分布")
          # Pick one equipment to inspect
          eq_for_seat = st.selectbox("选择设备查看座号分布", options=sorted(filtered_df["equipment_id"].unique()), key="eq_seat")
          eq_data = filtered_df[filtered_df["equipment_id"] == eq_for_seat]

          # Known seat-grid layouts; equipment not listed here falls back to a
          # plain bar chart below.
          seat_layouts = {
              "LAM-A01": {"rows": 4, "cols": 5},
              "LAM-A02": {"rows": 4, "cols": 5},
              "LAM-B01": {"rows": 5, "cols": 4},
          }
          eq_info = seat_layouts.get(eq_for_seat)

          defects_per_seat = eq_data.groupby("seat_id").size()
          seat_counts = defects_per_seat.reset_index(name="缺陷数")
          seat_counts = seat_counts.sort_values("缺陷数", ascending=False)

          if eq_info:
              n_rows, n_cols = eq_info["rows"], eq_info["cols"]
              # Grid heat map; seats are addressed as "R<row>C<col>", 1-based,
              # and seats without defects count as 0.
              seat_to_defects = defects_per_seat.to_dict()
              grid = np.array(
                  [[seat_to_defects.get(f"R{r}C{c}", 0) for c in range(1, n_cols + 1)]
                   for r in range(1, n_rows + 1)],
                  dtype=float,
              )
              fig_grid, ax_grid = plt.subplots(figsize=(8, 6))
              im = ax_grid.imshow(grid, cmap="YlOrRd", aspect="equal")
              ax_grid.set_title(f"{eq_for_seat} 座号缺陷热力图")
              ax_grid.set_xlabel("列号")
              ax_grid.set_ylabel("行号")
              ax_grid.set_xticks(range(n_cols))
              ax_grid.set_xticklabels([f"C{i+1}" for i in range(n_cols)])
              ax_grid.set_yticks(range(n_rows))
              ax_grid.set_yticklabels([f"R{i+1}" for i in range(n_rows)])
              # Print the count in every cell; cells above 70 % of the maximum
              # use white text for contrast.
              contrast_cutoff = grid.max() * 0.7
              for r, c in np.ndindex(grid.shape):
                  val = int(grid[r, c])
                  color = "white" if val > contrast_cutoff else "black"
                  ax_grid.text(c, r, str(val), ha="center", va="center", fontsize=10,
                               color=color, fontweight="bold")
              plt.colorbar(im, ax=ax_grid, label="缺陷数量")
              st.pyplot(fig_grid)
              plt.close(fig_grid)
          else:
              seat_positions = range(len(seat_counts))
              fig_bar, ax_bar = plt.subplots(figsize=(10, 4))
              ax_bar.bar(seat_positions, seat_counts["缺陷数"], color="steelblue", alpha=0.8)
              ax_bar.set_xticks(seat_positions)
              ax_bar.set_xticklabels(seat_counts["seat_id"], rotation=45, fontsize=8)
              ax_bar.set_ylabel("缺陷数量")
              ax_bar.set_title("座号缺陷分布")
              st.pyplot(fig_bar)
              plt.close(fig_bar)

          # Seat table
          st.dataframe(seat_counts, use_container_width=True)

          # --- Abnormal seat detection (across all equipment) ---
          st.divider()
          st.subheader("异常座号检测")
          all_seat_stats = filtered_df.groupby(["equipment_id", "seat_id"]).size().reset_index(name="缺陷数")
          overall_mean = all_seat_stats["缺陷数"].mean()
          overall_std = all_seat_stats["缺陷数"].std()
          threshold_1x = overall_mean + overall_std
          threshold_2x = overall_mean + 2 * overall_std
          st.info(f"📊 全局统计: 平均每个座号 **{overall_mean:.1f}** 个缺陷 | 标准差 **{overall_std:.1f}**")

          col_anom1, col_anom2 = st.columns(2)
          with col_anom1:
              st.markdown(f"**⚠️ 1σ 预警座号** (缺陷数 > {threshold_1x:.0f})")
              over_1x = all_seat_stats["缺陷数"] > threshold_1x
              warning_seats = all_seat_stats[over_1x].sort_values("缺陷数", ascending=False)
              if len(warning_seats) > 0:
                  st.dataframe(warning_seats.reset_index(drop=True), use_container_width=True)
              else:
                  st.success("无预警座号")
          with col_anom2:
              st.markdown(f"**🔴 2σ 异常座号** (缺陷数 > {threshold_2x:.0f})")
              over_2x = all_seat_stats["缺陷数"] > threshold_2x
              critical_seats = all_seat_stats[over_2x].sort_values("缺陷数", ascending=False)
              if len(critical_seats) > 0:
                  st.dataframe(critical_seats.reset_index(drop=True), use_container_width=True)
              else:
                  st.success("无异常座号")

          # --- Seat x defect type cross analysis ---
          st.divider()
          st.subheader("座号 × 缺陷类型 交叉分析")
          st.markdown("识别哪些座号偏向产生特定类型的缺陷(如 R2C3 座号主要产生气泡 → 吸嘴问题)")
          # NOTE(review): the heat map is only drawn for equipment with a known
          # layout although it does not use the layout — confirm this is intended.
          if eq_info:
              eq_seat_type = eq_data.groupby(["seat_id", "defect_type"]).size().unstack(fill_value=0)
              fig_ct, ax_ct = plt.subplots(figsize=(10, 6))
              sns.heatmap(eq_seat_type, annot=True, fmt="d", cmap="YlOrRd", ax=ax_ct,
                          linewidths=0.5, linecolor="white")
              ax_ct.set_title(f"{eq_for_seat} 座号 × 缺陷类型 热力图")
              st.pyplot(fig_ct)
              plt.close(fig_ct)
  # ========== Tab 6: Correlation analysis ==========
  # Cross-tab heat maps (type x severity, type x shift), the ten panels
  # with the most defects, and a histogram of defects per panel.
  # NOTE(review): the extracted listing carried no indentation; the nesting
  # below is reconstructed from statement order — confirm against the real file.
  _t = get_tab("🔗 关联分析")
  if _t:
      with _t:
          st.header("缺陷关联分析")

          def _dominant_value(values):
              """Return the most frequent non-null value of *values*, or "N/A".

              Series.mode() ignores NaN, so a non-empty group whose values are
              all NaN yields an empty result; checking the mode itself (not the
              group length) avoids an IndexError in that case.
              """
              modes = values.mode()
              return modes.iloc[0] if not modes.empty else "N/A"

          col1, col2 = st.columns(2)
          with col1:
              # Defect type x severity
              ct = pd.crosstab(filtered_df["defect_type"], filtered_df["severity"])
              fig1, ax1 = plt.subplots(figsize=(8, 5))
              sns.heatmap(ct, annot=True, fmt="d", cmap="YlOrRd", ax=ax1,
                          linewidths=0.5, linecolor="white")
              ax1.set_title("缺陷类型 × 严重程度 热力图")
              st.pyplot(fig1)
              plt.close(fig1)
          with col2:
              # Defect type x shift
              ct2 = pd.crosstab(filtered_df["defect_type"], filtered_df["shift"])
              fig2, ax2 = plt.subplots(figsize=(8, 5))
              sns.heatmap(ct2, annot=True, fmt="d", cmap="Blues", ax=ax2,
                          linewidths=0.5, linecolor="white")
              ax2.set_title("缺陷类型 × 班次 热力图")
              st.pyplot(fig2)
              plt.close(fig2)

          # Ten panels with the most defects and their dominant defect type
          st.subheader("缺陷最多的面板 TOP10")
          panel_defects = filtered_df.groupby("panel_id").agg({
              "defect_id": "count",
              "defect_type": _dominant_value,
          }).rename(columns={"defect_id": "缺陷数", "defect_type": "主要缺陷类型"})
          panel_defects = panel_defects.sort_values("缺陷数", ascending=False).head(10)
          st.dataframe(panel_defects, use_container_width=True)

          # Distribution of defects per panel, with the mean marked
          panel_counts = filtered_df.groupby("panel_id").size()
          mean_per_panel = panel_counts.mean()
          fig3, ax3 = plt.subplots(figsize=(8, 4))
          ax3.hist(panel_counts, bins=20, color="steelblue", alpha=0.8, edgecolor="white")
          ax3.set_title("单面板缺陷数量分布")
          ax3.set_xlabel("缺陷数/面板")
          ax3.set_ylabel("面板数量")
          ax3.axvline(x=mean_per_panel, color="red", linestyle="--", label=f"平均: {mean_per_panel:.1f}")
          ax3.legend()
          st.pyplot(fig3)
          plt.close(fig3)
  1252. # --- 智能缺陷聚类 (DBSCAN + PCA) ---
  1253. _t = get_tab("🧠 智能缺陷聚类 (DBSCAN)")
  1254. if _t:
  1255. with _t:
  1256. st.header("🧠 DBSCAN 智能缺陷空间聚类")
  1257. st.markdown(
  1258. "**原理**: DBSCAN 是基于密度的空间聚类算法,能自动识别任意形状的缺陷聚集区域,"
  1259. "无需预设聚类数量,自动过滤随机散落的噪声缺陷。"
  1260. "行业标准:半导体晶圆/面板缺陷模式识别首选算法。"
  1261. )
  1262. col1, col2 = st.columns([2, 1])
  1263. with col1:
  1264. # --- 参数控制 ---
  1265. st.subheader("参数设置")
  1266. p_col1, p_col2 = st.columns(2)
  1267. with p_col1:
  1268. eps = st.slider(
  1269. "eps (邻域半径 mm)",
  1270. min_value=5.0, max_value=100.0, value=25.0, step=5.0,
  1271. help="两个点被视为'邻居'的最大距离。值越大,簇越大。"
  1272. )
  1273. with p_col2:
  1274. min_samples = st.slider(
  1275. "min_samples (最小簇点数)",
  1276. min_value=3, max_value=50, value=10,
  1277. help="形成一个簇所需的最小点数。值越大,越严格的聚集才算簇。"
  1278. )
  1279. # --- 执行聚类 ---
  1280. coords = filtered_df[["x_mm", "y_mm"]].values
  1281. scaler = StandardScaler()
  1282. coords_scaled = scaler.fit_transform(coords)
  1283. dbscan = DBSCAN(eps=eps / scaler.scale_[0], min_samples=min_samples)
  1284. filtered_df["cluster"] = dbscan.fit_predict(coords_scaled)
  1285. # 统计聚类结果
  1286. n_clusters = len(set(dbscan.labels_)) - (1 if -1 in dbscan.labels_ else 0)
  1287. n_noise = list(dbscan.labels_).count(-1)
  1288. st.info(f"📊 **聚类结果**: 发现 **{n_clusters}** 个缺陷聚集区域,**{n_noise}** 个噪声点(随机散落缺陷)")
  1289. # --- 可视化 ---
  1290. fig, axes = plt.subplots(1, 2, figsize=(14, 6))
  1291. # 左图:聚类结果(空间位置)
  1292. labels = filtered_df["cluster"].values
  1293. unique_labels = set(labels)
  1294. colors = plt.cm.get_cmap("tab20", len(unique_labels) if len(unique_labels) > 0 else 1)
  1295. for k in unique_labels:
  1296. if k == -1:
  1297. # 噪声点
  1298. xy = filtered_df[labels == k][["x_mm", "y_mm"]].values
  1299. axes[0].scatter(xy[:, 0], xy[:, 1], c="lightgray", s=3, alpha=0.3, label="噪声")
  1300. else:
  1301. xy = filtered_df[labels == k][["x_mm", "y_mm"]].values
  1302. axes[0].scatter(xy[:, 0], xy[:, 1], c=[colors(k)], s=15, alpha=0.7,
  1303. label=f"簇 {k+1} ({len(xy)} 点)")
  1304. axes[0].set_title(f"DBSCAN 空间聚类结果 (eps={eps}, min_samples={min_samples})")
  1305. axes[0].set_xlabel("X (mm)")
  1306. axes[0].set_ylabel("Y (mm)")
  1307. axes[0].set_aspect("equal")
  1308. axes[0].legend(fontsize=7, loc="upper right", ncol=2)
  1309. # 右图:PCA 降维可视化(加入更多特征维度)
  1310. if len(filtered_df) > 2:
  1311. # 构建多维特征:x, y, hour, defect_type编码, severity编码
  1312. feature_df = filtered_df[["x_mm", "y_mm", "hour"]].copy()
  1313. # 缺陷类型编码
  1314. type_map = {t: i for i, t in enumerate(filtered_df["defect_type"].unique())}
  1315. feature_df["type_code"] = filtered_df["defect_type"].map(type_map).astype(float)
  1316. # 严重程度编码
  1317. sev_map = {"轻微": 0, "中等": 1, "严重": 2}
  1318. feature_df["sev_code"] = filtered_df["severity"].map(sev_map).astype(float)
  1319. features = feature_df.values
  1320. features_scaled = StandardScaler().fit_transform(features)
  1321. # PCA 降维到 2D
  1322. n_components = min(2, features_scaled.shape[1])
  1323. pca = PCA(n_components=n_components)
  1324. pca_result = pca.fit_transform(features_scaled)
  1325. explained_var = pca.explained_variance_ratio_
  1326. for k in unique_labels:
  1327. mask_k = labels == k
  1328. if k == -1:
  1329. axes[1].scatter(pca_result[mask_k, 0], pca_result[mask_k, 1],
  1330. c="lightgray", s=3, alpha=0.3, label="噪声")
  1331. else:
  1332. axes[1].scatter(pca_result[mask_k, 0], pca_result[mask_k, 1],
  1333. c=[colors(k)], s=15, alpha=0.7, label=f"簇 {k+1}")
  1334. axes[1].set_title(
  1335. f"PCA 多维特征降维\n"
  1336. f"PC1: {explained_var[0]*100:.1f}% | PC2: {explained_var[1]*100:.1f}%"
  1337. )
  1338. axes[1].set_xlabel("主成分 1")
  1339. axes[1].set_ylabel("主成分 2")
  1340. axes[1].legend(fontsize=7, loc="upper right")
  1341. st.pyplot(fig)
  1342. plt.close()
  1343. # --- 簇特征统计 ---
  1344. if n_clusters > 0:
  1345. st.divider()
  1346. st.subheader("各簇特征分析")
  1347. cluster_data = []
  1348. for k in sorted([c for c in unique_labels if c != -1]):
  1349. cluster_df = filtered_df[labels == k]
  1350. cluster_data.append({
  1351. "簇编号": k + 1,
  1352. "缺陷数量": len(cluster_df),
  1353. "占比": f"{len(cluster_df)/len(filtered_df)*100:.1f}%",
  1354. "中心X(mm)": round(cluster_df["x_mm"].mean(), 1),
  1355. "中心Y(mm)": round(cluster_df["y_mm"].mean(), 1),
  1356. "X范围": f"{cluster_df['x_mm'].min():.0f}~{cluster_df['x_mm'].max():.0f}",
  1357. "Y范围": f"{cluster_df['y_mm'].min():.0f}~{cluster_df['y_mm'].max():.0f}",
  1358. "主要缺陷": cluster_df["defect_type"].mode().iloc[0] if len(cluster_df) > 0 else "-",
  1359. "主要严重度": cluster_df["severity"].mode().iloc[0] if len(cluster_df) > 0 else "-",
  1360. "涉及批次": cluster_df["batch_id"].nunique(),
  1361. "涉及面板": cluster_df["panel_id"].nunique(),
  1362. })
  1363. st.dataframe(pd.DataFrame(cluster_data), use_container_width=True)
  1364. with col2:
  1365. # --- 聚类结果说明 ---
  1366. st.subheader("📖 结果解读")
  1367. st.markdown(
  1368. f"""
  1369. **当前参数**: eps={eps}mm, min_samples={min_samples}
  1370. **聚类统计**:
  1371. - 缺陷聚集区域: {n_clusters} 个
  1372. - 随机散落噪声: {n_noise} 个
  1373. - 噪声占比: {n_noise/len(filtered_df)*100:.1f}%
  1374. **参数调优建议**:
  1375. - **eps 调大** → 簇数量减少,簇变大
  1376. - **eps 调小** → 簇数量增加,更精细
  1377. - **min_samples 调大** → 只有高度密集区域才算簇
  1378. - **min_samples 调小** → 更多区域被识别为簇
  1379. **工业应用**:
  1380. - 每个"簇"代表一个**系统性缺陷源**
  1381. (如某台设备、某道工序、某个物料批次)
  1382. - "噪声"点是随机缺陷,通常无需特别关注
  1383. - 重点关注**缺陷数量多、涉及批次集中**的簇
  1384. """
  1385. )
  1386. # --- 簇分布饼图 ---
  1387. if n_clusters > 0:
  1388. st.subheader("簇规模分布")
  1389. cluster_counts = filtered_df[labels >= 0]["cluster"].value_counts().sort_index()
  1390. fig_pie, ax_pie = plt.subplots(figsize=(5, 5))
  1391. pie_labels = [f"簇{i+1}" for i in cluster_counts.index]
  1392. ax_pie.pie(cluster_counts.values, labels=pie_labels, autopct="%1.1f%%",
  1393. colors=plt.cm.tab20.colors[:len(cluster_counts)], startangle=90)
  1394. ax_pie.set_title("各簇缺陷占比")
  1395. st.pyplot(fig_pie)
  1396. plt.close()
  1397. # --- DBSCAN vs K-Means 对比 ---
  1398. st.subheader("为什么选 DBSCAN?")
  1399. st.markdown(
  1400. """
  1401. | 维度 | DBSCAN | K-Means |
  1402. |------|--------|---------|
  1403. | 形状适应 | ✅ 任意形状 | ❌ 仅球形 |
  1404. | 预设K值 | ❌ 不需要 | ✅ 必须 |
  1405. | 噪声处理 | ✅ 自动过滤 | ❌ 干扰聚类 |
  1406. | 环形/线形缺陷 | ✅ 能识别 | ❌ 识别不了 |
  1407. """
  1408. )
  1409. # ========== Tab 8: SPC 控制图与预警 ==========
  1410. _t = get_tab("🚨 SPC 控制图与预警")
  1411. if _t:
  1412. with _t:
  1413. st.header("🚨 SPC 统计过程控制")
  1414. st.markdown(
  1415. "基于统计过程控制(SPC)方法,监控每日缺陷率是否在控制限内,"
  1416. "自动检测异常趋势并给出改善/恶化结论。"
  1417. )
  1418. # --- 数据准备:按天计算缺陷率 ---
  1419. # 需要知道每天检测了多少面板才能算缺陷率
  1420. # 用 batch_id 近似日期
  1421. spc_metrics = calculate_spc_metrics(df)
  1422. daily_all = spc_metrics["daily"]
  1423. if len(daily_all) < 2:
  1424. st.warning("数据天数不足,无法生成控制图")
  1425. else:
  1426. # 控制限计算
  1427. p_bar = spc_metrics["p_bar"]
  1428. sigma_p = spc_metrics["sigma_p"]
  1429. UCL = spc_metrics["ucl"]
  1430. LCL = spc_metrics["lcl"]
  1431. UWL = spc_metrics["uwl"]
  1432. LWL = spc_metrics["lwl"]
  1433. # --- Western Electric 规则检测 ---
  1434. we_violations = []
  1435. # 规则1: 单点超出 3σ 控制限
  1436. for i, row in daily_all.iterrows():
  1437. if row["defect_rate"] > UCL or row["defect_rate"] < LCL:
  1438. we_violations.append({
  1439. "日期": row["day"].strftime("%Y-%m-%d"),
  1440. "规则": "Rule 1: 超出3σ控制限",
  1441. "值": f"{row['defect_rate']:.2%}"
  1442. })
  1443. # 规则2: 连续7点上升或下降
  1444. rates = daily_all["defect_rate"].values
  1445. if len(rates) >= 7:
  1446. for i in range(len(rates) - 6):
  1447. window = rates[i:i+7]
  1448. if all(window[j] < window[j+1] for j in range(6)):
  1449. we_violations.append({
  1450. "日期": daily_all.loc[i+6, "day"].strftime("%Y-%m-%d"),
  1451. "规则": "Rule 2: 连续7点上升",
  1452. "值": f"{rates[i]:.2%} → {rates[i+6]:.2%}"
  1453. })
  1454. elif all(window[j] > window[j+1] for j in range(6)):
  1455. we_violations.append({
  1456. "日期": daily_all.loc[i+6, "day"].strftime("%Y-%m-%d"),
  1457. "规则": "Rule 2: 连续7点下降",
  1458. "值": f"{rates[i]:.2%} → {rates[i+6]:.2%}"
  1459. })
  1460. # 规则3: 连续7点在中心线同一侧
  1461. for i in range(len(rates) - 6):
  1462. window = rates[i:i+7]
  1463. if all(v > p_bar for v in window):
  1464. we_violations.append({
  1465. "日期": daily_all.loc[i+6, "day"].strftime("%Y-%m-%d"),
  1466. "规则": "Rule 3: 连续7点在CL上方",
  1467. "值": f"持续偏高"
  1468. })
  1469. elif all(v < p_bar for v in window):
  1470. we_violations.append({
  1471. "日期": daily_all.loc[i+6, "day"].strftime("%Y-%m-%d"),
  1472. "规则": "Rule 3: 连续7点在CL下方",
  1473. "值": f"持续偏低"
  1474. })
  1475. # --- 趋势分析 ---
  1476. from numpy.polynomial import polynomial as P
  1477. x = np.arange(len(daily_all))
  1478. coeffs = np.polyfit(x, rates, 1)
  1479. slope = coeffs[0]
  1480. daily_all["trend"] = np.polyval(coeffs, x)
  1481. if abs(slope) < sigma_p * 0.1:
  1482. trend_status = "稳定"
  1483. trend_icon = "➡️"
  1484. trend_color = "normal"
  1485. elif slope > 0:
  1486. trend_status = "恶化中"
  1487. trend_icon = "📈"
  1488. trend_color = "inverse"
  1489. else:
  1490. trend_status = "改善中"
  1491. trend_icon = "📉"
  1492. trend_color = "normal"
  1493. # --- KPI 行 ---
  1494. kpi_spc1, kpi_spc2, kpi_spc3, kpi_spc4 = st.columns(4)
  1495. kpi_spc1.metric("平均缺陷率", f"{p_bar:.2%}")
  1496. kpi_spc2.metric("控制限 (UCL/LCL)", f"{UCL:.2%} / {LCL:.2%}")
  1497. kpi_spc3.metric("趋势判断", f"{trend_icon} {trend_status}", delta=f"斜率: {slope*100:.3f}%/天", delta_color=trend_color)
  1498. kpi_spc4.metric("Western Electric 告警", f"{len(we_violations)} 次", delta="需关注" if len(we_violations) > 0 else "正常")
  1499. # --- 控制图 ---
  1500. st.divider()
  1501. st.subheader("X-bar 控制图 (每日缺陷率)")
  1502. fig_spc, ax_spc = plt.subplots(figsize=(14, 5))
  1503. # 数据点
  1504. ax_spc.plot(daily_all["day"], daily_all["defect_rate"],
  1505. marker="o", markersize=4, linewidth=1.5, color="steelblue", label="日缺陷率")
  1506. ax_spc.fill_between(daily_all["day"], daily_all["defect_rate"], alpha=0.15, color="steelblue")
  1507. # 控制限线
  1508. ax_spc.axhline(y=p_bar, color="green", linestyle="-", linewidth=1.5, label=f"CL (中心线): {p_bar:.2%}")
  1509. ax_spc.axhline(y=UCL, color="red", linestyle="--", linewidth=1, label=f"UCL: {UCL:.2%}")
  1510. ax_spc.axhline(y=LCL, color="red", linestyle="--", linewidth=1, label=f"LCL: {LCL:.2%}")
  1511. ax_spc.axhline(y=UWL, color="orange", linestyle=":", linewidth=1, alpha=0.6, label=f"UWL (2σ): {UWL:.2%}")
  1512. ax_spc.axhline(y=LWL, color="orange", linestyle=":", linewidth=1, alpha=0.6, label=f"LWL (2σ): {LWL:.2%}")
  1513. # 标注异常点
  1514. for v in we_violations:
  1515. if "Rule 1" in v["规则"]:
  1516. anomaly_date = pd.Timestamp(v["日期"])
  1517. val = float(v["值"].rstrip("%")) / 100
  1518. ax_spc.annotate("⚠️", (anomaly_date, val), fontsize=12,
  1519. ha="center", va="bottom", color="red")
  1520. ax_spc.set_title("SPC 控制图 - 每日缺陷率")
  1521. ax_spc.set_ylabel("缺陷率")
  1522. ax_spc.tick_params(axis="x", rotation=45)
  1523. ax_spc.legend(fontsize=8, loc="upper right")
  1524. ax_spc.grid(True, alpha=0.3)
  1525. st.pyplot(fig_spc)
  1526. plt.close()
  1527. # --- 趋势图 ---
  1528. st.subheader("缺陷率趋势 (含线性回归)")
  1529. fig_trend, ax_trend = plt.subplots(figsize=(14, 4))
  1530. ax_trend.plot(daily_all["day"], daily_all["defect_rate"],
  1531. marker="o", markersize=3, linewidth=1.5, color="steelblue", label="日缺陷率")
  1532. ax_trend.plot(daily_all["day"], daily_all["trend"],
  1533. color="red", linestyle="--", linewidth=2, label=f"趋势线 (斜率: {slope*100:.3f}%/天)")
  1534. ax_trend.fill_between(daily_all["day"], daily_all["defect_rate"], alpha=0.1, color="steelblue")
  1535. ax_trend.axhline(y=p_bar, color="green", linestyle="--", alpha=0.5, label=f"平均: {p_bar:.2%}")
  1536. ax_trend.set_ylabel("缺陷率")
  1537. ax_trend.tick_params(axis="x", rotation=45)
  1538. ax_trend.legend(fontsize=8)
  1539. ax_trend.grid(True, alpha=0.3)
  1540. st.pyplot(fig_trend)
  1541. plt.close()
  1542. # --- 告警清单 ---
  1543. st.divider()
  1544. st.subheader("⚠️ Western Electric 规则告警清单")
  1545. if we_violations:
  1546. we_df = pd.DataFrame(we_violations)
  1547. st.dataframe(we_df, use_container_width=True)
  1548. st.warning(f"共发现 **{len(we_violations)}** 次统计异常,建议关注对应日期的工艺参数和人员排班")
  1549. else:
  1550. st.success("✅ 未触发 Western Electric 规则告警,过程处于统计控制状态")
  1551. # --- 结论 ---
  1552. st.divider()
  1553. st.subheader("📋 过程能力结论")
  1554. if trend_status == "改善中":
  1555. st.success(
  1556. f"**趋势改善中** 📉\n\n"
  1557. f"每日缺陷率以平均 {abs(slope)*100:.3f}%/天 的速度下降。\n"
  1558. f"当前平均缺陷率为 {p_bar:.2%},控制上限 {UCL:.2%}。\n"
  1559. f"{'已触发' if we_violations else '未触发'} Western Electric 规则告警。"
  1560. )
  1561. elif trend_status == "恶化中":
  1562. st.error(
  1563. f"**趋势恶化中** 📈\n\n"
  1564. f"每日缺陷率以平均 {slope*100:.3f}%/天 的速度上升。\n"
  1565. f"当前平均缺陷率为 {p_bar:.2%},控制上限 {UCL:.2%}。\n"
  1566. f"{'已触发' if we_violations else '未触发'} Western Electric 规则告警。\n\n"
  1567. f"建议:检查近期工艺参数变化、设备状态和原材料批次。"
  1568. )
  1569. else:
  1570. st.info(
  1571. f"**过程稳定** ➡️\n\n"
  1572. f"缺陷率趋势平稳,斜率 {slope*100:.3f}%/天,无显著上升或下降。\n"
  1573. f"当前平均缺陷率为 {p_bar:.2%},控制限 [{LCL:.2%}, {UCL:.2%}]。\n"
  1574. f"{'已触发' if we_violations else '未触发'} Western Electric 规则告警。"
  1575. )
  1576. # ========== 重复缺陷坐标检测 ==========
  1577. _t = get_tab("🗺️ 空间集中性")
  1578. if _t:
  1579. with _t:
  1580. st.divider()
  1581. st.subheader("🎯 重复缺陷坐标检测")
  1582. st.markdown(
  1583. "检测在不同面板上重复出现的缺陷坐标。随机缺陷不会在同一位置反复出现,"
  1584. "而设备硬伤(如吸嘴划伤、夹具压痕)会在相同位置持续产生缺陷。"
  1585. "这是从'描述分析'跨入'根因诊断'的关键一步。"
  1586. )
  1587. # 坐标分桶:将面板划分为网格,找出跨面板重复的缺陷桶
  1588. repeat_bin_size = st.slider("坐标分桶大小 (mm)", min_value=5, max_value=50, value=15, step=5,
  1589. help="将坐标按此大小分桶,同一桶内出现于不同面板的缺陷视为'重复'")
  1590. pw = df["panel_width_mm"].iloc[0]
  1591. ph = df["panel_height_mm"].iloc[0]
  1592. # 计算桶ID
  1593. df_copy = filtered_df.copy()
  1594. df_copy["x_bin"] = (df_copy["x_mm"] // repeat_bin_size).astype(int)
  1595. df_copy["y_bin"] = (df_copy["y_mm"] // repeat_bin_size).astype(int)
  1596. df_copy["bin_key"] = df_copy["x_bin"].astype(str) + "_" + df_copy["y_bin"].astype(str)
  1597. # 统计每个桶出现在多少不同面板上
  1598. bin_panels = df_copy.groupby("bin_key").agg(
  1599. panel_count=("panel_id", "nunique"),
  1600. defect_count=("defect_id", "count"),
  1601. x_center=("x_mm", "mean"),
  1602. y_center=("y_mm", "mean"),
  1603. dominant_type=("defect_type", lambda x: x.mode().iloc[0] if len(x) > 0 else "-"),
  1604. dominant_severity=("severity", lambda x: x.mode().iloc[0] if len(x) > 0 else "-"),
  1605. ).reset_index()
  1606. repeat_threshold = st.slider("重复判定阈值 (跨面板数)", min_value=2, max_value=10, value=3)
  1607. repeated_bins = bin_panels[bin_panels["panel_count"] >= repeat_threshold].sort_values("panel_count", ascending=False)
  1608. col_repeat1, col_repeat2 = st.columns([1, 2])
  1609. with col_repeat1:
  1610. st.metric("重复缺陷桶数", f"{len(repeated_bins)}",
  1611. delta=f"阈值: ≥{repeat_threshold} 块面板")
  1612. if len(repeated_bins) > 0:
  1613. st.dataframe(
  1614. repeated_bins[["panel_count", "defect_count", "x_center", "y_center", "dominant_type", "dominant_severity"]]
  1615. .rename(columns={"panel_count": "涉及面板", "defect_count": "缺陷总数",
  1616. "x_center": "中心X", "y_center": "中心Y",
  1617. "dominant_type": "主要类型", "dominant_severity": "主要严重度"}),
  1618. use_container_width=True, height=400
  1619. )
  1620. else:
  1621. st.info(f"未发现跨 {repeat_threshold}+ 块面板的重复缺陷坐标")
  1622. with col_repeat2:
  1623. if len(repeated_bins) > 0:
  1624. # 在面板图上标注重复缺陷桶
  1625. fig_repeat, ax_repeat = plt.subplots(figsize=(4, 6))
  1626. # 面板背景
  1627. ax_repeat.add_patch(plt.Rectangle((0, 0), pw, ph, facecolor="#1a1a2e", edgecolor="#444", linewidth=2))
  1628. ax_repeat.add_patch(plt.Rectangle((8, 8), pw-16, ph-16, facecolor="#16213e", edgecolor="#0f3460", linewidth=1.5))
  1629. # 所有缺陷散点(淡)
  1630. ax_repeat.scatter(filtered_df["x_mm"], filtered_df["y_mm"],
  1631. alpha=0.1, s=2, c="gray", edgecolors="none", zorder=1)
  1632. # 重复缺陷桶标注重叠圈
  1633. max_count = repeated_bins["panel_count"].max()
  1634. for _, row in repeated_bins.iterrows():
  1635. size = 100 + (row["panel_count"] / max_count) * 400
  1636. ax_repeat.scatter(row["x_center"], row["y_center"],
  1637. s=size, c="red", alpha=0.3, edgecolors="red",
  1638. linewidth=2, zorder=3)
  1639. ax_repeat.text(row["x_center"], row["y_center"],
  1640. str(row["panel_count"]), ha="center", va="center",
  1641. fontsize=8, color="white", fontweight="bold", zorder=4)
  1642. ax_repeat.set_xlim(-5, pw + 5)
  1643. ax_repeat.set_ylim(-5, ph + 5)
  1644. ax_repeat.set_title(f"重复缺陷坐标 (≥{repeat_threshold} 块面板)", fontsize=11)
  1645. ax_repeat.set_xlabel("X (mm)")
  1646. ax_repeat.set_ylabel("Y (mm)")
  1647. ax_repeat.set_aspect("equal")
  1648. ax_repeat.grid(True, alpha=0.1, color="gray")
  1649. st.pyplot(fig_repeat)
  1650. plt.close()
  1651. else:
  1652. st.info("调整分桶大小或阈值以检测重复缺陷")
  1653. # ========== Tab 9: 缺陷模式识别 ==========
  1654. _t = get_tab("🔬 缺陷模式识别")
  1655. if _t:
  1656. with _t:
  1657. st.header("🔬 缺陷空间模式自动识别")
  1658. st.markdown(
  1659. "参考 WM811K 晶圆缺陷图谱分类标准,对每块面板的缺陷分布进行模式评分。"
  1660. "不同模式对应不同的根因机制(如边缘型→贴合工艺,角落型→夹具应力,"
  1661. "中心型→压力不均,线条型→机械刮伤,随机型→来料污染)。"
  1662. )
  1663. from scipy.spatial import ConvexHull
  1664. from scipy.spatial.distance import cdist
  1665. pw = df["panel_width_mm"].iloc[0]
  1666. ph = df["panel_height_mm"].iloc[0]
  1667. # 按面板分组,逐块分析模式
  1668. panel_groups = filtered_df.groupby("panel_id")
  1669. patterns_results = []
  1670. for panel_id, panel_data in panel_groups:
  1671. if len(panel_data) < 3:
  1672. continue
  1673. coords = panel_data[["x_mm", "y_mm"]].values
  1674. # 归一化坐标到 [0,1]
  1675. x_norm = panel_data["x_mm"].values / pw
  1676. y_norm = panel_data["y_mm"].values / ph
  1677. # --- 模式1: 边缘型 (缺陷靠近面板四边) ---
  1678. # 计算每个点到最近边缘的距离比例
  1679. edge_dist = np.minimum(np.minimum(x_norm, 1 - x_norm),
  1680. np.minimum(y_norm, 1 - y_norm))
  1681. edge_ratio = (edge_dist < 0.12).mean() # 12% 以内的点视为边缘点
  1682. edge_score = edge_ratio
  1683. # --- 模式2: 角落型 (缺陷集中在四个角落) ---
  1684. corner_threshold = 0.15 # 15% 范围
  1685. in_corner = (
  1686. ((x_norm < corner_threshold) & (y_norm < corner_threshold)) | # 左下
  1687. ((x_norm < corner_threshold) & (y_norm > 1 - corner_threshold)) | # 左上
  1688. ((x_norm > 1 - corner_threshold) & (y_norm < corner_threshold)) | # 右下
  1689. ((x_norm > 1 - corner_threshold) & (y_norm > 1 - corner_threshold)) # 右上
  1690. )
  1691. corner_score = in_corner.mean()
  1692. # --- 模式3: 中心型 (缺陷集中在面板中心区域) ---
  1693. center_x, center_y = 0.5, 0.5
  1694. dist_to_center = np.sqrt((x_norm - center_x)**2 + (y_norm - center_y)**2)
  1695. center_radius = 0.18 # 18% 半径
  1696. center_score = (dist_to_center < center_radius).mean()
  1697. # --- 模式4: 线条型 (缺陷沿一条线分布) ---
  1698. # 用 PCA 第一主成分占比来判断线性程度
  1699. if len(coords) >= 3:
  1700. from sklearn.decomposition import PCA
  1701. pca = PCA(n_components=2)
  1702. pca.fit(coords)
  1703. linearity = pca.explained_variance_ratio_[0] # 第一主成分占比
  1704. line_score = linearity
  1705. else:
  1706. line_score = 0
  1707. # --- 模式5: 随机型 (均匀分布,无明显模式) ---
  1708. # 用空间变异系数:将面板分为网格,计算各格缺陷数的变异系数
  1709. grid_n = 5
  1710. x_edges = np.linspace(0, pw, grid_n + 1)
  1711. y_edges = np.linspace(0, ph, grid_n + 1)
  1712. H, _, _ = np.histogram2d(panel_data["x_mm"].values, panel_data["y_mm"].values,
  1713. bins=[x_edges, y_edges])
  1714. if H.sum() > 0 and H.std() > 0:
  1715. cv = H.std() / H.mean() if H.mean() > 0 else 999
  1716. # cv 越小越均匀(随机)
  1717. randomness_score = max(0, 1 - cv / 3) # 归一化到 [0,1]
  1718. else:
  1719. randomness_score = 0
  1720. # --- 主导模式判定 ---
  1721. scores = {
  1722. "边缘型": edge_score,
  1723. "角落型": corner_score,
  1724. "中心型": center_score,
  1725. "线条型": line_score,
  1726. "随机型": randomness_score,
  1727. }
  1728. dominant_pattern = max(scores, key=scores.get)
  1729. patterns_results.append({
  1730. "面板ID": panel_id,
  1731. "缺陷数": len(panel_data),
  1732. "主导模式": dominant_pattern,
  1733. "边缘型": round(edge_score, 2),
  1734. "角落型": round(corner_score, 2),
  1735. "中心型": round(center_score, 2),
  1736. "线条型": round(line_score, 2),
  1737. "随机型": round(randomness_score, 2),
  1738. })
  1739. if patterns_results:
  1740. pattern_df = pd.DataFrame(patterns_results)
  1741. # --- 模式统计 ---
  1742. col_pat1, col_pat2, col_pat3 = st.columns([1, 1, 2])
  1743. with col_pat1:
  1744. pattern_counts = pattern_df["主导模式"].value_counts()
  1745. fig_pat, ax_pat = plt.subplots(figsize=(8, 5))
  1746. colors_pat = {"边缘型": "#FF6B6B", "角落型": "#FFA500", "中心型": "#4ECDC4",
  1747. "线条型": "#9B59B6", "随机型": "#95A5A6"}
  1748. bars = ax_pat.bar(pattern_counts.index, pattern_counts.values,
  1749. color=[colors_pat.get(p, "#888") for p in pattern_counts.index],
  1750. alpha=0.8)
  1751. for bar, count in zip(bars, pattern_counts.values):
  1752. ax_pat.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
  1753. str(count), ha="center", va="bottom", fontsize=11, fontweight="bold")
  1754. ax_pat.set_title("缺陷模式分布")
  1755. ax_pat.set_ylabel("面板数量")
  1756. st.pyplot(fig_pat)
  1757. plt.close()
  1758. with col_pat2:
  1759. st.subheader("模式占比")
  1760. total_panels = len(pattern_df)
  1761. for pattern in ["边缘型", "角落型", "中心型", "线条型", "随机型"]:
  1762. count = (pattern_df["主导模式"] == pattern).sum()
  1763. pct = count / total_panels * 100
  1764. st.metric(pattern, f"{count} 块", f"{pct:.1f}%")
  1765. with col_pat3:
  1766. # --- 模式-根因映射 ---
  1767. st.subheader("模式 → 可能根因")
  1768. root_cause_map = {
  1769. "边缘型": {
  1770. "可能原因": "贴合工艺参数异常、边缘夹具压力不均、涂胶厚度不均",
  1771. "建议排查": "检查贴合压力、边缘密封工艺、涂胶均匀性"
  1772. },
  1773. "角落型": {
  1774. "可能原因": "夹具应力集中、面板放置定位偏差、角落散热不良",
  1775. "建议排查": "检查夹具对齐、面板定位精度、角落温度分布"
  1776. },
  1777. "中心型": {
  1778. "可能原因": "压力中心不均、FPC绑定区域工艺异常、中心温度过高",
  1779. "建议排查": "检查压力分布曲线、FPC绑定参数、加热板温度"
  1780. },
  1781. "线条型": {
  1782. "可能原因": "机械刮伤、传送带划痕、清洗刷毛磨损、吸嘴移动轨迹",
  1783. "建议排查": "检查传送带状态、清洗设备、吸嘴运动轨迹"
  1784. },
  1785. "随机型": {
  1786. "可能原因": "来料污染、环境尘埃、化学药液杂质",
  1787. "建议排查": "检查洁净室等级、来料检验记录、药液过滤状态"
  1788. },
  1789. }
  1790. for pattern in ["边缘型", "角落型", "中心型", "线条型", "随机型"]:
  1791. count = (pattern_df["主导模式"] == pattern).sum()
  1792. if count == 0:
  1793. continue
  1794. rc = root_cause_map[pattern]
  1795. with st.expander(f"{pattern} ({count} 块面板)"):
  1796. st.markdown(f"**可能原因**: {rc['可能原因']}")
  1797. st.markdown(f"**建议排查**: {rc['建议排查']}")
  1798. # --- 详细数据表 ---
  1799. st.divider()
  1800. st.subheader("面板模式评分明细")
  1801. st.dataframe(pattern_df, use_container_width=True, height=400)
  1802. else:
  1803. st.warning("当前筛选条件下无足够面板数据进行模式分析(需至少 3 个缺陷/面板)")
  1804. # ========== Tab 10: 设备健康与共性分析 ==========
  1805. _t = get_tab("💚 设备健康与共性分析")
  1806. if _t:
  1807. with _t:
  1808. st.header("💚 设备健康评分 & 共性分析")
  1809. st.markdown(
  1810. "综合评估各台设备的健康状态,并在发现异常批次时自动分析其共性特征。"
  1811. )
  1812. # --- 设备健康评分 ---
  1813. st.subheader("设备健康评分 (0-100)")
  1814. st.markdown("评分维度:缺陷率(40%) + 座号集中度(30%) + 严重度分布(30%)")
  1815. health_data = []
  1816. for eq_id in sorted(df["equipment_id"].unique()):
  1817. eq_all = df[df["equipment_id"] == eq_id]
  1818. eq_filtered = filtered_df[filtered_df["equipment_id"] == eq_id]
  1819. # 维度1: 缺陷率评分 (40%)
  1820. eq_panels = eq_all["panel_id"].nunique()
  1821. eq_defects = len(eq_all)
  1822. eq_defect_rate = eq_defects / max(eq_panels, 1)
  1823. # 缺陷率越低分越高,线性归一化
  1824. # 以 5 个缺陷/面板为最差(0分),0 为最好(100分)
  1825. rate_score = max(0, 100 * (1 - eq_defect_rate / 5))
  1826. # 维度2: 座号集中度评分 (30%)
  1827. # 座号分布越均匀分越高,集中分越低
  1828. eq_seat_counts = eq_all.groupby("seat_id").size()
  1829. if len(eq_seat_counts) > 1:
  1830. seat_cv = eq_seat_counts.std() / max(eq_seat_counts.mean(), 0.001)
  1831. # cv 越小越均匀,得分越高
  1832. seat_score = max(0, 100 * (1 - seat_cv / 3))
  1833. else:
  1834. seat_score = 50
  1835. # 维度3: 严重度评分 (30%)
  1836. eq_sev = eq_all["severity"].value_counts()
  1837. severe_ratio = eq_sev.get("严重", 0) / max(len(eq_all), 1)
  1838. sev_score = max(0, 100 * (1 - severe_ratio * 3)) # 严重占比 33% 时为 0 分
  1839. # 综合得分
  1840. total_score = rate_score * 0.4 + seat_score * 0.3 + sev_score * 0.3
  1841. health_data.append({
  1842. "设备ID": eq_id,
  1843. "缺陷总数": eq_defects,
  1844. "缺陷率": f"{eq_defect_rate:.2f}",
  1845. "座号集中度(CV)": f"{seat_cv:.2f}" if len(eq_seat_counts) > 1 else "N/A",
  1846. "严重占比": f"{severe_ratio:.1%}",
  1847. "缺陷率分(40%)": round(rate_score, 1),
  1848. "座号分(30%)": round(seat_score, 1),
  1849. "严重度分(30%)": round(sev_score, 1),
  1850. "健康总分": round(total_score, 1),
  1851. })
  1852. health_df = pd.DataFrame(health_data).sort_values("健康总分", ascending=False)
  1853. # 显示健康评分
  1854. col_h1, col_h2 = st.columns([3, 2])
  1855. with col_h1:
  1856. st.dataframe(health_df, use_container_width=True, hide_index=True)
  1857. with col_h2:
  1858. # 可视化排名
  1859. fig_health, ax_health = plt.subplots(figsize=(6, 4))
  1860. health_sorted = health_df.sort_values("健康总分", ascending=True)
  1861. colors_health = ["#4CAF50" if s >= 70 else "#FF9800" if s >= 40 else "#F44336"
  1862. for s in health_sorted["健康总分"]]
  1863. bars = ax_health.barh(health_sorted["设备ID"], health_sorted["健康总分"],
  1864. color=colors_health, alpha=0.8, height=0.5)
  1865. for bar, score in zip(bars, health_sorted["健康总分"]):
  1866. ax_health.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
  1867. f"{score:.0f}", ha="left", va="center", fontsize=12, fontweight="bold")
  1868. ax_health.set_xlabel("健康评分 (0-100)")
  1869. ax_health.set_title("设备健康排名")
  1870. ax_health.set_xlim(0, 110)
  1871. st.pyplot(fig_health)
  1872. plt.close()
  1873. # --- 共性分析 ---
  1874. st.divider()
  1875. st.subheader("🔍 异常批次共性分析")
  1876. st.markdown("选中异常批次后,自动分析这些批次的共同特征(设备/时段/座号/缺陷类型)。")
  1877. # 自动检测异常批次(基于缺陷率)
  1878. batch_stats = df.groupby("batch_id").agg(
  1879. defects=("defect_id", "count"),
  1880. panels=("panel_id", "nunique")
  1881. )
  1882. batch_stats["defect_rate"] = batch_stats["defects"] / batch_stats["panels"]
  1883. threshold = batch_stats["defect_rate"].mean() + batch_stats["defect_rate"].std()
  1884. abnormal_batches = batch_stats[batch_stats["defect_rate"] > threshold].index.tolist()
  1885. st.info(f"自动检测到的异常批次 (缺陷率 > {threshold:.2%}): **{len(abnormal_batches)}** 个")
  1886. st.write(", ".join(abnormal_batches[:10]))
  1887. if abnormal_batches:
  1888. col_c1, col_c2 = st.columns(2)
  1889. with col_c1:
  1890. # 选择要分析的批次
  1891. selected_abnormal = st.multiselect(
  1892. "选择要分析的异常批次",
  1893. options=abnormal_batches,
  1894. default=abnormal_batches[:3] if len(abnormal_batches) >= 3 else abnormal_batches,
  1895. key="commonality_batch"
  1896. )
  1897. if selected_abnormal:
  1898. abnormal_df = df[df["batch_id"].isin(selected_abnormal)]
  1899. normal_df = df[~df["batch_id"].isin(selected_abnormal)]
  1900. st.divider()
  1901. st.markdown(f"**分析对象**: {len(selected_abnormal)} 个异常批次, "
  1902. f"{len(abnormal_df)} 条缺陷记录")
  1903. # 共性分析:设备
  1904. st.subheader("共性特征 TOP3")
  1905. col_common1, col_common2, col_common3 = st.columns(3)
  1906. with col_common1:
  1907. # 设备共性
  1908. abnormal_eq_rate = abnormal_df.groupby("equipment_id").size() / len(abnormal_df)
  1909. normal_eq_rate = normal_df.groupby("equipment_id").size() / len(normal_df)
  1910. eq_boost = {}
  1911. for eq in abnormal_df["equipment_id"].unique():
  1912. a_rate = abnormal_eq_rate.get(eq, 0)
  1913. n_rate = normal_eq_rate.get(eq, 0)
  1914. if n_rate > 0:
  1915. eq_boost[eq] = (a_rate - n_rate) / n_rate * 100
  1916. else:
  1917. eq_boost[eq] = 999
  1918. eq_top = sorted(eq_boost.items(), key=lambda x: x[1], reverse=True)[:3]
  1919. st.markdown("**设备共用性**")
  1920. for eq, boost in eq_top:
  1921. st.markdown(f"- {eq}: 异常占比 {abnormal_eq_rate.get(eq, 0):.1%}, "
  1922. f"相对正常 **+{boost:.0f}%**")
  1923. with col_common2:
  1924. # 时段共性
  1925. abnormal_hour = abnormal_df.groupby("hour").size() / len(abnormal_df)
  1926. normal_hour = normal_df.groupby("hour").size() / len(normal_df)
  1927. # 按班次聚合
  1928. abnormal_shift = abnormal_df.groupby("shift").size() / len(abnormal_df)
  1929. normal_shift = normal_df.groupby("shift").size() / len(normal_df)
  1930. st.markdown("**时段共性**")
  1931. for shift in ["白班", "夜班"]:
  1932. a_rate = abnormal_shift.get(shift, 0)
  1933. n_rate = normal_shift.get(shift, 0)
  1934. if n_rate > 0:
  1935. boost = (a_rate - n_rate) / n_rate * 100
  1936. else:
  1937. boost = 999
  1938. st.markdown(f"- {shift}: 异常占比 {a_rate:.1%}, "
  1939. f"相对正常 **{'+' if boost > 0 else ''}{boost:.0f}%**")
  1940. with col_common3:
  1941. # 座号共性
  1942. abnormal_seat = abnormal_df.groupby("seat_id").size() / len(abnormal_df)
  1943. normal_seat = normal_df.groupby("seat_id").size() / len(normal_df)
  1944. seat_boost = {}
  1945. for seat in abnormal_df["seat_id"].unique():
  1946. a_rate = abnormal_seat.get(seat, 0)
  1947. n_rate = normal_seat.get(seat, 0)
  1948. if n_rate > 0:
  1949. seat_boost[seat] = (a_rate - n_rate) / n_rate * 100
  1950. else:
  1951. seat_boost[seat] = 999
  1952. seat_top = sorted(seat_boost.items(), key=lambda x: x[1], reverse=True)[:3]
  1953. st.markdown("**座号共性**")
  1954. for seat, boost in seat_top:
  1955. st.markdown(f"- {seat}: 异常占比 {abnormal_seat.get(seat, 0):.1%}, "
  1956. f"相对正常 **+{boost:.0f}%**")
  1957. # --- 缺陷类型偏差 ---
  1958. st.subheader("异常批次缺陷类型偏差")
  1959. abnormal_type = abnormal_df.groupby("defect_type").size() / len(abnormal_df)
  1960. normal_type = normal_df.groupby("defect_type").size() / len(normal_df)
  1961. type_diff = []
  1962. for t in set(list(abnormal_type.index) + list(normal_type.index)):
  1963. a_rate = abnormal_type.get(t, 0)
  1964. n_rate = normal_type.get(t, 0)
  1965. type_diff.append({
  1966. "缺陷类型": t,
  1967. "异常占比": f"{a_rate:.1%}",
  1968. "正常占比": f"{n_rate:.1%}",
  1969. "偏差": f"{'+' if a_rate > n_rate else ''}{(a_rate - n_rate) / max(n_rate, 0.001) * 100:.0f}%",
  1970. })
  1971. st.dataframe(pd.DataFrame(type_diff).sort_values("偏差", key=lambda x: x.str.rstrip("%").astype(float), ascending=False),
  1972. use_container_width=True, hide_index=True)
  1973. # ========== Tab 11: 多层叠加分析 ==========
  1974. _t = get_tab("🔲 多层叠加分析")
  1975. if _t:
  1976. with _t:
  1977. st.header("🔲 多层叠加分析")
  1978. st.markdown(
  1979. "将缺陷数据与面板物理区域、设备座号、时间维度叠加在同一视图上,"
  1980. "揭示单一维度看不到的深层关联。"
  1981. )
  1982. pw = df["panel_width_mm"].iloc[0]
  1983. ph = df["panel_height_mm"].iloc[0]
  1984. # --- 自定义区域定义 ---
  1985. st.subheader("📐 自定义区域缺陷统计")
  1986. st.markdown("将面板划分为不同功能区域,统计各区域缺陷分布")
  1987. # 定义区域:(名称, 判定函数)
  1988. # 边缘区:距四边 < 15%
  1989. # 中心区:距中心 < 20% 半径
  1990. # 角落区:四个角的 15% 范围
  1991. # FPC区:Y > 70% 高度
  1992. # 上半区/下半区
  1993. def classify_zone(x_norm, y_norm):
  1994. """将每个缺陷点分类到区域"""
  1995. zones = []
  1996. for i in range(len(x_norm)):
  1997. zx, zy = x_norm[i], y_norm[i]
  1998. zone_list = []
  1999. # 边缘区
  2000. if min(zx, 1 - zx, zy, 1 - zy) < 0.15:
  2001. zone_list.append("边缘区")
  2002. # 中心区
  2003. if np.sqrt((zx - 0.5)**2 + (zy - 0.5)**2) < 0.20:
  2004. zone_list.append("中心区")
  2005. # 角落区
  2006. if (zx < 0.15 or zx > 0.85) and (zy < 0.15 or zy > 0.85):
  2007. zone_list.append("角落区")
  2008. # FPC区
  2009. if zy > 0.70:
  2010. zone_list.append("FPC区")
  2011. # 上半区
  2012. if zy < 0.50:
  2013. zone_list.append("上半区")
  2014. # 下半区
  2015. if zy > 0.50:
  2016. zone_list.append("下半区")
  2017. if not zone_list:
  2018. zone_list.append("其他区域")
  2019. zones.append(", ".join(zone_list))
  2020. return zones
  2021. # 计算每个缺陷的区域归属
  2022. x_norm_arr = filtered_df["x_mm"].values / pw
  2023. y_norm_arr = filtered_df["y_mm"].values / ph
  2024. filtered_df_copy = filtered_df.copy()
  2025. filtered_df_copy["zone"] = classify_zone(x_norm_arr, y_norm_arr)
  2026. # 统计各区域缺陷数
  2027. zone_counts = {}
  2028. zone_types = ["边缘区", "中心区", "角落区", "FPC区", "上半区", "下半区", "其他区域"]
  2029. for z in zone_types:
  2030. count = filtered_df_copy["zone"].str.contains(z).sum()
  2031. zone_counts[z] = count
  2032. col_z1, col_z2 = st.columns([1, 2])
  2033. with col_z1:
  2034. st.subheader("区域缺陷统计")
  2035. for z in zone_types:
  2036. count = zone_counts.get(z, 0)
  2037. pct = count / max(len(filtered_df_copy), 1) * 100
  2038. bar_len = int(pct / 100 * 200)
  2039. bar = "█" * max(bar_len, 0)
  2040. st.markdown(f"{z} | {bar} **{count}** ({pct:.1f}%)")
  2041. with col_z2:
  2042. # 区域可视化
  2043. fig_zone, ax_zone = plt.subplots(figsize=(4, 6))
  2044. # 面板背景
  2045. ax_zone.add_patch(plt.Rectangle((0, 0), pw, ph, facecolor="#1a1a2e", edgecolor="#444", linewidth=2))
  2046. # 区域边界
  2047. # 边缘区 (15% 边界)
  2048. margin_x = pw * 0.15
  2049. margin_y = ph * 0.15
  2050. ax_zone.add_patch(plt.Rectangle((0, 0), margin_x, ph, fill=False, edgecolor="yellow", linewidth=1, alpha=0.4, linestyle="--"))
  2051. ax_zone.add_patch(plt.Rectangle((pw - margin_x, 0), margin_x, ph, fill=False, edgecolor="yellow", linewidth=1, alpha=0.4, linestyle="--"))
  2052. ax_zone.add_patch(plt.Rectangle((0, 0), pw, margin_y, fill=False, edgecolor="yellow", linewidth=1, alpha=0.4, linestyle="--"))
  2053. ax_zone.add_patch(plt.Rectangle((0, ph - margin_y), pw, margin_y, fill=False, edgecolor="yellow", linewidth=1, alpha=0.4, linestyle="--"))
  2054. # 中心区 (20% 半径)
  2055. center_r = 0.20 * max(pw, ph) / 2
  2056. circle = plt.Circle((pw/2, ph/2), center_r, fill=False, edgecolor="cyan", linewidth=1.5, alpha=0.5, linestyle="--")
  2057. ax_zone.add_patch(circle)
  2058. # FPC区
  2059. fpc_y = ph * 0.70
  2060. ax_zone.add_patch(plt.Rectangle((0, fpc_y), pw, ph - fpc_y, fill=False, edgecolor="magenta", linewidth=1.5, alpha=0.5, linestyle="--"))
  2061. # 缺陷散点
  2062. scatter_colors = {"边缘区": "yellow", "中心区": "cyan", "角落区": "orange",
  2063. "FPC区": "magenta", "上半区": "#4ECDC4", "下半区": "#45B7D1", "其他区域": "gray"}
  2064. for z_name in zone_types:
  2065. z_mask = filtered_df_copy["zone"].str.contains(z_name)
  2066. if z_mask.sum() > 0:
  2067. z_data = filtered_df_copy[z_mask]
  2068. ax_zone.scatter(z_data["x_mm"], z_data["y_mm"],
  2069. c=scatter_colors.get(z_name, "gray"), s=5, alpha=0.3,
  2070. label=f"{z_name} ({z_mask.sum()})", edgecolors="none", zorder=2)
  2071. ax_zone.set_xlim(-5, pw + 5)
  2072. ax_zone.set_ylim(-5, ph + 5)
  2073. ax_zone.set_title("缺陷区域叠加图 (虚线=区域边界)")
  2074. ax_zone.set_xlabel("X (mm)")
  2075. ax_zone.set_ylabel("Y (mm)")
  2076. ax_zone.set_aspect("equal")
  2077. ax_zone.legend(fontsize=7, loc="upper right", ncol=1, framealpha=0.7)
  2078. st.pyplot(fig_zone)
  2079. plt.close()
  # --- Cross-batch comparison of panels produced on the same seat ---
  # The user picks an equipment, a seat and two or more batches. Each batch is
  # drawn as its own panel map, followed by a statistics table and, for three or
  # more batches, a linear trend verdict on the defect count.
  st.divider()
  st.subheader("🔀 跨批次同座号面板对比")
  st.markdown(
      "选择一台设备和一个座号,查看该座号在不同批次生产的面板上缺陷分布的对比。"
      "如果同一座号持续在相同位置产生缺陷 → 该座号存在系统性问题。"
  )
  col_cmp1, col_cmp2, col_cmp3 = st.columns(3)
  with col_cmp1:
      cmp_eq = st.selectbox("选择设备", options=sorted(df["equipment_id"].unique()), key="cmp_eq")
  with col_cmp2:
      eq_seats = sorted(df[(df["equipment_id"] == cmp_eq)]["seat_id"].unique())
      cmp_seat = st.selectbox("选择座号", options=eq_seats, key="cmp_seat")
  # Filter to the chosen equipment/seat once; every later step slices this frame
  # instead of re-evaluating the same conditions against the full data set.
  eq_seat_df = df[(df["equipment_id"] == cmp_eq) & (df["seat_id"] == cmp_seat)]
  with col_cmp3:
      # Only batches that actually have defects for this equipment/seat.
      eq_seat_batches = sorted(eq_seat_df["batch_id"].unique())
      # Slicing already copes with fewer than three available batches.
      cmp_batches = st.multiselect("选择对比批次", options=eq_seat_batches, default=eq_seat_batches[:3])
  if len(cmp_batches) >= 2:
      batch_frames = {b: eq_seat_df[eq_seat_df["batch_id"] == b] for b in cmp_batches}
      n_cols = min(len(cmp_batches), 3)
      n_rows = (len(cmp_batches) + n_cols - 1) // n_cols
      # squeeze=False always yields a 2-D axes array, so flattening is uniform.
      fig_cmp, axes_cmp = plt.subplots(n_rows, n_cols, figsize=(3.5 * n_cols, 5 * n_rows), squeeze=False)
      axes_cmp = axes_cmp.flatten()
      # Marker colour per defect type; unknown types fall back to white.
      type_colors = {"划痕": "red", "亮点": "yellow", "暗点": "black", "气泡": "cyan",
                     "色差": "magenta", "漏光": "orange", "裂纹": "darkred", "异物": "green"}
      for ax, batch in zip(axes_cmp, cmp_batches):
          batch_data = batch_frames[batch]
          # Panel background.
          ax.add_patch(plt.Rectangle((0, 0), pw, ph, facecolor="#1a1a2e", edgecolor="#444", linewidth=1))
          if len(batch_data) > 0:
              # One scatter call per batch (not one per defect row) keeps the
              # artist count and the rendering time independent of defect count.
              point_colors = [type_colors.get(t, "white") for t in batch_data["defect_type"]]
              ax.scatter(batch_data["x_mm"], batch_data["y_mm"], c=point_colors, s=30, alpha=0.7,
                         edgecolors="white", linewidths=0.3, zorder=3)
          ax.set_xlim(-3, pw + 3)
          ax.set_ylim(-3, ph + 3)
          ax.set_title(f"{batch}\n{len(batch_data)} 缺陷", fontsize=9)
          ax.set_aspect("equal")
          ax.grid(True, alpha=0.1, color="gray")
          ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)
      # Hide the unused cells of the subplot grid.
      for spare_ax in axes_cmp[len(cmp_batches):]:
          spare_ax.set_visible(False)
      fig_cmp.suptitle(f"{cmp_eq} / {cmp_seat} 跨批次对比", fontsize=12, y=1.01)
      fig_cmp.tight_layout()
      st.pyplot(fig_cmp)
      plt.close(fig_cmp)
      # Comparison statistics, one row per selected batch.
      st.subheader("对比统计")
      comp_stats = []
      for batch in cmp_batches:
          batch_data = batch_frames[batch]
          has_rows = len(batch_data) > 0
          comp_stats.append({
              "批次": batch,
              "缺陷数": len(batch_data),
              "主要类型": batch_data["defect_type"].mode().iloc[0] if has_rows else "-",
              "严重占比": f"{(batch_data['severity']=='严重').sum() / max(len(batch_data), 1):.0%}",
              "中心X": round(batch_data["x_mm"].mean(), 1) if has_rows else "-",
              "中心Y": round(batch_data["y_mm"].mean(), 1) if has_rows else "-",
          })
      st.dataframe(pd.DataFrame(comp_stats), use_container_width=True, hide_index=True)
      # Trend verdict (needs at least three points for a meaningful slope).
      if len(cmp_batches) >= 3:
          # Fit in sorted batch order, not in the order the user clicked the
          # batches, so the verdict does not depend on selection order.
          # NOTE(review): assumes batch ids sort chronologically - confirm.
          trend_batches = sorted(cmp_batches)
          defect_counts = [len(batch_frames[b]) for b in trend_batches]
          x_trend = np.arange(len(trend_batches))
          coeffs = np.polyfit(x_trend, defect_counts, 1)
          slope = coeffs[0]
          if slope > 0.5:
              st.warning(f"⚠️ **{cmp_eq}/{cmp_seat}** 缺陷数呈**上升趋势** (斜率: {slope:.1f}/批次),建议安排设备检修")
          elif slope < -0.5:
              st.success(f"✅ **{cmp_eq}/{cmp_seat}** 缺陷数呈**改善趋势** (斜率: {slope:.1f}/批次)")
          else:
              st.info(f"➡️ **{cmp_eq}/{cmp_seat}** 缺陷数**平稳** (斜率: {slope:.1f}/批次)")
  else:
      st.info("请选择至少 2 个批次进行对比")
  # --- Defect propagation tracking ---
  # Bins defect coordinates into square buckets, then shows how the defect count
  # of one selected bucket evolves day by day, with a linear trend line.
  st.divider()
  st.subheader("📡 缺陷坐标传播追踪")
  st.markdown(
      "追踪同一坐标区域在时间轴上的缺陷演变,识别持续恶化的位置。"
      "如果某坐标的缺陷数量随时间递增 → 该位置存在渐进性损伤(如吸嘴持续磨损)。"
  )
  # Coordinate bucketing plus the time dimension.
  prop_bin = st.slider("传播追踪分桶大小 (mm)", min_value=10, max_value=50, value=20, step=10)
  df_time = df.copy()
  df_time["x_bin"] = (df_time["x_mm"] // prop_bin).astype(int)
  df_time["y_bin"] = (df_time["y_mm"] // prop_bin).astype(int)
  # Defect count per (bucket, day).
  prop_df = df_time.groupby(["x_bin", "y_bin", "day"]).size().reset_index(name="defect_count")
  # Keep only buckets that have defects on at least 3 distinct days.
  bucket_days = prop_df.groupby(["x_bin", "y_bin"])["day"].nunique()
  active_buckets = bucket_days[bucket_days >= 3].index.tolist()
  if active_buckets:
      bucket_options = [f"({bx},{by})" for bx, by in active_buckets]
      bucket_counts = prop_df.groupby(["x_bin", "y_bin"])["defect_count"].sum().sort_values(ascending=False)
      # Total defects keyed by the option label, so neither the label formatter
      # nor the default lookup has to parse the label back into a tuple.
      bucket_totals = {f"({bx},{by})": total for (bx, by), total in bucket_counts.items()}
      # Default to the selectable bucket with the most defects. The overall top
      # bucket may have fewer than 3 days of data and so not be offered at all.
      default_top = max(bucket_options, key=bucket_totals.__getitem__)
      selected_bucket = st.selectbox(
          "选择要追踪的坐标桶",
          options=bucket_options,
          index=bucket_options.index(default_top),
          format_func=lambda label: f"{label} (总缺陷: {bucket_totals[label]:.0f})",
      )
      bx, by = map(int, selected_bucket.strip("()").split(","))
      bucket_timeline = prop_df[(prop_df["x_bin"] == bx) & (prop_df["y_bin"] == by)].sort_values("day")
      # assign() returns a new frame, avoiding a write into a filtered slice.
      bucket_timeline = bucket_timeline.assign(day=pd.to_datetime(bucket_timeline["day"]))
      # Propagation chart: daily defect count as bars.
      fig_prop, ax_prop = plt.subplots(figsize=(12, 4))
      ax_prop.bar(bucket_timeline["day"], bucket_timeline["defect_count"],
                  color="steelblue", alpha=0.7, width=0.8)
      # Trend line.
      if len(bucket_timeline) >= 2:
          # Fit against elapsed days rather than the row index: days without
          # defects have no row, so an index-based fit would not be a per-day
          # slope and would not be a straight line on the date axis.
          x_t = (bucket_timeline["day"] - bucket_timeline["day"].min()).dt.days.to_numpy()
          coeffs_p = np.polyfit(x_t, bucket_timeline["defect_count"].values, 1)
          slope_p = coeffs_p[0]
          trend_y = np.polyval(coeffs_p, x_t)
          ax_prop.plot(bucket_timeline["day"], trend_y, color="red", linestyle="--",
                       linewidth=2, label=f"趋势 (斜率: {slope_p:.2f}/天)")
          if slope_p > 0.3:
              ax_prop.set_title(f"坐标桶 ({bx},{by}) — 缺陷数上升 (恶化趋势)")
          elif slope_p < -0.3:
              ax_prop.set_title(f"坐标桶 ({bx},{by}) — 缺陷数下降 (改善趋势)")
          else:
              ax_prop.set_title(f"坐标桶 ({bx},{by}) — 缺陷数平稳")
          # The trend line is the only labelled artist, so the legend is drawn
          # only when it exists.
          ax_prop.legend()
      else:
          ax_prop.set_title(f"坐标桶 ({bx},{by})")
      ax_prop.set_ylabel("缺陷数量")
      ax_prop.tick_params(axis="x", rotation=45)
      ax_prop.grid(True, alpha=0.3, axis="y")
      st.pyplot(fig_prop)
      plt.close(fig_prop)
      # Defect-type breakdown over time for the selected bucket.
      bucket_data = df_time[(df_time["x_bin"] == bx) & (df_time["y_bin"] == by)]
      st.markdown(f"**坐标桶 ({bx},{by}) 缺陷类型演变** (对应面板区域: X {bx*prop_bin}-{(bx+1)*prop_bin}mm, Y {by*prop_bin}-{(by+1)*prop_bin}mm)")
      bucket_type_timeline = bucket_data.groupby(["day", "defect_type"]).size().unstack(fill_value=0)
      bucket_type_timeline.index = pd.to_datetime(bucket_type_timeline.index)
      st.dataframe(bucket_type_timeline, use_container_width=True, height=300)
  else:
      st.info("当前数据中无足够多天数的连续缺陷坐标桶 (需 ≥3 天)")
  # --- Footer: data export ---
  # When the active view enables exports, builds a Markdown summary of the
  # filtered data and offers three downloads: the Markdown report, the filtered
  # rows as CSV, and a short plain-text summary.
  st.divider()
  if current_config["show_export"]:
      st.subheader("📥 数据导出")
      # Combined report export.
      st.subheader("📋 一键导出综合报告")
      st.markdown("包含所有分析模块的关键结论,适合汇报和存档。")

      # One timestamp shared by the report body and all download file names.
      export_time = datetime.now()
      export_stamp = export_time.strftime('%Y-%m-%d %H:%M:%S')
      export_day = export_time.strftime('%Y%m%d')
      has_equipment = "equipment_id" in filtered_df.columns
      has_seat = "seat_id" in filtered_df.columns

      report_parts = [
          "# 缺陷集中性分析综合报告\n",
          f"**生成时间**: {export_stamp}",
          f"**数据范围**: {start_date.strftime('%Y-%m-%d')} ~ {end_date.strftime('%Y-%m-%d')}",
          f"**筛选后缺陷数**: {len(filtered_df)} 条",
          f"**涉及面板**: {filtered_df['panel_id'].nunique()} 块",
          f"**视图模式**: {view_mode}\n",
      ]

      # 1. KPI summary
      report_parts.append("## 1. KPI 摘要\n")
      report_kpis = calculate_kpis(df, filtered_df)
      total_panels_inspected_r = report_kpis["total_panels_inspected"]
      defective_panels_r = report_kpis["defective_panels"]
      yield_rate_r = report_kpis["yield_rate"]
      # max(..., 1) guards the percentage against zero inspected panels.
      defective_rate_r = defective_panels_r / max(total_panels_inspected_r, 1) * 100
      report_parts.append(f"- 检测面板数: {total_panels_inspected_r} 块")
      report_parts.append(f"- 不良面板数: {defective_panels_r} 块 ({defective_rate_r:.1f}%)")
      report_parts.append(f"- 综合良率: {yield_rate_r:.1f}%")
      report_parts.append(f"- 缺陷总数: {len(filtered_df)} 个")
      report_parts.append(f"- 严重缺陷: {(filtered_df['severity']=='严重').sum()} 个\n")

      # 2. Defect types (the loop is skipped when there are no rows, so the
      # share computation never divides by zero).
      report_parts.append("## 2. 缺陷类型分布\n")
      type_counts_r = filtered_df["defect_type"].value_counts()
      for t, c in type_counts_r.items():
          report_parts.append(f"- {t}: {c} ({c/len(filtered_df)*100:.1f}%)")
      report_parts.append("")

      # 3. Equipment / seat
      # eq_counts is always bound because section 6 reads it even when the
      # equipment column is absent.
      eq_counts = filtered_df["equipment_id"].value_counts() if has_equipment else pd.Series(dtype="int64")
      if has_equipment:
          report_parts.append("## 3. 设备与座号分布\n")
          for e, c in eq_counts.items():
              report_parts.append(f"- {e}: {c} 个缺陷")
          if has_seat:
              seat_top = filtered_df["seat_id"].value_counts().head(5)
              report_parts.append("\n**缺陷座号 TOP5**:")
              for i, (s, c) in enumerate(seat_top.items(), 1):
                  report_parts.append(f" {i}. {s}: {c} 个")
          report_parts.append("")

      # 4. Trend: linear fit over the daily defect counts.
      report_parts.append("## 4. 趋势分析\n")
      daily_r = filtered_df.groupby("day").size()
      if len(daily_r) >= 2:
          x_r = np.arange(len(daily_r))
          coeffs_r = np.polyfit(x_r, daily_r.values.astype(float), 1)
          slope_r = coeffs_r[0]
          if abs(slope_r) < 0.05:
              # A slope that prints as 0.0 is reported as flat, not as a decline.
              report_parts.append(f"- 缺陷数趋势: **平稳** (斜率 {slope_r:.1f}/天)")
          elif slope_r > 0:
              report_parts.append(f"- 缺陷数趋势: **上升** (斜率 {slope_r:.1f}/天)")
          else:
              report_parts.append(f"- 缺陷数趋势: **下降** (斜率 {slope_r:.1f}/天)")
      report_parts.append("")

      # 5. Abnormal seats: defect count above mean + 2 standard deviations.
      report_parts.append("## 5. 异常检测\n")
      if has_equipment and has_seat:
          all_seat_stats_r = filtered_df.groupby(["equipment_id", "seat_id"]).size()
          mean_r = all_seat_stats_r.mean()
          std_r = all_seat_stats_r.std()
          threshold_2x_r = mean_r + 2 * std_r
          critical_r = all_seat_stats_r[all_seat_stats_r > threshold_2x_r]
          if len(critical_r) > 0:
              report_parts.append(f"- ⚠️ 2σ 异常座号: {len(critical_r)} 个")
              for (eq, seat), count in critical_r.items():
                  report_parts.append(f" - {eq}/{seat}: {count} 个缺陷")
          else:
              report_parts.append("- ✅ 无 2σ 异常座号")
      report_parts.append("")

      # 6. Recommendations
      report_parts.append("## 6. 建议\n")
      top_type = type_counts_r.index[0] if len(type_counts_r) > 0 else "-"
      top_eq = eq_counts.index[0] if len(eq_counts) > 0 else "-"
      report_parts.append(f"- 重点关注缺陷类型: **{top_type}**")
      report_parts.append(f"- 重点关注设备: **{top_eq}**")
      report_parts.append("- 建议查看 SPC 控制图确认趋势状态")
      report_parts.append("- 建议检查设备健康评分\n")
      report_parts.append("---\n*本报告由缺陷集中性分析系统自动生成*")
      full_report = "\n".join(report_parts)

      col_exp1, col_exp2, col_exp3 = st.columns(3)
      with col_exp1:
          st.download_button(
              label="📥 综合报告 (MD)",
              data=full_report.encode("utf-8"),
              file_name=f"defect_report_{export_day}.md",
              mime="text/markdown",
              use_container_width=True
          )
      with col_exp2:
          # The CSV is encoded as utf-8-sig, i.e. with a byte-order mark.
          csv_data = filtered_df.to_csv(index=False).encode("utf-8-sig")
          st.download_button(
              label="📥 筛选数据 (CSV)",
              data=csv_data,
              file_name=f"defect_data_{export_day}.csv",
              mime="text/csv",
              use_container_width=True
          )
      with col_exp3:
          # Short plain-text summary: headline numbers plus the top 3 types.
          txt_lines = ["缺陷集中性分析报告", f"生成时间: {export_stamp}",
                       f"缺陷数: {len(filtered_df)} | 面板: {filtered_df['panel_id'].nunique()}",
                       f"良率: {yield_rate_r:.1f}%"]
          for t, c in type_counts_r.head(3).items():
              txt_lines.append(f" TOP: {t} {c}个")
          txt_content = "\n".join(txt_lines)
          st.download_button(
              label="📥 精简报告 (TXT)",
              data=txt_content.encode("utf-8"),
              file_name=f"defect_summary_{export_day}.txt",
              mime="text/plain",
              use_container_width=True
          )