[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"pack-detail-log-analysis-search-zh":3,"seo:pack:log-analysis-search:zh":99},{"code":4,"message":5,"data":6},200,"操作成功",{"pack":7},{"slug":8,"icon":9,"tone":10,"status":11,"status_label":12,"title":13,"description":14,"items":15,"install_cmd":98},"log-analysis-search","📜","#374151","new","本周新建","日志分析 + 搜索全家桶","凌晨 3 点翻日志的工程师的 10 件套：结构化日志库、ship→store 全链路（Fluent Bit → Loki \u002F Elasticsearch \u002F ClickHouse）、本地 SQL 查日志的 lnav、Sentry 错误聚合，再加 MCP server 让 AI agent 直接查 trace 和告警。",[16,28,35,43,51,58,67,74,81,91],{"id":17,"uuid":18,"slug":19,"title":20,"description":21,"author_name":22,"view_count":23,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2134,"17e7e031-40e4-11f1-9bc6-00163e2b0d79","winston-versatile-logging-library-node-js-17e7e031","winston — Versatile Logging Library for Node.js","winston is the most popular logging library for Node.js, offering multiple transports, structured JSON output, and configurable log levels for production applications.","Script Depot",82,0,"en","skill","Skill",{"id":29,"uuid":30,"slug":31,"title":32,"description":33,"author_name":22,"view_count":34,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1261,"6922366e-37b5-11f1-9bc6-00163e2b0d79","loguru-python-logging-made-stupidly-simple-6922366e","Loguru — Python Logging Made Stupidly Simple","Loguru replaces Python logging boilerplate with a single import. No handlers, no formatters, no config files — just logger.info(). It adds colorized output, structured context, file rotation, and exception diagnosis out of the box.",88,{"id":36,"uuid":37,"slug":38,"title":39,"description":40,"author_name":41,"view_count":42,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1404,"18438936-38e7-11f1-9bc6-00163e2b0d79","fluent-bit-lightweight-high-performance-log-metrics-18438936","Fluent Bit — Lightweight High-Performance Log and Metrics Processor","Fluent Bit is a fast, lightweight telemetry agent from the Fluentd family. It collects logs, metrics and traces from any source, processes them with filters, and forwards them to dozens of backends.","AI Open Source",109,{"id":44,"uuid":45,"slug":46,"title":47,"description":48,"author_name":49,"view_count":50,"vote_count":24,"lang_type":25,"type":26,"type_label":27},958,"92fa7c1f-352f-11f1-9bc6-00163e2b0d79","grafana-loki-prometheus-inspired-log-aggregation-system-92fa7c1f","Grafana Loki — Prometheus-Inspired Log Aggregation System","Loki is a horizontally scalable, multi-tenant log aggregation system by Grafana Labs. Unlike other log systems, Loki indexes metadata about logs, not log content itself.","Grafana Labs",210,{"id":52,"uuid":53,"slug":54,"title":55,"description":56,"author_name":22,"view_count":57,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1201,"8cbbd0e8-3734-11f1-9bc6-00163e2b0d79","elasticsearch-distributed-search-analytics-engine-8cbbd0e8","Elasticsearch — Distributed Search and Analytics Engine","Elasticsearch is the most popular search and analytics engine. It provides near-real-time full-text search, structured search, analytics, and logging across petabytes of data — powering search for Wikipedia, GitHub, Stack Overflow, and millions of applications.",166,{"id":59,"uuid":60,"slug":61,"title":62,"description":63,"author_name":41,"view_count":64,"vote_count":24,"lang_type":25,"type":65,"type_label":66},965,"2fce985b-3535-11f1-9bc6-00163e2b0d79","clickhouse-open-source-real-time-analytics-database-2fce985b","ClickHouse — Open Source Real-Time Analytics Database","ClickHouse is a lightning-fast, open-source column-oriented database for real-time analytics. Query billions of rows in milliseconds with SQL. Used by Cloudflare, Uber, eBay.",114,"config","Config",{"id":68,"uuid":69,"slug":70,"title":71,"description":72,"author_name":22,"view_count":73,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1375,"4493f997-38c4-11f1-9bc6-00163e2b0d79","lnav-logfile-navigator-sql-live-tailing-4493f997","lnav — The Logfile Navigator with SQL and Live Tailing","lnav is an advanced log file viewer that understands dozens of log formats, provides SQL queries against log records, live-tails rotating files, and timestamps-merges multiple logs into one view.",113,{"id":75,"uuid":76,"slug":77,"title":78,"description":79,"author_name":41,"view_count":80,"vote_count":24,"lang_type":25,"type":26,"type_label":27},945,"ece57add-34d8-11f1-9bc6-00163e2b0d79","sentry-open-source-error-tracking-performance-monitoring-ece57add","Sentry — Open Source Error Tracking & Performance Monitoring","Sentry is the developer-first error tracking and performance monitoring platform. Capture exceptions, trace performance issues, and debug production errors across all languages.",173,{"id":82,"uuid":83,"slug":84,"title":85,"description":86,"author_name":87,"view_count":88,"vote_count":24,"lang_type":25,"type":89,"type_label":90},3608,"818380f9-674d-5217-88ab-f393ff99a247","signoz-mcp-server-query-traces-logs-alerts","SigNoz MCP Server — Query Traces, Logs & Alerts","SigNoz MCP Server connects MCP clients to your SigNoz instance: query traces\u002Flogs, inspect alerts, and automate observability workflows using an API key.","MCP Hub",86,"mcp","MCP",{"id":92,"uuid":93,"slug":94,"title":95,"description":96,"author_name":87,"view_count":97,"vote_count":24,"lang_type":25,"type":89,"type_label":90},3286,"284265e6-a9c0-5b2f-b769-60966256e908","clickhouse-mcp-read-only-defaults-drop-protection","ClickHouse MCP — Read-Only Defaults + Drop Protection","ClickHouse MCP connects MCP clients to ClickHouse or embedded chDB with read-only defaults, optional writes, and double opt-in for DROP\u002FTRUNCATE safety.",21,"tokrepo install pack\u002Flog-analysis-search",{"pageType":100,"pageKey":8,"locale":101,"title":102,"metaDescription":103,"h1":104,"tldr":105,"bodyMarkdown":106,"faq":107,"schema":123,"internalLinks":129,"citations":142,"wordCount":155,"generatedAt":156},"pack","zh","日志分析 + 搜索全家桶 — 凌晨 3 点排障的 10 个开源工具","winston \u002F Loguru \u002F Fluent Bit \u002F Loki \u002F Elasticsearch \u002F ClickHouse \u002F lnav \u002F Sentry \u002F SigNoz MCP \u002F ClickHouse MCP — 一条从原始 stdout 到结构化、可搜索、AI 可查的完整流水线。按推荐顺序安装。","日志分析 + 搜索全家桶 — 凌晨 3 点你真正需要的栈","10 件套按推荐安装顺序：先结构化日志库（winston \u002F Loguru），再 ship-and-store（Fluent Bit → Loki \u002F Elasticsearch \u002F ClickHouse），再查询工具（lnav 本地查、Sentry 错误聚合），最后 MCP server 让 AI agent 直接回答「凌晨 02:47 到底炸了什么」——你不用 grep 一行。","## 这个 pack 解决什么\n\n凌晨 3 点。寻呼器响了，5xx 飙了。你 SSH 上去，`tail -f` 一个已经轮转的文件，grep 一个其实是三个异常共用一段子串的错，40 分钟后你只缩小到「checkout 模块某个地方」。这就是这个 pack 干掉的痛点。\n\n目标不是「可观测性表演」——不需要 15 个没人打开的 dashboard。目标是：**结构化日志从一端进去，问题从另一端出来**，并且这个问题可以由你、你队友、或者一个有 MCP 权限的 AI agent 来问。\n\n每一个选品都是**开源或有可自建的开源核心**。整条流水线在一台中型 VM 上能撑到 ~50 GB\u002F天 的日志量；超过这个量级再把 Loki \u002F ClickHouse 分到独立机器。不绑厂商、不踩按量计费的坑。\n\n## 推荐安装顺序\n\n1. **winston (Node)** 或 **Loguru (Python)** — 从应用层的结构化日志开始。JSON 输出，每个事件一行，每行都有 `timestamp`、`level`、`service`、`trace_id`。**源头不结构化，后面所有工具都在和你的 formatter 打架，干不了正事。**\n2. **Fluent Bit** — 日志搬运工。tail 文件 \u002F journald \u002F Docker 日志，解析 JSON，加 host label，攒批，重试，发往你的存储。C 写的小二进制，常驻内存 ~5 MB，sidecar 或 DaemonSet 都行。中间层非它不可。\n3. **Grafana Loki** — 存储默认选项。**只索引 label**（不索引正文），用对象存储，跑起来便宜。最适合「结构化 JSON 进，按 `service=checkout level=error` 查」的场景。LogQL 语法像 PromQL——懂 Prometheus 5 分钟上手。\n4. **Elasticsearch** — 当你需要在日志正文里全文搜索、而不只是按 label 时的替代存储。重（JVM、磁盘开销大）但在「找所有提到 `OrderId=abc-123` 的日志」这类问题上无敌。配 Kibana 作 UI。\n5. **ClickHouse** — 当日志量**特别大**（>100 GB\u002F天）且需要 SQL 时的替代存储。列存，吃压缩 JSON 跟玩似的，Elasticsearch 跑 30 秒的查询它 1 秒出。规模上来后的正确答案。\n6. **lnav** — 终端本地日志导航器。直接对日志文件跑 SQL 查询，实时 tail，自动识别格式，错误高亮。SSH 到某一台机器、中心存储指不上时反手就用它。单个二进制，无 daemon。\n7. **Sentry** — 错误聚合 + 告警。和 Loki\u002FES\u002FCH 角色不同——那三个存所有日志；Sentry 专门抓**异常和 stack trace**，按指纹智能聚合去重，新错误出现或量突增时告警。可自建。\n8. **SigNoz MCP Server** — Model Context Protocol 桥接。让 Claude \u002F ChatGPT \u002F Cursor 用自然语言查 SigNoz 的 trace、日志、告警。「过去一小时最慢的接口是哪个？」→ 真数据真答案，不是幻觉。\n9. **ClickHouse MCP** — 存储是 ClickHouse 时更安全的 MCP 选择。默认只读、防 drop table、参数化查询。交给 agent 也不用担心它把 `DROP DATABASE production` 给执行了。\n\n## 流水线怎么协同\n\n```\n[ 你的应用 ]\n     │\n     ▼  winston \u002F Loguru   (输出结构化 JSON 到 stdout)\n     │\n[ Fluent Bit ]   (解析、加 label、攒批)\n     │\n     ├──▶ Loki           ← 便宜，按 label 索引\n     ├──▶ Elasticsearch  ← 重正文全文搜索\n     └──▶ ClickHouse     ← 大量级 SQL 分析\n     │\n     ├──▶ Sentry         ← 只收异常，聚合 + 告警\n     │\n     ▼  查询入口:\n        - lnav            (本地文件，无 daemon)\n        - Grafana         (Loki UI)\n        - Kibana          (ES UI)\n        - SigNoz MCP      (AI agent → trace\u002F日志\u002F告警)\n        - ClickHouse MCP  (AI agent → SQL，只读)\n```\n\n关键认知：**三个存储选一个，不要全装**。Loki 是 80% 团队的正确默认。只有当「在正文里全文搜」是日常需求才换 Elasticsearch；只有当日志量 + 查询延迟把 Loki 推爆才换 ClickHouse。pack 里列三个是因为正确答案取决于你的流量形态——不是让你三个都装。\n\n## 你会遇到的取舍\n\n- **Loki vs Elasticsearch vs ClickHouse** — Loki 最便宜最易运维，但全文搜索是真的弱（百万行子串匹配很慢）。Elasticsearch 反过来：重，但「全文找这个字符串」是它主场。ClickHouse 是 SQL 核选项——聚合查询飞快但你写的是 SQL 不是 LogQL\u002FKQL。选哪个就看你日常的问题更接近哪一种。\n- **winston vs Loguru vs pino vs zap** — winston 是 Node 默认但 pino 更快（生态也追上了）。Loguru 是 Python 默认但 `structlog` 在复杂上下文绑定时更灵活。本 pack 选默认；真撞到瓶颈再换。\n- **Sentry vs 日志存储** — Sentry 在错误捕获上和日志存储有重叠。两个都跑值得：Sentry 走「新错误出现 → 告警值班」回路；日志存储走「重建请求时序」回路。两套独立活儿。\n- **MCP server vs 自定义 agent 工具** — MCP 标准化了 agent 调用工具的方式，任何 MCP 兼容客户端（Claude Desktop、Cursor、ChatGPT 自定义 GPT）都能复用同一套 SigNoz\u002FClickHouse 接入。自己写 OpenAI function-calling 更灵活但不可移植。任何要给多个 agent runtime 用的工具，MCP 都赢。\n\n## 常见踩坑\n\n- **日志写字符串而不是结构化字段** — `log.info(\"user \" + userId + \" failed\")` 没法搜。`log.info({ event: \"login_failed\", userId })` 在三种存储里都能查。这一个改动让后面整套栈 80% 的价值才落得了地。\n- **Fluent Bit 没做反压控制** — 流量突增时 tail input 会 OOM。先设 `Mem_Buf_Limit` 并打开文件 buffer，不要等生产环境出事才补。\n- **Loki label 高基数** — 千万别拿 `user_id`、`request_id`、`trace_id` 当 label。Loki 存储成本和唯一 label 集数量成线性关系，一个失手的高基数 label 直接让账单涨 100 倍。label 只留 `service`、`env`、`level`、`host`。\n- **Sentry 采样率默认 100%** — 平时没问题，直到某个后台 job 10 分钟刷同一个错 5 万次把你配额打爆。在 SDK 的 `before_send` 钩子里把这种暴力循环在源头去重。\n- **MCP server 默认读写权限暴露** — 几乎所有 MCP server 文档都先给读写示例。ClickHouse MCP 这种尤其重要：agent 在对面时，只读模式（env 里设）是唯一安全默认。配置必审。\n- **把日志正文当 schema 索引** — ES\u002FCH 会让你把每个 JSON 字段都映射成列。半年后你有 12000 个字段，其中一半是某个 buggy 服务的 typo。事件名和字段名在 logger 层归一化，不要在存储层处理。",[108,111,114,117,120],{"q":109,"a":110},"Loki \u002F Elasticsearch \u002F ClickHouse 真的要三个都装吗？","不要——选一个。pack 里列三个是因为正确答案取决于你的形态。Loki 是 ~80% 团队的默认：便宜、按 label 索引、好运维。日常问题是「在任意一条日志正文里找这个字符串」就上 Elasticsearch（重正文全文搜索更强）。日志量超过 ~100 GB\u002F天 或要在日志上做真正的 SQL 分析就上 ClickHouse。三个并行跑做选型对比一两周没问题，作为长期常驻栈就太痛了。",{"q":112,"a":113},"这套栈里 AI 到底起什么作用——SigNoz MCP 就是个聊天 UI 吗？","不止是聊天 UI。MCP server 把 trace、日志、告警暴露成 agent 可以自主调用的工具。实际场景举例：Claude agent 接到一条 Sentry 告警，自己去 SigNoz 查 trace、从 Loki 拉对应日志、写一段事故摘要进工单——全程一个 prompt。ClickHouse MCP 在 SQL 风格的日志分析上是同样的角色，并且强制只读，agent 想 drop 表也下不去手。",{"q":115,"a":116},"为啥用 winston\u002FLoguru 而不是直接 `print` JSON？","三个理由。一：结构化字段通过 API 添加而不是字符串拼接，全代码库一致。二：日志等级、采样、传输（文件 \u002F stdout \u002F 网络）和调用点解耦。三：生态——winston 有 100+ 传输 plugin，Loguru 在 FastAPI\u002FDjango 里开箱即用。当然你可以自己 `json.dumps`，但一个月内就会把上面这些功能重新发明一遍。",{"q":118,"a":119},"如果错误日志已经进了 Loki，Sentry 是不是多余？","不多余，分工不同。Loki\u002FES\u002FCH 不加区分地存一切，回答「给我看看这个请求前后的时序」。Sentry 按 stack trace 指纹去重异常，把它们聚合成「issue」，跟踪首次出现 \u002F 回归 \u002F 量突增，并在新 issue 出现时叫值班。把 Sentry 当作你的错误收件箱，把日志存储当作目击证人——两套服务你，谁也替代不了谁。",{"q":121,"a":122},"整套能不能跑在一台 VM 上，还是必须 Kubernetes？","一台中型 VM（16 vCPU、32 GB 内存、500 GB SSD）能舒服撑到 ~20 GB\u002F天 的日志量，Loki + Fluent Bit + Sentry 自建全在一台上。超过 50 GB\u002F天 再把 Loki 的对象存储拆到 S3 兼容存储，给 ClickHouse\u002FElasticsearch 独立机器。不用 Kubernetes——docker-compose 完全够，50 GB\u002F天 以下甚至更可取。等你有运维精力维护 k8s 再上，不是日志流水线本身要求 k8s。",{"@context":124,"@type":125,"name":13,"description":126,"numberOfItems":127,"inLanguage":128},"https:\u002F\u002Fschema.org","ItemList","10 个开源工具按推荐安装顺序：结构化日志库、日志搬运工、三种存储选项（Loki \u002F Elasticsearch \u002F ClickHouse）、lnav 本地 SQL 查询、Sentry 错误聚合、MCP server 让 AI agent 直接查询。",10,"zh-CN",[130,134,138],{"url":131,"anchor":132,"reason":133},"\u002Fzh\u002Fai-tools-for\u002Fobservability","可观测性 AI 工具集","本 pack 中的 MCP server 就是可观测性的 agent 入口",{"url":135,"anchor":136,"reason":137},"\u002Fzh\u002Ftopics","浏览其他主题 pack","后端工具包、数据工程师工具箱等更多精选 pack",{"url":139,"anchor":140,"reason":141},"\u002Fzh\u002Ffeatured","TokRepo 精选资产","这 10 个工具属于更大的精选目录",[143,147,151],{"claim":144,"source_name":145,"source_url":146},"Grafana Loki 只索引 label，不索引日志正文","Grafana Loki 官方文档","https:\u002F\u002Fgrafana.com\u002Fdocs\u002Floki\u002Flatest\u002F",{"claim":148,"source_name":149,"source_url":150},"ClickHouse 是列式 OLAP 数据库，常用于大规模日志分析","ClickHouse 官方文档","https:\u002F\u002Fclickhouse.com\u002Fdocs",{"claim":152,"source_name":153,"source_url":154},"Model Context Protocol 是连接工具和 LLM agent 的开放标准","Model Context Protocol 规范","https:\u002F\u002Fmodelcontextprotocol.io\u002F",910,"2026-05-22T00:00:00Z"]