[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"pack-detail-ai-image-generation-pack-zh":3,"seo:pack:ai-image-generation-pack:zh":99},{"code":4,"message":5,"data":6},200,"操作成功",{"pack":7},{"slug":8,"icon":9,"tone":10,"status":11,"status_label":12,"title":13,"description":14,"items":15,"install_cmd":98},"ai-image-generation-pack","🖼️","#EC4899","new","本周新建","AI 图像生成全家桶","10 件套，给规模化跑图的开发者和创作者。ComfyUI 节点图、AUTOMATIC1111 + Fooocus 跑 SDXL、InvokeAI 上生产、Flux + ControlNet 做空间控制、Kohya 训 LoRA、Diffusers 做 Python 底座、AnimateDiff 出动画、Replicate 云端批量 — 按复利顺序装。",[16,28,36,43,52,59,66,73,80,88],{"id":17,"uuid":18,"slug":19,"title":20,"description":21,"author_name":22,"view_count":23,"vote_count":24,"lang_type":25,"type":26,"type_label":27},208,"02888d06-d950-42f4-bc45-960c1f604ee4","comfyui-node-based-ai-image-generation-02888d06","ComfyUI — Node-Based AI Image Generation","The most powerful modular AI image generation GUI with a node\u002Fgraph editor. Supports Stable Diffusion, Flux, SDXL, ControlNet, and 1000+ custom nodes. 107K+ stars.","AI Open Source",193,0,"en","skill","Skill",{"id":29,"uuid":30,"slug":31,"title":32,"description":33,"author_name":34,"view_count":35,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1297,"b0727fbf-37db-11f1-9bc6-00163e2b0d79","stable-diffusion-web-ui-automatic1111-definitive-local-ai-b0727fbf","Stable Diffusion Web UI by AUTOMATIC1111 — The Definitive Local AI Image Generator","AUTOMATIC1111's Stable Diffusion Web UI is the most popular interface for running Stable Diffusion locally. 
It supports text-to-image, image-to-image, inpainting, ControlNet, LoRA, embeddings, extensions, and every model variant — all in a self-hosted browser UI.","Script Depot",120,{"id":37,"uuid":38,"slug":39,"title":40,"description":41,"author_name":34,"view_count":42,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2456,"4d4c2b85-453a-11f1-9bc6-00163e2b0d79","invokeai-professional-creative-engine-stable-diffusion-4d4c2b85","InvokeAI — Professional Creative Engine for Stable Diffusion","A leading open-source creative engine for Stable Diffusion and Flux models with a polished WebUI, node-based workflows, and production-grade image generation.",123,{"id":44,"uuid":45,"slug":46,"title":47,"description":48,"author_name":34,"view_count":49,"vote_count":24,"lang_type":25,"type":50,"type_label":51},1299,"b0b1b970-37db-11f1-9bc6-00163e2b0d79","fooocus-focus-prompting-generating-not-tooling-b0b1b970","Fooocus — Focus on Prompting and Generating, Not the Tooling","Fooocus is a Stable Diffusion image generator that strips away every dial and toggle. 
Just type a prompt and get magazine-quality results — opinionated defaults, automatic prompt engineering, and SDXL-grade output with one click.",87,"prompt","Prompt",{"id":53,"uuid":54,"slug":55,"title":56,"description":57,"author_name":22,"view_count":58,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4664,"74fc6ef5-54cb-11f1-9bc6-00163e2b0d79","controlnet-add-spatial-control-diffusion-models-74fc6ef5","ControlNet — Add Spatial Control to Diffusion Models","ControlNet lets you add precise spatial conditioning such as edge maps, depth, and pose to Stable Diffusion, giving fine-grained control over AI image generation.",13,{"id":60,"uuid":61,"slug":62,"title":63,"description":64,"author_name":34,"view_count":65,"vote_count":24,"lang_type":25,"type":26,"type_label":27},111,"4ef1950f-2a47-4e24-9ce2-6f648dea8bed","diffusers-universal-video-image-generation-hub-4ef1950f","Diffusers — Universal Video & Image Generation Hub","Hugging Face's diffusion model library. Run CogVideoX, AnimateDiff, Stable Video Diffusion, and 50+ video\u002Fimage models with a unified API. 33,200+ stars.",170,{"id":67,"uuid":68,"slug":69,"title":70,"description":71,"author_name":22,"view_count":72,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2461,"cd2c15cb-453a-11f1-9bc6-00163e2b0d79","kohya-sd-scripts-training-scripts-stable-diffusion-flux-cd2c15cb","Kohya sd-scripts — Training Scripts for Stable Diffusion and Flux","Comprehensive training, fine-tuning, and generation scripts for Stable Diffusion, SDXL, and Flux models. 
The standard toolkit for LoRA, DreamBooth, and textual inversion training.",106,{"id":74,"uuid":75,"slug":76,"title":77,"description":78,"author_name":22,"view_count":79,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2463,"04d7fee0-453b-11f1-9bc6-00163e2b0d79","animatediff-plug-play-animation-diffusion-models-04d7fee0","AnimateDiff — Plug-and-Play Animation for Diffusion Models","A plug-and-play motion module that turns community text-to-image Stable Diffusion models into animation generators without additional training. ICLR 2024 Spotlight paper.",103,{"id":81,"uuid":82,"slug":83,"title":84,"description":85,"author_name":86,"view_count":87,"vote_count":24,"lang_type":25,"type":26,"type_label":27},833,"e80aca76-b9b8-4330-8611-ee1ead26c99e","replicate-run-ai-models-via-simple-api-calls-e80aca76","Replicate — Run AI Models via Simple API Calls","Cloud platform to run open-source AI models with a simple API. Replicate hosts Llama, Stable Diffusion, Whisper, and thousands of models — no GPU setup or Docker required.","Replicate",204,{"id":89,"uuid":90,"slug":91,"title":92,"description":93,"author_name":94,"view_count":95,"vote_count":24,"lang_type":25,"type":96,"type_label":97},3640,"e3b150c0-ca4d-54e2-a564-12206ee44e81","mcp-image-mcp-image-generation-editing-server","mcp-image — MCP Image Generation & Editing Server","mcp-image is an MCP server for image generation\u002Fediting with quality presets; verified 110★ and documents `npx -y mcp-image` configs for Cursor and Claude.","MCP Hub",56,"mcp","MCP","tokrepo install pack\u002Fai-image-generation-pack",{"pageType":100,"pageKey":8,"locale":101,"title":102,"metaDescription":103,"h1":104,"tldr":105,"bodyMarkdown":106,"faq":107,"schema":123,"internalLinks":129,"citations":142,"wordCount":155,"generatedAt":156},"pack","zh","AI 图像生成全家桶 — 10 个开源工具按顺序装齐","ComfyUI \u002F AUTOMATIC1111 \u002F Fooocus \u002F InvokeAI \u002F ControlNet \u002F Diffusers \u002F Kohya \u002F AnimateDiff \u002F Replicate \u002F mcp-image 
— 规模化跑 SDXL \u002F Flux、训 LoRA、把图像生成接进 agent 的工程师按这个顺序装。含安装顺序和踩坑提醒。","AI 图像生成全家桶 — 从 SDXL 到 Flux 到 LoRA 一条龙","10 个开源工具按精心设计的顺序装：先装工作流引擎（ComfyUI），再装基础 UI（AUTOMATIC1111），再上生产（InvokeAI），再补一个有主见的 SDXL 前端（Fooocus），再加空间控制（ControlNet），再下到 Python 底座（Diffusers），再训 LoRA（Kohya），再做动画（AnimateDiff），再上云批量（Replicate），最后接 MCP。每一层解锁下一层。","## 这个 pack 包含什么\n\n这是一个真正搞图像生成的工程师周末会装的核心 — 不是 Civitai 收藏夹倾倒。每个都是**开源**、**活跃维护**、**值得占磁盘**的。安装顺序经过精心安排：每个工具回答前一个工具产生的问题。\n\n如果你一周只生成一张图，本 pack 你用不上 — Midjourney 就够了。本 pack 针对的是这些场景：要**可复用可版本化的工作流图**、要**训练自己的人物 \u002F 风格 LoRA**、要 **ControlNet 姿态 \u002F 深度条件控制**、要在 **Replicate 上批量跑 10k 张**、要把图像生成**通过 MCP 接进 Claude \u002F Codex agent**。2026 年这一摞需求只能用开源解决。\n\n## 推荐安装顺序\n\n1. **ComfyUI** — 工作流引擎。**从这里开始**，因为后面所有工具最终都会接到 ComfyUI 的某个节点里。图形化、JSON 可序列化的工作流，1000+ 自定义节点支持 Flux \u002F SDXL \u002F ControlNet \u002F LoRA。装完 ComfyUI，其他东西都只是 `models\u002Fcheckpoints\u002F` 里的一个文件。\n2. **AUTOMATIC1111 (SD Web UI)** — 基础模型 UI。新下载一个 SDXL \u002F SD 1.5 checkpoint 想快速验证、不想接节点的最低门槛方式。留着做快速健康检查；正经生产还是 ComfyUI。\n3. **InvokeAI** — 生产级画布 + 队列。A1111 是研究员的玩具场，InvokeAI 才是真 UI：团队友好的元数据、prompt 库、队列管理。出图量真正起来的时候上它。\n4. **Fooocus** — 有主见的 SDXL，默认参数香。「就给我一张好图」的兄弟版。适合团队里的非工程师，也是「好的默认参数长什么样」的参考。\n5. **ControlNet** — 空间条件控制。能生成之后立刻就会想加：姿态、深度、边缘、分割。ControlNet 就是答案；它不是独立 app，而是在 ComfyUI \u002F A1111 \u002F InvokeAI \u002F Diffusers 里加载的模型插件。\n6. **Diffusers (Hugging Face)** — Python 底座。上面所有工具底层都是 Diffusers。当你要脚本化批量 5 万张、从 notebook 调用、或者组合 pipeline（SDXL + IP-Adapter + ControlNet + Refiner）— 就下沉到 Diffusers。**别从这里开始**，要的时候才下沉。\n7. **Kohya sd-scripts** — LoRA 训练事实标准。SD 1.5 \u002F SDXL \u002F Flux 的 LoRA 训练社区都用它。跑两周图你就会想要一个人物 \u002F 风格 LoRA — 社区都是这么训的。配 24GB 显卡或者租一小时 A100。\n8. **AnimateDiff** — 扩散模型的运动模块。在 ComfyUI 里接一个节点，现有图像模型直接出 16 帧视频片段。不学新模型族进入 AI 视频的最便宜入口。\n9. **Replicate** — 本地不够用时上云批量。要 10k 张图，或者模型太大跑不动（Flux dev 满精度要 24GB+），API 推给 Replicate。按秒计费。同样的模型 — 把 prompt JSON 推过去，拿 URL 回来。\n10. 
**mcp-image** — 给 agent 的 MCP server。最新一层：通过 MCP 把图像生成暴露成 Claude Code \u002F Codex \u002F Gemini CLI 的工具。让 agent 自己「画好图嵌进文档」而不是来求你。\n\n## 它们怎么协同\n\n```\nComfyUI (工作流引擎)\n   │\n   ├─ 从磁盘加载 checkpoints + LoRAs + ControlNet 模型\n   │\n   └─ 节点底层调 Diffusers (HF)\n         │\n         ├─ Kohya 训出 ComfyUI 要加载的 LoRA\n         │\n         └─ AnimateDiff 是 ComfyUI 的一个节点，不是独立 app\n\nAUTOMATIC1111 \u002F Fooocus — 基础模型快速健康检查\nInvokeAI — 生产画布 + 队列（和 ComfyUI 平行）\n\nControlNet — 模型插件，活在以上所有工具内部\n\nReplicate — 同样的模型文件，通过 HTTPS 跑在云端\n\nmcp-image — 把以上任何一个暴露成 MCP 工具\n```\n\n核心组合是 **ComfyUI + ControlNet + Kohya + Diffusers**。这四件套覆盖：什么都能生成、能训自己的风格、能加姿态 \u002F 深度 \u002F 边缘条件、UI 走不通的时候下沉到 Python。pack 里其他全都是绕这四件套的专用适配器。\n\n## 你会遇到的取舍\n\n- **ComfyUI vs AUTOMATIC1111 vs InvokeAI** — A1111 用来快速测模型。ComfyUI 用来搭可复用的正经工作流。InvokeAI 给需要真队列 + 元数据的团队。三个都装、各司其职，别想用一个工具搞定三件事。\n- **Fooocus vs ComfyUI** — Fooocus 默认参数好，ComfyUI 天花板高。Fooocus 给设计师同事，ComfyUI 留自己用。\n- **本地训 LoRA vs 租 A100** — 1024 分辨率 SDXL LoRA 在 Kohya 上：4090 大概 1.5-3 小时，A100 大概 25-45 分钟（约 $1.50）。一周不到一个 LoRA 就租；多了就本地买。\n- **Replicate vs 自建** — Replicate 适合脉冲负载和本地跑不动的大模型（Flux dev 满精度要 24GB+）。稳定吞吐量的话，自己一张 4090 在 SDXL 量级下不到 30 天回本。\n- **MCP 图像生成 vs 直接 API** — 只在你的 agent 真的需要出图时才接 mcp-image，否则就是个没人用的活动部件。\n\n## 常见踩坑\n\n- **磁盘按 30GB \u002F checkpoint 起步消失** — SDXL base 约 7GB、Flux dev 约 24GB、加 LoRA（每个 150MB）、加 ControlNet 模型（每个约 1.5GB）、加 VAE。认真搞至少 500GB SSD 起步。\n- **CUDA \u002F xformers 版本漂移** — 上面每个工具想要的 PyTorch + CUDA + xformers 组合都略不同。**一个 venv 一个工具**，pin 死版本。别想让 ComfyUI + A1111 + InvokeAI 共享一个 venv。\n- **Kohya 训出来的 LoRA 一眼坏了的人物** — 99% 是数据集问题（10 张 768px 是底线，30+ 张 1024px 才安全），不是超参问题。先把数据集打磨好再碰学习率。\n- **ControlNet 模型和 base 不匹配** — SDXL 的 ControlNet **不能**用在 SD 1.5 上，反过来也是。不匹配 = 一团噪声。下载前看文件名后缀（`_sdxl` \u002F `_sd15`）。\n- **AnimateDiff 第一次跑出来是幻灯片不是动画** — context length \u002F motion scale \u002F 采样步数要联调。先跑官方发布的示例工作流再自己改。",[108,111,114,117,120],{"q":109,"a":110},"跑这套需要多少显存？","12GB 是 SDXL 通过 ComfyUI \u002F A1111 跑图的底线。16GB 能在 Kohya 上训 SDXL 
LoRA。24GB（4090）是舒适目标 — 能本地跑 Flux dev、合理时间训 LoRA、能扛 ControlNet + LoRA 叠加。12GB 以下只能跑 SD 1.5 和量化 Flux 变体，重活推荐丢给 Replicate。",{"q":112,"a":113},"为啥不直接用 Midjourney？","Midjourney 适合一次性的创意出图。本 pack 针对 Midjourney 干不了的场景：训练你自己人物 \u002F 产品的 LoRA、从输入图做 ControlNet 姿态条件、10k 张图带一致元数据的批量任务、通过 MCP 把图像生成接进 Claude Code \u002F Codex agent、敏感输入下 100% 离线运行。如果这些场景都用不上 — Midjourney 就是对的答案。",{"q":115,"a":116},"ComfyUI 看着挺吓人 — 要不先从 AUTOMATIC1111 开始？","从你能先装上的那个开始。A1111 上手快（文本框 + 点生成）。ComfyUI 第一小时陡，但一旦你想要可版本化、可分享、可确定性重跑的工作流，立刻回本。如果你是开发者，ComfyUI 的 JSON 可序列化工作流图一天内就会让你感到「对味」。",{"q":118,"a":119},"Diffusers 和 ComfyUI 都要装吗？","一开始不用。ComfyUI 底层就是 Diffusers，你通过节点图就拿到了 Diffusers 的能力。直接用 Diffusers 的时机是：要脚本化批量、要搭自定义 pipeline（一次调用里 SDXL + IP-Adapter + ControlNet + Refiner）、或者要把图像生成嵌进更大的 Python 应用。交互式跑图的话，光 ComfyUI 就够了。",{"q":121,"a":122},"训 LoRA 难吗？","机械上不难 — Kohya sd-scripts 的默认参数能用。难的是数据集：30+ 张多样、高分辨率、清晰打标的目标图。机械操作半天学完，**数据集打磨才是真正的手艺**。第一个 LoRA 给自己一个周末，前两次有心理准备会扔掉。",{"@context":124,"@type":125,"name":13,"description":126,"numberOfItems":127,"inLanguage":128},"https:\u002F\u002Fschema.org","ItemList","10 个开源 AI 图像生成工具，给规模化跑图的开发者和创作者，按从工作流引擎到 LoRA 训练再到 MCP 集成的顺序排好。",10,"zh-CN",[130,134,138],{"url":131,"anchor":132,"reason":133},"\u002Fzh\u002Fai-tools-for\u002Fimage-generation","TokRepo 上的全部图像生成资产","更全的图像生成工具、模型、工作流目录",{"url":135,"anchor":136,"reason":137},"\u002Fzh\u002Ffeatured","TokRepo 精选资产","这 10 个工具属于更大的精选目录",{"url":139,"anchor":140,"reason":141},"\u002Fzh\u002Ftopics","浏览其他主题 pack","还有 AI Agent \u002F MCP server \u002F 多智能体框架等多个主题 pack",[143,147,151],{"claim":144,"source_name":145,"source_url":146},"ComfyUI 是一个节点式的扩散模型图像生成 GUI","ComfyUI GitHub 仓库","https:\u002F\u002Fgithub.com\u002Fcomfyanonymous\u002FComfyUI",{"claim":148,"source_name":149,"source_url":150},"AUTOMATIC1111 是 Stable Diffusion 经典 Web UI","Stable Diffusion Web UI GitHub 仓库","https:\u002F\u002Fgithub.com\u002FAUTOMATIC1111\u002Fstable-diffusion-webui",{"claim":152,"source_name":153,"source_url":154},"ControlNet 
为扩散模型增加空间条件控制","ControlNet 论文 \u002F GitHub","https:\u002F\u002Fgithub.com\u002Flllyasviel\u002FControlNet",920,"2026-05-22T00:00:00Z"]