[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"pack-detail-ai-video-generation-pack-zh":3,"seo:pack:ai-video-generation-pack:zh":95},{"code":4,"message":5,"data":6},200,"操作成功",{"pack":7},{"slug":8,"icon":9,"tone":10,"status":11,"status_label":12,"title":13,"description":14,"items":15,"install_cmd":94},"ai-video-generation-pack","🎥","#7C3AED","new","本周新建","AI 视频生成包","给从文本或图像生成视频的创作者和开发者准备的十件套：开源模型（CogVideo \u002F Open-Sora \u002F AnimateDiff \u002F Diffusers）、对接 Sora \u002F Veo \u002F Runway \u002F Pika 的商业 API 桥、ControlNet + Motion Canvas 的镜头与运动控制、Real-ESRGAN 超分放大，再加把这些片段串起来的剪辑器。",[16,28,35,43,50,58,65,72,79,86],{"id":17,"uuid":18,"slug":19,"title":20,"description":21,"author_name":22,"view_count":23,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2458,"7e2317bb-453a-11f1-9bc6-00163e2b0d79","cogvideo-text-image-video-generation-7e2317bb","CogVideo — Text and Image to Video Generation","An open-source video generation framework from Zhipu AI supporting text-to-video and image-to-video with CogVideoX models. Generates high-quality clips up to 6 seconds.","Script Depot",155,0,"en","skill","Skill",{"id":29,"uuid":30,"slug":31,"title":32,"description":33,"author_name":22,"view_count":34,"vote_count":24,"lang_type":25,"type":26,"type_label":27},109,"ff30d766-6654-4886-b631-e45e203f3a5e","open-sora-open-source-text-video-generation-ff30d766","Open-Sora — Open-Source Text-to-Video Generation","Open-source alternative to Sora by HPC-AI Tech. Generate videos from text prompts with an 11B parameter model. Apache 2.0 licensed. 28,800+ stars.",194,{"id":36,"uuid":37,"slug":38,"title":39,"description":40,"author_name":41,"view_count":42,"vote_count":24,"lang_type":25,"type":26,"type_label":27},777,"d848ded0-8bff-4424-8ef5-dda71b903327","together-ai-video-generation-skill-claude-code-d848ded0","Together AI Video Generation Skill for Claude Code","Skill that teaches Claude Code Together AI's video generation API. Covers text-to-video, image-to-video, and keyframe control for AI-powered video creation workflows.","Together AI",113,{"id":44,"uuid":45,"slug":46,"title":47,"description":48,"author_name":22,"view_count":49,"vote_count":24,"lang_type":25,"type":26,"type_label":27},111,"4ef1950f-2a47-4e24-9ce2-6f648dea8bed","diffusers-universal-video-image-generation-hub-4ef1950f","Diffusers — Universal Video & Image Generation Hub","Hugging Face's diffusion model library. Run CogVideoX, AnimateDiff, Stable Video Diffusion, and 50+ video\u002Fimage models with a unified API. 33,200+ stars.",173,{"id":51,"uuid":52,"slug":53,"title":54,"description":55,"author_name":56,"view_count":57,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2463,"04d7fee0-453b-11f1-9bc6-00163e2b0d79","animatediff-plug-play-animation-diffusion-models-04d7fee0","AnimateDiff — Plug-and-Play Animation for Diffusion Models","A plug-and-play motion module that turns community text-to-image Stable Diffusion models into animation generators without additional training. ICLR 2024 Spotlight paper.","AI Open Source",104,{"id":59,"uuid":60,"slug":61,"title":62,"description":63,"author_name":56,"view_count":64,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2495,"73d0fc65-45bc-11f1-9bc6-00163e2b0d79","real-esrgan-practical-image-video-super-resolution-73d0fc65","Real-ESRGAN — Practical Image and Video Super-Resolution","General-purpose image and video restoration tool that trains on pure synthetic data to handle real-world degradations including blur, noise, JPEG compression, and resize artifacts.",44,{"id":66,"uuid":67,"slug":68,"title":69,"description":70,"author_name":56,"view_count":71,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4664,"74fc6ef5-54cb-11f1-9bc6-00163e2b0d79","controlnet-add-spatial-control-diffusion-models-74fc6ef5","ControlNet — Add Spatial Control to Diffusion Models","ControlNet lets you add precise spatial conditioning such as edge maps, depth, and pose to Stable Diffusion, giving fine-grained control over AI image generation.",16,{"id":73,"uuid":74,"slug":75,"title":76,"description":77,"author_name":56,"view_count":78,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4618,"1a626bf6-548a-11f1-9bc6-00163e2b0d79","motion-canvas-create-animated-videos-code-1a626bf6","Motion Canvas — Create Animated Videos with Code","A TypeScript library and editor for creating publication-quality animated videos programmatically, combining the precision of code with a visual preview workflow.",38,{"id":80,"uuid":81,"slug":82,"title":83,"description":84,"author_name":22,"view_count":85,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4027,"f40e235a-5078-11f1-9bc6-00163e2b0d79","opencut-open-source-ai-video-editor-f40e235a","OpenCut — Open-Source AI Video Editor","An open-source alternative to CapCut for video editing with AI-assisted features, timeline editing, and professional export options.",88,{"id":87,"uuid":88,"slug":89,"title":90,"description":91,"author_name":92,"view_count":93,"vote_count":24,"lang_type":25,"type":26,"type_label":27},3602,"eff58004-95ca-5e16-bfc4-f85ec9b207c2","generative-media-skills-muapi-npx-skills-add","Generative Media Skills — muapi + npx skills add","Generative Media Skills is a multi-modal skill library: run image\u002Fvideo recipes via muapi-cli, installable into Claude Code\u002FCursor with `npx skills add`.","Skill Factory",92,"tokrepo install pack\u002Fai-video-generation-pack",{"pageType":96,"pageKey":8,"locale":97,"title":98,"metaDescription":99,"h1":100,"tldr":101,"bodyMarkdown":102,"faq":103,"schema":119,"internalLinks":125,"citations":138,"wordCount":151,"generatedAt":152},"pack","zh","AI 视频生成包 — 10 个工具搞定文生视频\u002F图生视频流水线","CogVideo \u002F Open-Sora \u002F Together AI \u002F Diffusers \u002F AnimateDiff \u002F Real-ESRGAN \u002F ControlNet \u002F Motion Canvas \u002F OpenCut \u002F 生成式媒体 Skill — 一套覆盖开源模型、商业 API、镜头控制、运动模块、超分放大的完整流水线，TokRepo 一键安装。","AI 视频生成包 — 从一句 prompt 到成片","给做文生视频\u002F图生视频的创作者和开发者准备的十件套。开源模型给你本地控制权和零单价成本，商业 API 桥接负责需要 Sora 级画质时的关键镜头，ControlNet + Motion Canvas 管镜头和运动，Real-ESRGAN 负责放大，再加一把剪辑器把片段串起来。","## 这个 pack 是干嘛的\n\nAI 视频已经不是「一个模型」的事，而是一条**流水线**：选模型 → 写 prompt → 关键帧约束 → 控制镜头\u002F运动 → 超分放大 → 剪辑。本 pack 把每个环节最值得装的一个挑出来，加起来十件 — **有主见，不求全** — 让你从空白 prompt 走到可看的成片，不用在十个 repo 的 README 之间反复横跳。\n\n这套刻意做了拆分：**开源模型负责本地控制、零单价成本**，加一个**商业 API 桥**用于真的需要 Sora 级画质且愿意付钱的时刻。绝大多数实际跑通的流水线都同时用两边。\n\n## 推荐安装顺序\n\n### 1. 选模型\n\n- **CogVideo**（#2458）— 文生视频 + 图生视频。智谱 THUDM 的成熟开源基线，单卡高显存能跑。要一个可复现的本地流水线，从这里起步。\n- **Open-Sora**（#109）— 复刻 Sora 风格的开源项目。长镜头和运动连贯性比 CogVideo 强，但硬件门槛更高。\n- **Diffusers**（#111）— Hugging Face 的模型 hub。一个 Python 接口同时加载 CogVideo \u002F Stable Video Diffusion \u002F HunyuanVideo \u002F Wan \u002F Mochi，新模型上线当周就能用。要统一接口就先装 Diffusers，其他模型当权重就行。\n- **Together AI Video Generation Skill**（#777）— 商业 API 桥。当你需要 Sora \u002F Veo \u002F Runway \u002F Pika 级画质又不想自己管 GPU 集群，就用它。按秒计费，当天可交付。\n\n### 2. 写 prompt\n\n- 规则跟图像生成一样，多加**运动动词**：「dolly forward」「orbit left」「static lock-off」。在电影数据上训练过的模型对电影词汇敏感。\n- 主体放在前 12 个 token 内 — 大多数文生视频模型 attention 仍然偏前部。\n\n### 3. 关键帧约束\n\n- **ControlNet**（#4664）— 喂入 pose \u002F depth \u002F canny 图，锁死构图。当你心里有一个特定取景、不想让模型自由发挥时用它。\n- 图生视频本身就把图作为关键帧 — 这种情况不需要额外 ControlNet。\n\n### 4. 加运动\n\n- **AnimateDiff**（#2463）— Stable Diffusion 家族的即插即用运动模块。把现成的图像生成 pipeline 动起来，不用重训。风格化\u002F动漫内容首选。\n- **Motion Canvas**（#4618）— 当你要的运动是确定性的（UI 演示、数据可视化、程序化镜头移动），不要跟扩散模型较劲，直接用代码写动画。\n\n### 5. 放大\n\n- **Real-ESRGAN**（#2495）— 实用的 4× 超分模型，支持视频。生成模型大多输出 512×512 或 720p，要交付 4K 就靠它。放在最后一步，编码之前。\n\n### 6. 剪辑\n\n- **OpenCut**（#4027）— 开源 AI 视频剪辑器。裁切、拼接、调色生成的片段。不用导出去闭源 NLE 走一遭。\n- **生成式媒体 Skill**（#3602）— muapi + npx skill 安装器，把十几个商业生成 API 统一在一个 CLI 后面。当 agent 需要调用「生成 5 秒片段」而不想每次选 vendor 时很好用。\n\n## 它们怎么协同\n\n```\nPrompt ─► CogVideo \u002F Open-Sora \u002F Diffusers \u002F Together API\n               │\n               ▼\n        原始 720p 片段\n               │\n   ControlNet ─┤ （可选：锁构图）\n   AnimateDiff ┤ （可选：给静图加运动）\n   Motion Canvas ┤ （可选：确定性镜头）\n               ▼\n        Real-ESRGAN  ─►  4K 放大片段\n               │\n               ▼\n            OpenCut\n               │\n               ▼\n         成片 mp4\n```\n\n关键分水岭：**扩散模型「幻觉」出运动，代码工具「指挥」出运动**。要惊喜和氛围用扩散（CogVideo \u002F Open-Sora \u002F AnimateDiff）；镜头轨迹一寸不能动、观众一眼看得出漂移就用 Motion Canvas。\n\n## 你会遇到的取舍\n\n- **本地 vs API** — 本地零单价但耗 GPU 时间和调参；API 出片快画质高，但按秒收费 + 配额限制。迭代用本地，关键镜头用 API。\n- **CogVideo vs Open-Sora** — CogVideo 安装更稳、显存门槛低；Open-Sora 在跑得通的时候片段更长更连贯。先用 CogVideo，等长度成为瓶颈再换。\n- **AnimateDiff vs 原生视频模型** — AnimateDiff 是把运动模块焊到 SD checkpoint 上（风格库巨大，连贯性一般）；原生视频模型端到端在视频上训练（运动干净，风格库少）。看内容：风格化 → AnimateDiff，写实 → CogVideo \u002F Open-Sora。\n- **Real-ESRGAN vs 付费超分** — Real-ESRGAN 免费、对 Web 交付够用；Topaz Video AI 这类付费工具在人脸细节上更锐，但要钱。先用 Real-ESRGAN 交付，被甲方挑出来再升级。\n\n## 常见踩坑\n\n- **显存数学** — CogVideo-5B 跑 720p 大约 24 GB。Open-Sora 经常要 40 GB+。租 GPU 之前先看 model card。\n- **跨帧 prompt 漂移** — 任何扩散模型的长镜头大约 3 秒后就开始角色 \u002F 光线漂移。3 秒为段生成，在 OpenCut 里拼，比硬刚 10 秒一镜到底省事。\n- **音频是独立环节** — 这套工具都不出匹配音轨。规划单独的 TTS \u002F SFX 流程，最后在 OpenCut 合成。\n- **商业 API 条款** — 每家商业生成商在商用、训练 opt-out、水印上规则都不一样。客户项目发稿前先看清 TOS。",[104,107,110,113,116],{"q":105,"a":106},"从 CogVideo 还是 Open-Sora 开始？","没有 40+ GB GPU 和「必须更长镜头」的明确理由，就从 CogVideo 开始。CogVideo 单卡 24 GB 能跑，文档更全，常见报错都有解。等 CogVideo 的片段长度上限成为瓶颈，再换 Open-Sora — 不要更早。",{"q":108,"a":109},"真的需要同时装开源模型和商业 API skill 吗？","实际跑通的流水线大多两个都装。本地模型给你零单价的迭代、确定的 seed、自由的实验空间 — 适合一个 prompt 试 50 个变体。商业 API（Together AI skill 或生成式媒体 Skill）给你 Sora \u002F Veo \u002F Runway 级的成片画质 — 适合最后那几个关键镜头。一边迭代一边交付，分工明确。",{"q":111,"a":112},"怎么控制镜头，不只是主体？","两条路。对扩散模型，把镜头动词写进 prompt（「slow dolly forward」「static lock-off」「orbit 90 degrees」），在电影 caption 上训练过的模型懂这套词汇。镜头需要精确取景或轨迹时，换 Motion Canvas 用代码写运动，再把扩散输出合成进框定好的镜头里。",{"q":114,"a":115},"ControlNet 不是给图像用的吗，怎么用在视频？","ControlNet 是把一个结构信号（pose \u002F depth \u002F canny）注入到扩散去噪步骤里。当那一步恰好是视频生成的第一帧时，整个片段都继承了这个构图。要让生成视频稳在指定构图里（比如产品镜头、角色固定姿势），这是最干净的办法 — 你不能让模型自由发挥版面。",{"q":117,"a":118},"一台单 GPU 机能不能跑完整条流水线？","能，前提是把各阶段串行而不是并行。先用 CogVideo 生成（24 GB），卸载，再跑 Real-ESRGAN（约 6 GB），最后 OpenCut 走 CPU。瓶颈在生成那一步，下游环节都很便宜。如果只有 16 GB 卡，要么降到 CogVideo 小尺寸版本，要么让生成走商业 API，把超分 + 剪辑留在本地。",{"@context":120,"@type":121,"name":13,"description":122,"numberOfItems":123,"inLanguage":124},"https:\u002F\u002Fschema.org","ItemList","覆盖开源视频生成模型、商业 API、镜头与运动控制、超分放大、剪辑组装五个环节的十件套，给做文生视频\u002F图生视频流水线的创作者和开发者。",10,"zh-CN",[126,130,134],{"url":127,"anchor":128,"reason":129},"\u002Fzh\u002Ftopics\u002Fvideo-production-ai","AI 视频生产线 pack","姊妹 pack 关注剪辑和 headless 生产流水线，不是生成模型",{"url":131,"anchor":132,"reason":133},"\u002Fzh\u002Fai-tools-for\u002Fcontent-creation","AI 内容创作工具集","更大范围的内容创作资产目录，可与生成的视频片段搭配使用",{"url":135,"anchor":136,"reason":137},"\u002Fzh\u002Ftopics","浏览其他主题 pack","还有 AI Agent \u002F MCP \u002F 多智能体框架等多个主题 pack",[139,143,147],{"claim":140,"source_name":141,"source_url":142},"CogVideo 是 THUDM 开源的文生视频与图生视频模型","CogVideo GitHub 仓库","https:\u002F\u002Fgithub.com\u002FTHUDM\u002FCogVideo",{"claim":144,"source_name":145,"source_url":146},"Open-Sora 是复刻 Sora 风格文生视频的开源项目","Open-Sora GitHub 仓库","https:\u002F\u002Fgithub.com\u002Fhpcaitech\u002FOpen-Sora",{"claim":148,"source_name":149,"source_url":150},"Real-ESRGAN 是用于通用图像与视频超分辨率的实用算法","Real-ESRGAN GitHub 仓库","https:\u002F\u002Fgithub.com\u002Fxinntao\u002FReal-ESRGAN",900,"2026-05-22T00:00:00Z"]