[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"pack-detail-ai-music-audio-generation-zh":3,"seo:pack:ai-music-audio-generation:zh":96},{"code":4,"message":5,"data":6},200,"操作成功",{"pack":7},{"slug":8,"icon":9,"tone":10,"status":11,"status_label":12,"title":13,"description":14,"items":15,"install_cmd":95},"ai-music-audio-generation","🎵","#BE185D","new","本周新建","AI 音乐与音频生成包","给用 AI 做音乐和音效的音乐人、播客主、创作者的十件资产：Bark \u002F AudioCraft 负责生成，Cartesia \u002F Chatterbox 出人声，MuseScore \u002F LMMS 编曲混音，Tone.js \u002F howler.js 上 Web，Demucs 做人声\u002F伴奏分离，Audacity 收尾母带 — 按生产管线顺序排列。",[16,28,35,43,50,58,65,72,81,88],{"id":17,"uuid":18,"slug":19,"title":20,"description":21,"author_name":22,"view_count":23,"vote_count":24,"lang_type":25,"type":26,"type_label":27},279,"814b8972-5d48-4379-9756-9a3d8ed686f7","bark-ai-text-audio-music-effects-814b8972","Bark — AI Text-to-Audio with Music & Effects","Bark is a transformer text-to-audio model by Suno that generates speech, music, and sound effects. 39.1K+ GitHub stars. 12+ languages, 100+ voice presets, non-speech audio. MIT licensed.","Script Depot",201,0,"en","skill","Skill",{"id":29,"uuid":30,"slug":31,"title":32,"description":33,"author_name":22,"view_count":34,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4665,"8a0d7a57-54cb-11f1-9bc6-00163e2b0d79","audiocraft-ai-audio-generation-meta-8a0d7a57","AudioCraft — AI Audio Generation by Meta","AudioCraft is a PyTorch library from Meta Research providing code and pre-trained models for audio generation including music, sound effects, and audio compression.",13,{"id":36,"uuid":37,"slug":38,"title":39,"description":40,"author_name":41,"view_count":42,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2989,"48e00964-c223-46ba-a45e-3ef76fbce082","cartesia-sonic-tts-75ms-time-to-first-audio","Cartesia Sonic TTS — 75ms Time-to-First-Audio","Cartesia Sonic is a state-space-model TTS with 75ms time-to-first-audio. 100+ voices, 5s cloning, streaming WebSocket. Lowest-latency TTS.","Cartesia",84,{"id":44,"uuid":45,"slug":46,"title":47,"description":48,"author_name":22,"view_count":49,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4177,"a6af5d44-5293-11f1-9bc6-00163e2b0d79","chatterbox-state-art-open-source-text-speech-a6af5d44","Chatterbox — State-of-the-Art Open Source Text-to-Speech","A high-quality open-source TTS model by Resemble AI that delivers natural-sounding speech with fine-grained control over prosody, emotion, and expressiveness.",67,{"id":51,"uuid":52,"slug":53,"title":54,"description":55,"author_name":56,"view_count":57,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1722,"7185dcfa-3b20-11f1-9bc6-00163e2b0d79","musescore-free-open-source-music-notation-software-7185dcfa","MuseScore — Free Open Source Music Notation Software","MuseScore is a free, open-source music notation application for composing, arranging, and engraving sheet music. It runs on Windows, macOS, and Linux, supports MusicXML import\u002Fexport, MIDI playback, and produces professional-quality scores.","AI Open Source",132,{"id":59,"uuid":60,"slug":61,"title":62,"description":63,"author_name":22,"view_count":64,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1994,"c9a9b225-3ecd-11f1-9bc6-00163e2b0d79","lmms-free-cross-platform-digital-audio-workstation-c9a9b225","LMMS — Free Cross-Platform Digital Audio Workstation","LMMS (Linux MultiMedia Studio) is a free, open-source digital audio workstation for music production. It includes synthesizers, sample playback, beat sequencing, and an effects chain, providing a complete environment for creating music without any cost.",128,{"id":66,"uuid":67,"slug":68,"title":69,"description":70,"author_name":22,"view_count":71,"vote_count":24,"lang_type":25,"type":26,"type_label":27},3646,"09935623-4ee7-11f1-9bc6-00163e2b0d79","tone-js-web-audio-framework-interactive-music-09935623","Tone.js — Web Audio Framework for Interactive Music","A TypeScript framework built on the Web Audio API that provides scheduling, synthesis, and effects for creating interactive music in the browser.",54,{"id":73,"uuid":74,"slug":75,"title":76,"description":77,"author_name":56,"view_count":78,"vote_count":24,"lang_type":25,"type":79,"type_label":80},4171,"d9fc60d5-524e-11f1-9bc6-00163e2b0d79","howler-js-cross-browser-audio-library-web-d9fc60d5","howler.js — Cross-Browser Audio Library for the Web","A JavaScript audio library that provides a simple, consistent API for playing sound in any browser using the Web Audio API with HTML5 Audio fallback.",27,"script","Script",{"id":82,"uuid":83,"slug":84,"title":85,"description":86,"author_name":22,"view_count":87,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4669,"d9e3e25f-54cb-11f1-9bc6-00163e2b0d79","demucs-ai-powered-music-source-separation-d9e3e25f","Demucs — AI-Powered Music Source Separation","Demucs is a state-of-the-art music source separation model from Meta Research that splits audio tracks into vocals, drums, bass, and other instrument stems.",24,{"id":89,"uuid":90,"slug":91,"title":92,"description":93,"author_name":56,"view_count":94,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1720,"44f450b6-3b20-11f1-9bc6-00163e2b0d79","audacity-free-cross-platform-audio-editor-44f450b6","Audacity — Free Cross-Platform Audio Editor","Audacity is a free, open-source digital audio editor and recorder for Windows, macOS, and Linux. It supports multi-track editing, a wide range of audio formats, real-time effects, and plugin extensibility for recording, editing, and mastering audio.",117,"tokrepo install pack\u002Fai-music-audio-generation",{"pageType":97,"pageKey":8,"locale":98,"title":99,"metaDescription":100,"h1":101,"tldr":102,"bodyMarkdown":103,"faq":104,"schema":120,"internalLinks":126,"citations":139,"wordCount":152,"generatedAt":153},"pack","zh","AI 音乐与音频生成包 — 10 件资产从 Prompt 到母带","Bark \u002F AudioCraft \u002F Cartesia Sonic \u002F Chatterbox \u002F MuseScore \u002F LMMS \u002F Tone.js \u002F howler.js \u002F Demucs \u002F Audacity — 一条从文字 prompt 到母带 WAV 的真实生产管线。含安装顺序、混音取舍、每个模型真正擅长什么。","AI 音乐与音频生成包 — 从 Prompt 到一首成品的完整链路","十件开源或开放 API 资产，按真实生产管线排列：输入（prompt、歌词、MIDI）→ 生成（音乐、人声、音效）→ 编排（DAW、记谱、Web）→ 混音（人声分离、修复）→ 母带导出。绕开 Suno\u002FUdio 网页 UI，把版权和迭代权握在自己手里。","## 这个 pack 包含什么\n\n这是给**用 AI 生成音频、并且要在自己控制的工具里收尾**的音乐人、播客主、游戏\u002F网页创作者的一套配置 — 不是把母带锁在某个 SaaS 网页 app 里。每件资产要么完全开源，要么有真正的 API（不接受网页复制粘贴流）。十件里有九件是 MIT 或 Apache 协议。\n\n整套覆盖音频生产管线的全部 5 个阶段。**不需要每个都装** — 根据你要的产出（音乐 \u002F 人声 \u002F 音效 \u002F 乐谱 \u002F Web 播放）选对应那一行，串起来跑。\n\n## 按生产管线顺序安装\n\n### 阶段 1 — 生成\n\n1. **Bark** — Suno 研究团队出的 transformer 文本到音频模型。从文字 prompt 生成 12+ 语言的语音、音乐、背景噪音、音效，支持 `[laughs]` `[music]` 等非语音 tag。MIT 协议，本地跑约需 12 GB 显存。需要一个万金油模型先粗略出活就用它。\n2. **AudioCraft（MusicGen）** — Meta 的 PyTorch 音乐生成库。比 Bark 的器乐音乐连贯性更高，可以用文字 prompt 或哼唱旋律条件化生成。**你真正要的是音乐而不是人声时**选它。\n3. **Cartesia Sonic TTS** — 状态空间模型 TTS，首音节响应 75ms，100+ 声音、5 秒克隆、流式 WebSocket。云 API。需要实时人声（直播 agent、歌词试唱快速迭代）时用。\n4. **Chatterbox** — Resemble AI 出的开源 TTS，对韵律、情感、表现力可精细控制。当你要自托管、希望出来的歌词或旁白不像 GPS 语音播报时，这是 Cartesia\u002FElevenLabs 的本地替代。\n\n### 阶段 2 — 编排\n\n5. **MuseScore** — 免费开源记谱软件。把 AI 生成的 MIDI\u002F旋律想法变成正式编曲的桥梁。导出 MIDI、MusicXML、音频。\n6. **LMMS** — 跨平台免费 DAW，内置合成器、节拍序列器、效果链。**AI 生成的素材在这里才变成歌**。不想为了叠四轨花 200 美元买 FL Studio \u002F Ableton 时的开源替代。\n\n### 阶段 3 — 上 Web（可选，给要出货的创作者）\n\n7. **Tone.js** — Web Audio 互动音乐框架。当你的产出不是 WAV 而是**体验**（生成式网页音乐、互动 loop、浏览器里的乐器）时用。\n8. **howler.js** — 跨浏览器音频播放库。和 Tone.js 配套用（Tone 负责合成，Howler 负责播放成品）。三行 API 解决你本来要花一周末调试的所有浏览器音频 bug。\n\n### 阶段 4 — 修复 \u002F 音源分离\n\n9. **Demucs** — Meta 的 AI 音乐源分离。把任意一首歌分成 鼓 \u002F 贝斯 \u002F 人声 \u002F 其他 四轨。这是人声去除\u002F伴奏提取的那一步（任何歌做卡拉 OK、从 AI 生成的混音里抽出人声、修补串音）。\n\n### 阶段 5 — 母带 & 导出\n\n10. **Audacity** — 全地球播客和 YouTube 旁白配音都在用的跨平台音频编辑器。降噪、归一化、EQ、限制器、导出 MP3\u002FWAV\u002FFLAC。**故意保守** — 母带应该可预测。\n\n## 它们怎么串起来\n\n```\n文字 prompt \u002F 歌词\n   │\n   ├─ Bark（任意音频）──┐\n   ├─ MusicGen（音乐）─┤\n   ├─ Cartesia（人声）─┼─→ 分轨 WAV\n   └─ Chatterbox（人声）┘\n                          │\n     ┌────────────────────┘\n     ▼\nMuseScore（记谱 \u002F MIDI 草稿）→ LMMS（DAW 编排 + 叠轨）\n     │\n     ├─ Demucs（如需可再做分离 \u002F 抽轨）\n     │\n     ▼\nAudacity（清理、EQ、限制器、母带）\n     │\n     ├─ WAV \u002F MP3 → 上 Spotify \u002F YouTube \u002F 播客主机\n     └─ Tone.js + howler.js → 嵌进网页\n```\n\n整条链路最关键的转折点是**阶段 2 的 LMMS** — 没有 DAW，AI 生成的素材就停留在「一次性新鲜玩意」；有 DAW，四个 Bark\u002FMusicGen take 才能拼成一首结构完整的歌。\n\n## 你会遇到的取舍\n\n- **Bark vs MusicGen** — Bark 更宽（人声 + 音乐 + 音效）但音乐更松散。MusicGen 更窄（器乐音乐）但更连贯。要的是**完整歌曲**：MusicGen 出伴奏 + Bark 或 Cartesia 出人声。要的是播客 intro、音效、氛围音：Bark 单挑就够。\n- **Cartesia vs Chatterbox** — Cartesia 最快（首音节 75 ms）声音也最好，但云 API 按用量计费。Chatterbox 自托管不按次收费。**Cartesia 适合生产环境的实时 agent**；**Chatterbox 适合批量出人声、对延迟不敏感**的场景。\n- **Tone.js vs howler.js** — Tone.js 做合成（振荡器、乐器、调度）。Howler.js 跨浏览器播放成品文件。多数项目两个都要。如果你不在运行时生成音频，直接 Howler 就行。\n- **Demucs 进攻 vs 防御用法** — 进攻：从任意参考曲拉分轨研究或重混。防御：当 AI 生成的人声和伴奏同一次渲染时，把它们拆开避免共振伪影。\n- **Suno\u002FUdio 网页 UI vs 本套** — Suno 网页 app 出 30 秒 meme 更快。但一旦你想**迭代**（只重生成副歌）、**拥有母带**（无 DRM、自己的 WAV）、**批量编曲**（隔夜跑 50 个 prompt），本套就赢了。\n\n## 常见踩坑\n\n- **Bark 显存** — 全模型要 12 GB 显存。8 GB 显卡设 `SUNO_USE_SMALL_MODELS=True`。CPU 也能跑但慢 10 倍。\n- **AudioCraft 协议陷阱** — MusicGen 某些 checkpoint 是 CC-BY-NC（禁商用）。上架前**读 model card**确认协议。\n- **Demucs CPU 太慢** — 4 分钟歌曲 CPU 跑约 3 分钟，3060 跑 20 秒。批量隔夜走 CPU；交互式必须 GPU。\n- **Audacity 响度战争** — 限制器别推过 -1 dBTP。母带太响在 Spotify 会被自动衰减，反而难听。\n- **Cartesia 流式 + 浏览器** — WebSocket 音频块需要仔细缓冲；客户端用 Tone.js 或 Howler.js 播放，别用裸 `\u003Caudio>` 标签。",[105,108,111,114,117],{"q":106,"a":107},"这套真能替代 Suno 或 Udio 吗？","出 30 秒一次性片段，替代不了 — Suno 网页 app 更快。但其他场景（只重做副歌、握住母带文件、批量生成 50 个 take、人声和伴奏分别调音）都能替代，而且更强。本套给的是制作人的工作流，不是老虎机式 UI。MusicGen + Bark 覆盖生成层；LMMS 给你 Suno UI 隐藏掉的编排层；Demucs 让你拉出 Suno 永远不开放的分轨。",{"q":109,"a":110},"AI 唱歌应该用哪个模型？","这里面没有任何一个是专门为**唱歌**调过的 — 全部都是语音模型。要 AI 唱：Bark 配合特定声音预设 + `[singing]` tag 是创作性最松的选项；Cartesia 和 Chatterbox 出来更可控但明显是「说话腔」，可以在 LMMS 里变调假装旋律，但听感像在自动 tune 里说话。真正的 AI 唱歌目前仍然要走 Suno 的托管模型。本 pack 在这点上不藏着掖着。",{"q":112,"a":113},"纯本地路径的最低硬件要求是多少？","Apple Silicon Mac（M1 及以上）或者 12 GB 显存的台式机（RTX 3060 及以上）能在可用速度下本地跑 Bark \u002F MusicGen \u002F Demucs \u002F Chatterbox。8 GB 显卡开 small-model 模式。纯 CPU 这四个也能跑，但比 GPU 慢 10 倍 — 隔夜批量没问题，交互迭代会很痛。",{"q":115,"a":116},"怎么从 AI 生成的音乐里拿到干净的分轨？","用 MusicGen 对同一个 prompt 生成 4 个短变体，每个都过一遍 Demucs 拆成 鼓 \u002F 贝斯 \u002F 人声 \u002F 其他，然后在 LMMS 里把好的部分再叠起来。**这是核心套路**：生成模型给你的是过得去的完整混音，但 Demucs 让你从第 3 个 take 抽出唯一好听的鼓 loop，从第 1 个 take 抽出贝斯。比连续重 roll 几小时等整个 take 落地干净得多。",{"q":118,"a":119},"Tone.js 和 howler.js 都需要装吗？","只有当你要把音频部署到网页才需要。Howler.js 负责播放成品文件（Audacity 出来的母带 WAV），跨浏览器自动播放兼容性好。Tone.js 负责浏览器里合成或调度音频（生成式音乐、互动乐器）。静态音乐网站：只要 Howler。生成式 Web 乐器：两个都要 — Tone 合成，Howler 播放预渲染的采样。",{"@context":121,"@type":122,"name":13,"description":123,"numberOfItems":124,"inLanguage":125},"https:\u002F\u002Fschema.org","ItemList","十件开源或开放 API 音频工具，构成一条真实的音频生产管线：生成、编排、混音、母带、出货。",10,"zh-CN",[127,131,135],{"url":128,"anchor":129,"reason":130},"\u002Fzh\u002Fai-tools-for\u002Fcontent-creation","AI 内容创作工具集","音乐音频和创作者套件其它部分（脚本、视频、缩略图）协同使用",{"url":132,"anchor":133,"reason":134},"\u002Fzh\u002Ffeatured","TokRepo 精选资产","浏览本 pack 之外的精选目录",{"url":136,"anchor":137,"reason":138},"\u002Fzh\u002Ftopics","浏览其他主题 pack","内容创作者、视频生产、创意 AI 工作流相关主题包",[140,144,148],{"claim":141,"source_name":142,"source_url":143},"Bark 是 Suno 研究团队出的 transformer 文本到音频模型","Bark GitHub 仓库","https:\u002F\u002Fgithub.com\u002Fsuno-ai\u002Fbark",{"claim":145,"source_name":146,"source_url":147},"AudioCraft 是 Meta 的音频生成库，含 MusicGen","AudioCraft GitHub 仓库","https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Faudiocraft",{"claim":149,"source_name":150,"source_url":151},"Demucs 是 Meta 出的音乐源分离模型","Demucs GitHub 仓库","https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdemucs",900,"2026-05-22T12:00:00Z"]