[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"pack-detail-ai-music-audio-generation-es":3,"seo:pack:ai-music-audio-generation:es":96},{"code":4,"message":5,"data":6},200,"操作成功",{"pack":7},{"slug":8,"icon":9,"tone":10,"status":11,"status_label":12,"title":13,"description":14,"items":15,"install_cmd":95},"ai-music-audio-generation","🎵","#BE185D","new","Nuevo · esta semana","Pack de Generación de Música y Audio con IA","Diez picks para el músico, podcaster y creador que genera música o sonido con IA: Bark y AudioCraft para generación, Cartesia y Chatterbox para voces, MuseScore y LMMS para arreglos, Tone.js y howler.js para la web, Demucs para separar fuentes, Audacity para limpieza y masterización — en orden de instalación.",[16,28,35,43,50,58,65,72,81,88],{"id":17,"uuid":18,"slug":19,"title":20,"description":21,"author_name":22,"view_count":23,"vote_count":24,"lang_type":25,"type":26,"type_label":27},279,"814b8972-5d48-4379-9756-9a3d8ed686f7","bark-ai-text-audio-music-effects-814b8972","Bark — AI Text-to-Audio with Music & Effects","Bark is a transformer text-to-audio model by Suno that generates speech, music, and sound effects. 39.1K+ GitHub stars. 12+ languages, 100+ voice presets, non-speech audio. MIT licensed.","Script Depot",201,0,"en","skill","Skill",{"id":29,"uuid":30,"slug":31,"title":32,"description":33,"author_name":22,"view_count":34,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4665,"8a0d7a57-54cb-11f1-9bc6-00163e2b0d79","audiocraft-ai-audio-generation-meta-8a0d7a57","AudioCraft — AI Audio Generation by Meta","AudioCraft is a PyTorch library from Meta Research providing code and pre-trained models for audio generation including music, sound effects, and audio compression.",21,{"id":36,"uuid":37,"slug":38,"title":39,"description":40,"author_name":41,"view_count":42,"vote_count":24,"lang_type":25,"type":26,"type_label":27},2989,"48e00964-c223-46ba-a45e-3ef76fbce082","cartesia-sonic-tts-75ms-time-to-first-audio","Cartesia Sonic TTS — 75ms Time-to-First-Audio","Cartesia Sonic is a state-space-model TTS with 75ms time-to-first-audio. 100+ voices, 5s cloning, streaming WebSocket. Lowest-latency TTS.","Cartesia",93,{"id":44,"uuid":45,"slug":46,"title":47,"description":48,"author_name":22,"view_count":49,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4177,"a6af5d44-5293-11f1-9bc6-00163e2b0d79","chatterbox-state-art-open-source-text-speech-a6af5d44","Chatterbox — State-of-the-Art Open Source Text-to-Speech","A high-quality open-source TTS model by Resemble AI that delivers natural-sounding speech with fine-grained control over prosody, emotion, and expressiveness.",81,{"id":51,"uuid":52,"slug":53,"title":54,"description":55,"author_name":56,"view_count":57,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1722,"7185dcfa-3b20-11f1-9bc6-00163e2b0d79","musescore-free-open-source-music-notation-software-7185dcfa","MuseScore — Free Open Source Music Notation Software","MuseScore is a free, open-source music notation application for composing, arranging, and engraving sheet music. It runs on Windows, macOS, and Linux, supports MusicXML import\u002Fexport, MIDI playback, and produces professional-quality scores.","AI Open Source",132,{"id":59,"uuid":60,"slug":61,"title":62,"description":63,"author_name":22,"view_count":64,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1994,"c9a9b225-3ecd-11f1-9bc6-00163e2b0d79","lmms-free-cross-platform-digital-audio-workstation-c9a9b225","LMMS — Free Cross-Platform Digital Audio Workstation","LMMS (Linux MultiMedia Studio) is a free, open-source digital audio workstation for music production. It includes synthesizers, sample playback, beat sequencing, and an effects chain, providing a complete environment for creating music without any cost.",128,{"id":66,"uuid":67,"slug":68,"title":69,"description":70,"author_name":22,"view_count":71,"vote_count":24,"lang_type":25,"type":26,"type_label":27},3646,"09935623-4ee7-11f1-9bc6-00163e2b0d79","tone-js-web-audio-framework-interactive-music-09935623","Tone.js — Web Audio Framework for Interactive Music","A TypeScript framework built on the Web Audio API that provides scheduling, synthesis, and effects for creating interactive music in the browser.",56,{"id":73,"uuid":74,"slug":75,"title":76,"description":77,"author_name":56,"view_count":78,"vote_count":24,"lang_type":25,"type":79,"type_label":80},4171,"d9fc60d5-524e-11f1-9bc6-00163e2b0d79","howler-js-cross-browser-audio-library-web-d9fc60d5","howler.js — Cross-Browser Audio Library for the Web","A JavaScript audio library that provides a simple, consistent API for playing sound in any browser using the Web Audio API with HTML5 Audio fallback.",32,"script","Script",{"id":82,"uuid":83,"slug":84,"title":85,"description":86,"author_name":22,"view_count":87,"vote_count":24,"lang_type":25,"type":26,"type_label":27},4669,"d9e3e25f-54cb-11f1-9bc6-00163e2b0d79","demucs-ai-powered-music-source-separation-d9e3e25f","Demucs — AI-Powered Music Source Separation","Demucs is a state-of-the-art music source separation model from Meta Research that splits audio tracks into vocals, drums, bass, and other instrument stems.",35,{"id":89,"uuid":90,"slug":91,"title":92,"description":93,"author_name":56,"view_count":94,"vote_count":24,"lang_type":25,"type":26,"type_label":27},1720,"44f450b6-3b20-11f1-9bc6-00163e2b0d79","audacity-free-cross-platform-audio-editor-44f450b6","Audacity — Free Cross-Platform Audio Editor","Audacity is a free, open-source digital audio editor and recorder for Windows, macOS, and Linux. It supports multi-track editing, a wide range of audio formats, real-time effects, and plugin extensibility for recording, editing, and mastering audio.",119,"tokrepo install pack\u002Fai-music-audio-generation",{"pageType":97,"pageKey":8,"locale":25,"title":98,"metaDescription":99,"h1":100,"tldr":101,"bodyMarkdown":102,"faq":103,"schema":119,"internalLinks":125,"citations":138,"wordCount":151,"generatedAt":152},"pack","AI Music & Audio Generation Pack — 10 Open Tools for Musicians & Creators","Bark, AudioCraft, Cartesia Sonic, Chatterbox, MuseScore, LMMS, Tone.js, howler.js, Demucs, Audacity — a real production pipeline from text prompt to mastered WAV. Install order, mixing tradeoffs, and what each model is actually good at.","AI Music & Audio Generation Pack — From Prompt to Mastered Track","Ten open and open-API picks arranged as a real pipeline: input (prompts, lyrics, MIDI) → generate (music, voice, SFX) → arrange (DAW, notation, web) → mix (source separation, repair) → master & export. Skip Suno\u002FUdio's web UI — own the stack.","## What's in this pack\n\nThis is the rig for a musician, podcaster, or game\u002Fweb creator who wants to **generate audio with AI and finish it in tools they control** — not lock the master to a SaaS web app. Every pick is either fully open-source or has a real API (no copy-paste-from-website workflows). Nine of ten are MIT or Apache-licensed.\n\nThe stack covers all five stages of an audio production pipeline. You don't need every tool — pick the row that matches your output (music, voice, SFX, score, web playback) and chain through.\n\n## Install in this pipeline order\n\n### Stage 1 — Generate\n\n1. **Bark** — transformer text-to-audio by Suno's research team. Speech, music, background noise, and sound effects from text prompts in 12+ languages, with non-speech tags like `[laughs]` and `[music]`. MIT licensed, runs locally on ~12 GB VRAM. Start here when you want one model that does everything roughly.\n2. **AudioCraft (MusicGen)** — Meta's PyTorch library for music and sound effect generation. Higher musical coherence than Bark for instrumental tracks, conditioned on text prompts or melody. The right pick when you specifically want *music*, not voice.\n3. **Cartesia Sonic TTS** — state-space-model voice with 75 ms time-to-first-audio, 100+ voices, 5 s cloning, streaming WebSocket. Cloud API. Use this when you need real-time vocal delivery (live agents, fast iteration on lyric takes).\n4. **Chatterbox** — open-source TTS by Resemble AI with fine-grained control over prosody, emotion, expressiveness. The self-hosted alternative to Cartesia\u002FElevenLabs when you want lyric or narration vocals that don't sound like a GPS voice.\n\n### Stage 2 — Arrange\n\n5. **MuseScore** — free open-source notation. The bridge between generated MIDI\u002Fmelody ideas and a real arrangement. Export to MIDI, MusicXML, audio.\n6. **LMMS** — free cross-platform DAW with built-in synths, beat sequencer, and effects chain. Where AI-generated stems become a song. The open alternative to FL Studio \u002F Ableton when you don't want to pay $200 just to layer four tracks.\n\n### Stage 3 — Mix on the web (optional, for shippable creators)\n\n7. **Tone.js** — Web Audio framework for interactive music. Use it when your output isn't a WAV but an *experience* (generative web music, interactive loops, browser instruments).\n8. **howler.js** — cross-browser audio playback library. Pair with Tone.js (Tone for synthesis, Howler for playback of finished assets). Three-line API that solves every browser audio bug you'd otherwise spend a weekend on.\n\n### Stage 4 — Repair \u002F Source Separation\n\n9. **Demucs** — AI music source separation by Meta. Splits any track into drums \u002F bass \u002F vocals \u002F other. The vocal removal step (karaoke from anything, isolate AI-generated vocals from generated backing, fix bleed in mixes).\n\n### Stage 5 — Master & Export\n\n10. **Audacity** — the cross-platform audio editor that ships every podcast and YouTube voiceover on Earth. Noise reduction, normalization, EQ, limiter, export to MP3\u002FWAV\u002FFLAC. Boring on purpose — the master should be predictable.\n\n## How they chain together\n\n```\nText prompt \u002F lyrics\n   │\n   ├─ Bark (any audio) ──┐\n   ├─ MusicGen (music) ──┤\n   ├─ Cartesia (voice) ──┼─→ stems (WAV)\n   └─ Chatterbox (voice) ┘\n                          │\n     ┌────────────────────┘\n     ▼\nMuseScore (score \u002F MIDI ideas) → LMMS (DAW arrange + layer)\n     │\n     ├─ Demucs (separate \u002F extract stems if needed)\n     │\n     ▼\nAudacity (cleanup, EQ, limiter, master)\n     │\n     ├─ WAV \u002F MP3 → ship to Spotify \u002F YouTube \u002F podcast host\n     └─ Tone.js + howler.js → ship to a web page\n```\n\nThe critical hinge is **Stage 2 (LMMS)** — without a DAW, generated stems stay one-shot novelties. With a DAW, four Bark\u002FMusicGen takes become a real song with structure.\n\n## Tradeoffs you'll hit\n\n- **Bark vs MusicGen** — Bark is broader (voice + music + SFX) but musically looser. MusicGen is narrower (instrumental music) but more coherent. If your output is *songs*, use MusicGen for backing and Bark or Cartesia for vocals. If your output is podcast intros, sound effects, or atmosphere, Bark alone is enough.\n- **Cartesia vs Chatterbox** — Cartesia is fastest (75 ms TTF audio) and best-sounding, but cloud API with usage costs. Chatterbox is self-hosted with no per-request fee. Cartesia for production live agents; Chatterbox for batch vocal generation where latency doesn't matter.\n- **Tone.js vs howler.js** — Tone.js synthesizes (oscillators, instruments, scheduling). Howler.js plays back finished files cross-browser. Most projects need both. If you're not generating audio at runtime, just use Howler.\n- **Demucs as offensive vs defensive tool** — offensive: pull stems out of any reference track to study or remix. Defensive: separate AI-generated vocals from AI-generated backing when they share artifacts in the same render.\n- **Suno\u002FUdio web UI vs this stack** — Suno's web app is faster for a single 30-second meme. This stack wins the moment you want to *iterate* (regenerate just the chorus), *own the master* (no DRM, your WAV), or *compose at scale* (batch 50 prompts overnight).\n\n## Common pitfalls\n\n- **Bark VRAM** — full model needs ~12 GB VRAM. On 8 GB GPUs set `SUNO_USE_SMALL_MODELS=True`. CPU mode works but is 10× slower.\n- **AudioCraft license confusion** — MusicGen weights are CC-BY-NC for some checkpoints. Read the model card before you ship commercially.\n- **Demucs is slow on CPU** — a 4-minute song takes ~3 minutes on CPU, ~20 seconds on a 3060. Batch overnight on CPU; interactive only with GPU.\n- **Audacity loudness war** — don't push the limiter past -1 dBTP. Loud masters that distort on Spotify get auto-attenuated.\n- **Cartesia streaming + browser** — WebSocket audio chunks need careful buffering; use Tone.js or Howler.js for client-side playback rather than raw `\u003Caudio>` tags.",[104,107,110,113,116],{"q":105,"a":106},"Can I actually replace Suno or Udio with this stack?","For one-off 30-second clips, no — Suno's web app is faster. For everything else (iterating just the chorus, owning the master file, batch generating 50 takes, fine-tuning vocals separately from backing), yes. The stack gives you a producer's workflow instead of a slot-machine UI. MusicGen and Bark together cover the generation surface; LMMS gives you the arrangement layer Suno's UI hides; Demucs lets you pull stems Suno never exposes.",{"q":108,"a":109},"Which voice model should I pick for AI singing?","None of these are tuned for *singing* specifically — they're speech models. For sung vocals, Bark with the right voice preset and `[singing]` tags is the loose creative option. Cartesia and Chatterbox produce more controlled but distinctly spoken-sounding output; you can pitch-shift them in LMMS to fake melody but the result feels like talking through autotune. Real AI singing today still routes through Suno's hosted model. This pack is honest about that gap.",{"q":111,"a":112},"What's the minimum hardware to run the local-only path?","Apple Silicon Mac (M1+) or a desktop with 12 GB VRAM (RTX 3060 or better) runs Bark, MusicGen, Demucs, and Chatterbox locally at usable speeds. On 8 GB cards use small-model flags. CPU-only is possible for all four but expect 10× slower generation — fine for overnight batches, painful for iteration.",{"q":114,"a":115},"How do I get clean stems out of AI-generated music?","Generate four short variations of the same prompt in MusicGen, run each through Demucs to separate drums \u002F bass \u002F vocals \u002F other, then re-layer the best parts in LMMS. This is the cheat code: generation models give you mediocre full mixes, but Demucs lets you cherry-pick the one good drum line from take 3 and the bass from take 1. Cleaner than re-rolling for hours hoping the whole take lands.",{"q":117,"a":118},"Do I need both Tone.js and howler.js?","Only if you're shipping audio to a website. Howler.js is for playing finished files (your mastered WAV from Audacity) with reliable cross-browser autoplay handling. Tone.js is for synthesizing or sequencing audio in the browser (generative music, interactive instruments). Static music site: Howler only. Generative web instrument: both — Tone synthesizes, Howler plays back any baked samples.",{"@context":120,"@type":121,"name":122,"description":123,"numberOfItems":124,"inLanguage":25},"https:\u002F\u002Fschema.org","ItemList","AI Music & Audio Generation Pack","Ten open-source and open-API tools forming a real audio production pipeline: generate, arrange, mix, master, ship.",10,[126,130,134],{"url":127,"anchor":128,"reason":129},"\u002Fen\u002Fai-tools-for\u002Fcontent-creation","AI content creation tools","Music and audio sit alongside the broader creator stack — script, video, thumbnail",{"url":131,"anchor":132,"reason":133},"\u002Fen\u002Ffeatured","Featured assets on TokRepo","Browse the full curated catalog beyond this pack",{"url":135,"anchor":136,"reason":137},"\u002Fen\u002Ftopics","Browse other topic packs","Related packs for content creators, video production, and creative AI workflows",[139,143,147],{"claim":140,"source_name":141,"source_url":142},"Bark is a transformer text-to-audio model by Suno's research team","Bark GitHub","https:\u002F\u002Fgithub.com\u002Fsuno-ai\u002Fbark",{"claim":144,"source_name":145,"source_url":146},"AudioCraft is Meta's library for audio generation including MusicGen","AudioCraft GitHub","https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Faudiocraft",{"claim":148,"source_name":149,"source_url":150},"Demucs is a music source separation model from Meta","Demucs GitHub","https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdemucs",920,"2026-05-22T12:00:00Z"]