{"version":"1.0","workflow_uuid":"ca2016fb-173e-4cc4-aad3-749d66377e89","workflow_title":"vLLM — High-Throughput LLM Serving Engine","install_contract":{"version":"1.0","installReady":false,"title":"vLLM — High-Throughput LLM Serving Engine","summary":"vLLM is a high-throughput and memory-efficient LLM inference engine. 74.8K+ GitHub stars. PagedAttention, continuous batching, OpenAI-compatible API, multi-GPU serving. Apache 2.0.","assetType":"Scripts","pageUrl":"https://tokrepo.com/en/workflows/vllm-high-throughput-llm-serving-engine-ca2016fb","sourceUrl":"https://github.com/vllm-project","intendedFor":[],"firstActions":[],"agentFirstSteps":[],"targetPaths":[],"verification":[],"startingPoints":[],"example":"","successOutcome":"","boundaries":[],"askUserIf":["the current workspace stack cannot be matched to a safe upstream template","the target path is not the project root, or an existing file should be merged instead of overwritten"]}}