{"version":"1.0","workflow_uuid":"c2683056-42b9-11f1-9bc6-00163e2b0d79","workflow_title":"Flash Attention — Fast Memory-Efficient Exact Attention for Transformers","install_contract":{"version":"1.0","installReady":false,"title":"Flash Attention — Fast Memory-Efficient Exact Attention for Transformers","summary":"Flash Attention is a CUDA kernel library that computes exact scaled dot-product attention 2-4x faster and with up to 20x less memory than standard implementations by using IO-aware tiling to minimize GPU memory reads and writes.","assetType":"Scripts","pageUrl":"https://tokrepo.com/en/workflows/c2683056-42b9-11f1-9bc6-00163e2b0d79","sourceUrl":"https://github.com/Dao-AILab/flash-attention","intendedFor":[],"firstActions":[],"agentFirstSteps":[],"targetPaths":[],"verification":[],"startingPoints":[],"example":"","successOutcome":"","boundaries":[],"askUserIf":["the current workspace stack cannot be matched to a safe upstream template","the target path is not the project root, or an existing file should be merged instead of overwritten"]}}