{"version":"1.0","workflow_uuid":"0df4d2b1-45bd-11f1-9bc6-00163e2b0d79","workflow_title":"LM Evaluation Harness — Unified LLM Benchmarking Framework","install_contract":{"version":"1.0","installReady":false,"title":"LM Evaluation Harness — Unified LLM Benchmarking Framework","summary":"EleutherAI's framework for reproducible evaluation of language models across hundreds of benchmarks, providing the standard evaluation backend used by the Open LLM Leaderboard and research papers.","assetType":"Scripts","pageUrl":"https://tokrepo.com/en/workflows/asset-0df4d2b1","sourceUrl":"https://github.com/EleutherAI/lm-evaluation-harness","intendedFor":[],"firstActions":[],"agentFirstSteps":[],"targetPaths":[],"verification":[],"startingPoints":[],"example":"","successOutcome":"","boundaries":[],"askUserIf":["the current workspace stack cannot be matched to a safe upstream template","the target path is not the project root, or an existing file should be merged instead of overwritten"]}}