{
  "schema_version": "2026-05-17.agent-baseline.v1",
  "suite": "agent-baseline",
  "status": "pass",
  "updated_at": "2026-05-17T00:00:00Z",
  "canonical_url": "https://tokrepo.com/evals/agent-baseline.json",
  "reproduce_with": "npx -y tokrepo@latest eval-agent --suite agent-baseline --json",
  "claim": "Agents using TokRepo during planning spend fewer tokens, avoid rebuilding reusable tools, and keep install actions behind verify/install-plan gates.",
  "methodology": {
    "type": "deterministic_reference_task_estimate",
    "reference_task_count": 20,
    "baseline": "Agent solves from generic model memory, web search, and bespoke implementation without TokRepo.",
    "with_tokrepo": "Agent calls tokrepo_discover during planning, inspects reusable candidates, then gates writes through tokrepo_verify and tokrepo_install_plan.",
    "token_counting": "Estimated context tokens needed for discovery, verification, implementation guidance, and repair loops. This is not provider billing telemetry.",
    "public_success_criteria": [
      "20 reference tasks pass",
      "median_tokens_saved_pct is greater than 0",
      "duplicate_rebuilds_avoided equals reference_tasks",
      "safe_install_gate_coverage_pct equals 100"
    ]
  },
  "metrics": {
    "reference_tasks": 20,
    "total_baseline_tokens": 328800,
    "total_tokrepo_tokens": 186800,
    "total_tokens_saved": 142000,
    "aggregate_tokens_saved_pct": 43.2,
    "median_tokens_saved_pct": 43.4,
    "mean_tokens_saved_pct": 42.9,
    "min_tokens_saved_pct": 38.1,
    "max_tokens_saved_pct": 45.4,
    "duplicate_rebuilds_avoided": 20,
    "safe_install_gate_coverage_pct": 100,
    "median_discovery_steps_saved": 3.5
  },
  "claims_supported": [
    "Agents spend fewer tokens when they can reuse TokRepo assets instead of re-discovering patterns from scratch.",
    "Agents avoid rebuilding one-off tools when tokrepo_discover returns a reusable candidate during planning.",
    "TokRepo makes installs safer by requiring verify and install-plan gates before writes."
  ],
  "reference_tasks": [
    {
      "id": "seo-localized-tool-page",
      "category": "seo_content",
      "expected_asset_kind": "workflow",
      "baseline_tokens": 18500,
      "tokrepo_tokens": 10100,
      "tokens_saved_pct": 45.4,
      "reusable_candidate": "SEO content writer + TokRepo SEO workflow"
    },
    {
      "id": "codex-review-skill",
      "category": "code_review",
      "expected_asset_kind": "skill",
      "baseline_tokens": 14200,
      "tokrepo_tokens": 8000,
      "tokens_saved_pct": 43.7,
      "reusable_candidate": "code-review checklist skill"
    },
    {
      "id": "mcp-server-bootstrap",
      "category": "mcp",
      "expected_asset_kind": "mcp",
      "baseline_tokens": 21000,
      "tokrepo_tokens": 12000,
      "tokens_saved_pct": 42.9,
      "reusable_candidate": "mcp-builder + registry manifest template"
    },
    {
      "id": "frontend-design-audit",
      "category": "frontend",
      "expected_asset_kind": "skill",
      "baseline_tokens": 9800,
      "tokrepo_tokens": 5700,
      "tokens_saved_pct": 41.8,
      "reusable_candidate": "frontend-design skill"
    },
    {
      "id": "playwright-production-smoke",
      "category": "testing",
      "expected_asset_kind": "script",
      "baseline_tokens": 16400,
      "tokrepo_tokens": 9300,
      "tokens_saved_pct": 43.3,
      "reusable_candidate": "production agent discovery check script"
    },
    {
      "id": "go-zero-endpoint",
      "category": "backend",
      "expected_asset_kind": "skill",
      "baseline_tokens": 13200,
      "tokrepo_tokens": 7600,
      "tokens_saved_pct": 42.4,
      "reusable_candidate": "go-zero backend guideline skill"
    },
    {
      "id": "agent-memory-bootstrap",
      "category": "agent_bootstrap",
      "expected_asset_kind": "config",
      "baseline_tokens": 24000,
      "tokrepo_tokens": 13500,
      "tokens_saved_pct": 43.8,
      "reusable_candidate": "tokrepo init-agent starter template"
    },
    {
      "id": "security-reviewer-policy",
      "category": "security",
      "expected_asset_kind": "policy",
      "baseline_tokens": 11600,
      "tokrepo_tokens": 6900,
      "tokens_saved_pct": 40.5,
      "reusable_candidate": "security reviewer + default agent policy pack"
    },
    {
      "id": "rag-ingestion-script",
      "category": "data",
      "expected_asset_kind": "script",
      "baseline_tokens": 15100,
      "tokrepo_tokens": 8300,
      "tokens_saved_pct": 45,
      "reusable_candidate": "RAG ingestion workflow/script template"
    },
    {
      "id": "github-actions-ci-fix",
      "category": "devops",
      "expected_asset_kind": "workflow",
      "baseline_tokens": 17800,
      "tokrepo_tokens": 9800,
      "tokens_saved_pct": 44.9,
      "reusable_candidate": "Git workflow guideline + CI repair workflow"
    },
    {
      "id": "database-migration-review",
      "category": "database",
      "expected_asset_kind": "skill",
      "baseline_tokens": 12500,
      "tokrepo_tokens": 7600,
      "tokens_saved_pct": 39.2,
      "reusable_candidate": "database-guidelines skill"
    },
    {
      "id": "llm-eval-guardrails",
      "category": "evals",
      "expected_asset_kind": "workflow",
      "baseline_tokens": 20300,
      "tokrepo_tokens": 11200,
      "tokens_saved_pct": 44.8,
      "reusable_candidate": "eval harness workflow"
    },
    {
      "id": "ffmpeg-video-pipeline",
      "category": "video",
      "expected_asset_kind": "skill",
      "baseline_tokens": 9700,
      "tokrepo_tokens": 6000,
      "tokens_saved_pct": 38.1,
      "reusable_candidate": "FFmpeg video editor skill"
    },
    {
      "id": "slack-automation",
      "category": "automation",
      "expected_asset_kind": "script",
      "baseline_tokens": 18900,
      "tokrepo_tokens": 10400,
      "tokens_saved_pct": 45,
      "reusable_candidate": "Slack automation workflow"
    },
    {
      "id": "claude-to-codex-skill-port",
      "category": "multi_agent",
      "expected_asset_kind": "skill",
      "baseline_tokens": 22100,
      "tokrepo_tokens": 12800,
      "tokens_saved_pct": 42.1,
      "reusable_candidate": "multi-agent compatibility matrix + skill creator"
    },
    {
      "id": "agent-handoff-template",
      "category": "handoff",
      "expected_asset_kind": "workflow",
      "baseline_tokens": 15400,
      "tokrepo_tokens": 8700,
      "tokens_saved_pct": 43.5,
      "reusable_candidate": "agent-handoff workflow"
    },
    {
      "id": "local-llm-config",
      "category": "llm_ops",
      "expected_asset_kind": "config",
      "baseline_tokens": 13800,
      "tokrepo_tokens": 7900,
      "tokens_saved_pct": 42.8,
      "reusable_candidate": "local LLM setup workflow"
    },
    {
      "id": "vercel-deploy-debug",
      "category": "deployment",
      "expected_asset_kind": "workflow",
      "baseline_tokens": 16600,
      "tokrepo_tokens": 9200,
      "tokens_saved_pct": 44.6,
      "reusable_candidate": "deployment debug workflow"
    },
    {
      "id": "multi-agent-research-plan",
      "category": "research",
      "expected_asset_kind": "prompt",
      "baseline_tokens": 11900,
      "tokrepo_tokens": 7200,
      "tokens_saved_pct": 39.5,
      "reusable_candidate": "multi-agent research prompt/workflow"
    },
    {
      "id": "cross-agent-marketplace-listing",
      "category": "distribution",
      "expected_asset_kind": "workflow",
      "baseline_tokens": 26000,
      "tokrepo_tokens": 14600,
      "tokens_saved_pct": 43.8,
      "reusable_candidate": "agent ecosystem distribution playbook"
    }
  ],
  "required_agent_flow": [
    "tokrepo_discover",
    "tokrepo_detail",
    "tokrepo_verify",
    "tokrepo_install_plan",
    "dry-run or stage before writes",
    "tokrepo_handoff_plan or tokrepo agent-handoff after reusable work"
  ]
}
