diff --git a/formulas/run-holdout.toml b/formulas/run-holdout.toml
new file mode 100644
index 0000000..eac2875
--- /dev/null
+++ b/formulas/run-holdout.toml
@@ -0,0 +1,138 @@
+# formulas/run-holdout.toml
+#
+# Holdout quality gate — deploy a PR branch, run blind holdout scenarios,
+# report pass/fail.
+#
+# Type: sense-only.  Produces metrics and a gate decision.
+# Does NOT commit code, open PRs, or modify contracts; its only git write is the evidence JSON committed by the "deliver" step.
+#
+# Depends on: #973 (evidence/holdout/ directory structure)
+
+[formula]
+id          = "run-holdout"
+name        = "Holdout Quality Gate"
+description = "Deploy PR branch, run blind holdout scenarios, report pass/fail."
+type        = "sense"
+# "sense"  → read-only w.r.t. source code, produces metrics only (NOTE(review): the evidence JSON is still committed to main)
+# "act"    → produces git artifacts (cf. run-evolution, run-red-team)
+
+# ── Inputs ─────────────────────────────────────────────────────────────────────
+
+[inputs.pr_number]
+type        = "integer"
+required    = true
+description = "PR number to evaluate"
+
+[inputs.holdout_repo]
+type        = "string"
+required    = false
+default     = "ssh://git@codeberg.org/johba/harb-holdout-scenarios.git"
+description = """
+Holdout scenarios repo. Dev-agent has no read access — cloned at runtime
+by evaluate.sh into the ephemeral worktree, never checked in to harb.
+"""
+
+# ── Execution ──────────────────────────────────────────────────────────────────
+#
+# The orchestrator invokes evaluate.sh, which owns the full lifecycle:
+# checkout → build → boot stack → clone holdout repo → playwright → teardown.
+
+[execution]
+script     = "scripts/harb-evaluator/evaluate.sh"
+invocation = "bash scripts/harb-evaluator/evaluate.sh {pr_number}"
+
+# Exit codes propagated by evaluate.sh:
+#   0  gate passed (≥90% of scenarios reached the 2-of-3 majority; with 8 scenarios that means all 8)
+#   1  gate failed (fewer than 90% passed; with 8 scenarios, one scenario missing the 2-of-3 threshold is enough)
+#   2  infra error (stack failed to start, missing dependency, etc.)
+
+# ── Steps ──────────────────────────────────────────────────────────────────────
+
+[[steps]]
+id          = "boot-stack"
+description = """
+Spin up full docker stack from PR branch.
+evaluate.sh creates an isolated git worktree, builds kraiken-lib,
+installs npm deps, installs Playwright browser binaries, then runs:
+  docker compose -p harb-eval-{pr_number} up -d
+Waits for anvil (healthy), bootstrap (exited 0), ponder (healthy + /ready).
+"""
+
+[[steps]]
+id          = "clone-holdout"
+description = """
+Clone harb-holdout-scenarios into .holdout-scenarios/ inside the worktree.
+Sets HOLDOUT_SCENARIOS_DIR for holdout.config.ts.
+The dev-agent never sees this repo; the wall is enforced by separate
+repository access control on Codeberg.
+"""
+
+[[steps]]
+id          = "run-scenarios"
+description = """
+Run 8 Playwright specs via holdout.config.ts (workers=1, headless chromium).
+4 surfaces: contracts, graphql, landing, webapp.
+Each scenario is executed up to 3 times; 2/3 runs must pass.
+"""
+surfaces              = ["contracts", "graphql", "landing", "webapp"]
+scenarios_per_surface = 2
+scenarios_total       = 8
+runs_per_scenario     = 3
+pass_per_scenario     = 2   # 2-of-3 majority required for a scenario to count as passed
+
+[[steps]]
+id          = "teardown"
+description = """
+docker compose -p harb-eval-{pr_number} down -v --remove-orphans
+git worktree remove --force {worktree_dir}
+Always runs — cleanup is registered as a shell trap in evaluate.sh.
+"""
+
+[[steps]]
+id          = "deliver"
+description = """
+Collect per-scenario results from test-results/holdout-reports/.
+Write evidence/holdout/{date}-pr{pr_number}.json and commit to main.
+Post gate verdict to issue #{pr_number}.
+On failure: include one-line reason per failed scenario.
+Scenario text is never exposed to the dev-agent.
+"""
+
+# ── Gate ───────────────────────────────────────────────────────────────────────
+
+[gate]
+pass_threshold_pct = 90   # ≥90% of scenarios must pass
+scenarios_total    = 8    # 8 * 0.9 = 7.2 → all 8 must pass (7 of 8 = 87.5% < 90%)
+per_scenario_runs  = 3
+per_scenario_pass  = 2    # 2-of-3 majority per scenario
+
+# ── Products ───────────────────────────────────────────────────────────────────
+
+[products.evidence_file]
+path     = "evidence/holdout/{date}-pr{pr_number}.json"
+delivery = "commit to main"
+schema   = "evidence/README.md"  # see §Schema: holdout/YYYY-MM-DD-prNNN.json
+
+[products.issue_comment]
+delivery   = "post to issue #{pr_number}"
+content    = "gate verdict (pass/fail), scenarios_passed/scenarios_total, link to evidence file"
+on_failure = "one-line failure reason per failing scenario; scenario text never revealed"
+
+# ── Resources ──────────────────────────────────────────────────────────────────
+
+[resources]
+profile     = "heavy"
+containers  = "5+"    # anvil, bootstrap, ponder, webapp, (caddy if needed) — NOTE(review): list names fewer than 5 services; confirm which are missing
+browser     = "chromium (Playwright)"
+ports       = ["8545", "42069", "5173", "8081", "5100"]
+concurrency = "exclusive — port bindings prevent parallel runs on the same host"
+
+# ── Notes ──────────────────────────────────────────────────────────────────────
+
+[notes]
+wall = """
+The holdout-specs repo (harb-holdout-scenarios) is intentionally inaccessible
+to the dev-agent. The agent receives only pass/fail and one-line failure reasons
+— never the scenario text. This is enforced by Codeberg repo permissions, not
+by runtime filtering.
+"""