From de014e9b132fb2e71d27936b2cfba5e86cccffe4 Mon Sep 17 00:00:00 2001 From: johba Date: Fri, 20 Mar 2026 22:02:13 +0000 Subject: [PATCH] fix: feat: implement evidence/resources and evidence/protocol logging (#1059) - Add evidence/resources/ and evidence/protocol/ directories with .gitkeep - Add schemas for resources/ and protocol/ to evidence/README.md - Create formulas/run-resources.toml (sense formula: disk/RAM/API/CI metrics, daily cron 06:00 UTC, verdict: ok/warn/critical) - Create formulas/run-protocol.toml (sense formula: TVL/fees/positions/ rebalance frequency via LmTotalEth.s.sol + cast, daily cron 07:00 UTC, verdict: healthy/degraded/offline) - Update STATE.md Co-Authored-By: Claude Sonnet 4.6 --- STATE.md | 2 +- evidence/README.md | 115 ++++++++++++++++++++++ evidence/protocol/.gitkeep | 0 evidence/resources/.gitkeep | 0 formulas/run-protocol.toml | 187 ++++++++++++++++++++++++++++++++++++ formulas/run-resources.toml | 155 ++++++++++++++++++++++++++++++ 6 files changed, 458 insertions(+), 1 deletion(-) create mode 100644 evidence/protocol/.gitkeep create mode 100644 evidence/resources/.gitkeep create mode 100644 formulas/run-protocol.toml create mode 100644 formulas/run-resources.toml diff --git a/STATE.md b/STATE.md index e5e7ef9..ad264fc 100644 --- a/STATE.md +++ b/STATE.md @@ -50,4 +50,4 @@ - [2026-03-15] txnBot AGENTS.md ENVIRONMENT enum is stale (#784) - [2026-03-20] Adoption milestone state ambiguity in MEMORY.md (#1068) - [2026-03-20] OptimizerV3Push3 as IOptimizer always returns bear defaults — integration risk (#1063) -- [2026-03-20] Red-team schema should add candidate_commit field (#1066) +- [2026-03-20] implement evidence/resources and evidence/protocol logging (#1059): formulas/run-resources.toml (disk/RAM/API/CI sense formula, daily cron 06:00 UTC) and formulas/run-protocol.toml (TVL/fees/positions/rebalances sense formula, daily cron 07:00 UTC); evidence/resources/ and evidence/protocol/ directories; schemas in evidence/README.md diff 
--git a/evidence/README.md b/evidence/README.md index 259147f..363beb5 100644 --- a/evidence/README.md +++ b/evidence/README.md @@ -22,6 +22,10 @@ evidence/ YYYY-MM-DD-prNNN.json # per-scenario pass/fail, gate decision user-test/ YYYY-MM-DD.json # per-persona reports, screenshot refs, friction points + resources/ + YYYY-MM-DD.json # disk, RAM, API call counts, budget burn, CI queue depth + protocol/ + YYYY-MM-DD.json # TVL, accumulated fees, position count, rebalance frequency ``` ## Delivery Pattern @@ -215,3 +219,114 @@ Records a UX evaluation run across simulated personas. | `personas_total` | integer | Total personas evaluated | | `critical_friction_points` | array of strings | Friction points that blocked task completion | | `verdict` | string | `"pass"` if all personas completed, `"fail"` otherwise | + +--- + +## Schema: `resources/YYYY-MM-DD.json` + +Records one infrastructure resource snapshot. + +```json +{ + "date": "YYYY-MM-DD", + "disk": { + "used_bytes": 85899345920, + "total_bytes": 107374182400, + "used_pct": 80.0 + }, + "ram": { + "used_bytes": 3221225472, + "total_bytes": 8589934592, + "used_pct": 37.5 + }, + "api": { + "anthropic_calls_24h": 142, + "anthropic_budget_usd_used": 4.87, + "anthropic_budget_usd_limit": 50.0, + "anthropic_budget_pct": 9.7 + }, + "ci": { + "woodpecker_queue_depth": 2, + "woodpecker_running": 1 + }, + "staleness_threshold_days": 1, + "verdict": "ok" | "warn" | "critical" +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `date` | string (ISO) | Date of the snapshot | +| `disk.used_bytes` | integer | Bytes used on the primary volume | +| `disk.total_bytes` | integer | Total bytes on the primary volume | +| `disk.used_pct` | number | Percentage of disk used | +| `ram.used_bytes` | integer | Bytes of RAM in use | +| `ram.total_bytes` | integer | Total bytes of RAM | +| `ram.used_pct` | number | Percentage of RAM used | +| `api.anthropic_calls_24h` | integer | Anthropic API calls in the past 24 hours 
| +| `api.anthropic_budget_usd_used` | number | USD spent against the Anthropic budget | +| `api.anthropic_budget_usd_limit` | number | Configured Anthropic budget ceiling in USD | +| `api.anthropic_budget_pct` | number | Percentage of budget consumed | +| `ci.woodpecker_queue_depth` | integer or null | Number of jobs waiting in the Woodpecker CI queue; `null` when CI metrics are skipped or the endpoint is unreachable | +| `ci.woodpecker_running` | integer or null | Number of Woodpecker jobs currently running; `null` when CI metrics are skipped or the endpoint is unreachable | +| `staleness_threshold_days` | integer | Maximum age in days before this record is considered stale (always 1) | +| `verdict` | string | `"ok"` (all metrics normal), `"warn"` (≥80% on disk, RAM, or Anthropic budget), or `"critical"` (≥95% on disk, RAM, or Anthropic budget) | + +--- + +## Schema: `protocol/YYYY-MM-DD.json` + +Records one on-chain protocol health snapshot. + +```json +{ + "date": "YYYY-MM-DD", + "block_number": 24500000, + "tvl_eth": "1234567890000000000000", + "tvl_eth_formatted": "1234.57", + "accumulated_fees_eth": "12345678900000000", + "accumulated_fees_eth_formatted": "0.012", + "position_count": 3, + "positions": [ + { + "name": "floor", + "tick_lower": -887272, + "tick_upper": -200000, + "liquidity": "987654321000000000" + }, + { + "name": "anchor", + "tick_lower": -200000, + "tick_upper": 0, "liquidity": "123456789000000000" + }, + { + "name": "discovery", + "tick_lower": 0, + "tick_upper": 887272, "liquidity": "456789123000000000" + } + ], + "rebalance_count_24h": 4, + "last_rebalance_block": 24499800, + "staleness_threshold_days": 1, + "verdict": "healthy" | "degraded" | "offline" +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `date` | string (ISO) | Date of the snapshot | +| `block_number` | integer | Block number at time of snapshot | +| `tvl_eth` | string (wei) | Total ETH held by the LiquidityManager (free ETH + free WETH + ETH locked across all LM positions) in wei | +| `tvl_eth_formatted` | string | TVL formatted in ETH (2 dp) | +| `accumulated_fees_eth` | string (wei) | Fees accumulated by the LiquidityManager in wei | +| `accumulated_fees_eth_formatted` | string | Fees formatted in ETH (3 dp) | +| `position_count` | integer | Number 
of active Uniswap V3 positions (expected: 3) | +| `positions` | array | One entry per active position | +| `positions[].name` | string | Position label: `"floor"`, `"anchor"`, or `"discovery"` | +| `positions[].tick_lower` | integer | Lower tick boundary | +| `positions[].tick_upper` | integer | Upper tick boundary | +| `positions[].liquidity` | string | Liquidity amount in the position (wei-scale integer) | +| `rebalance_count_24h` | integer | Number of `recenter()` calls in the past 24 hours | +| `last_rebalance_block` | integer | Block number of the most recent `recenter()` call | +| `staleness_threshold_days` | integer | Maximum age in days before this record is considered stale (always 1) | +| `verdict` | string | `"healthy"` (positions active, TVL > 0), `"degraded"` (position_count < 3 or rebalance stalled), or `"offline"` (TVL = 0 or contract unreachable) | diff --git a/evidence/protocol/.gitkeep b/evidence/protocol/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/evidence/resources/.gitkeep b/evidence/resources/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/formulas/run-protocol.toml b/formulas/run-protocol.toml new file mode 100644 index 0000000..7888b1a --- /dev/null +++ b/formulas/run-protocol.toml @@ -0,0 +1,187 @@ +# formulas/run-protocol.toml +# +# On-chain protocol health snapshot — collect TVL, accumulated fees, +# position count, and rebalance frequency from the deployed LiquidityManager. +# Write a structured JSON evidence file for planner and predictor consumption. +# +# Type: sense. Read-only — produces metrics only, no git artifacts. +# +# Staleness threshold: 1 day (matches evidence/protocol/ schema). +# Cron: daily at 07:00 UTC (staggered 1 h after run-resources). + +[formula] +id = "run-protocol" +name = "On-Chain Protocol Health Snapshot" +description = "Collect TVL, accumulated fees, position count, and rebalance frequency from the deployed LiquidityManager; write evidence/protocol/{date}.json." 
+type = "sense" +# "sense" → read-only, produces metrics only +# "act" → produces git artifacts (cf. run-evolution, run-red-team) + +# ── Cron ─────────────────────────────────────────────────────────────────────── + +[cron] +schedule = "0 7 * * *" # daily at 07:00 UTC (1 h after run-resources) +description = "Matches 1-day staleness threshold — one snapshot per day keeps the record fresh." + +# ── Inputs ───────────────────────────────────────────────────────────────────── + +[inputs.rpc_url] +type = "string" +required = true +description = """ +Base network RPC endpoint used to query on-chain state. +Example: https://mainnet.base.org or a running Anvil fork URL. +""" + +[inputs.deployments_file] +type = "string" +required = false +default = "onchain/deployments-local.json" +description = """ +Path to the deployments JSON file containing contract addresses. +The formula reads LiquidityManager address from this file. +Use onchain/deployments.json for mainnet; onchain/deployments-local.json +for a local Anvil fork. +""" + +[inputs.lookback_blocks] +type = "integer" +required = false +default = 43200 +description = """ +Number of blocks to scan for Recenter events when computing +rebalance_count_24h (~24 h of Base blocks at ~2 s/block: 86400 s / 2 s = 43200). +""" + +# ── Execution ────────────────────────────────────────────────────────────────── + +[execution] +script = "scripts/harb-evaluator/run-protocol.sh" +invocation = "RPC_URL={rpc_url} DEPLOYMENTS_FILE={deployments_file} LOOKBACK_BLOCKS={lookback_blocks} bash scripts/harb-evaluator/run-protocol.sh" + +# Exit codes: +# 0 snapshot written successfully +# 2 infrastructure error (RPC unreachable, missing deployments file, forge unavailable, etc.) + +# ── Steps ────────────────────────────────────────────────────────────────────── + +[[steps]] +id = "read-addresses" +description = """ +Read the LiquidityManager contract address from {deployments_file}. +Fail with exit code 2 if the file is absent or the address is missing. 
+""" + +[[steps]] +id = "collect-tvl" +description = """ +Query LiquidityManager total ETH via forge script LmTotalEth.s.sol +against {rpc_url}. +Records tvl_eth (wei string) and tvl_eth_formatted (ETH, 2 dp). +LmTotalEth.s.sol uses exact Uniswap V3 integer math (LiquidityAmounts + +TickMath) to sum free ETH, free WETH, and ETH locked across all three +positions (floor, anchor, discovery). +""" +forge_script = "onchain/script/LmTotalEth.s.sol" + +[[steps]] +id = "collect-fees" +description = """ +Query accumulated protocol fees from the LiquidityManager via cast call: + LiquidityManager.accumulatedFees() → uint256 +Records accumulated_fees_eth (wei string) and accumulated_fees_eth_formatted +(ETH, 3 dp). +Falls back to 0 gracefully if the function is not present on the deployed +contract (older deployment without fee tracking). +""" + +[[steps]] +id = "collect-positions" +description = """ +Query the three Uniswap V3 positions held by the LiquidityManager: + LiquidityManager.floorPosition() → (tickLower, tickUpper, liquidity) + LiquidityManager.anchorPosition() → (tickLower, tickUpper, liquidity) + LiquidityManager.discoveryPosition() → (tickLower, tickUpper, liquidity) +Records position_count (number of positions with liquidity > 0) and the +positions array. +""" + +[[steps]] +id = "collect-rebalances" +description = """ +Count Recenter events emitted by the LiquidityManager in the past +{lookback_blocks} blocks via eth_getLogs. +Records: + - rebalance_count_24h: total Recenter event count in the window. + - last_rebalance_block: block number of the most recent Recenter event + (0 if none found in the window). +""" +event_signature = "Recenter(int24,int24,int24)" + +[[steps]] +id = "collect" +description = """ +Assemble all collected metrics into evidence/protocol/{date}.json. +Compute verdict: + - "offline" if tvl_eth = 0 or RPC was unreachable. + - "degraded" if position_count < 3, or rebalance_count_24h = 0 and the + protocol has been live for > 1 day. 
+ - "healthy" otherwise. +Write the file conforming to the schema in evidence/README.md +## Schema: protocol/YYYY-MM-DD.json. +""" +output = "evidence/protocol/{date}.json" +schema = "evidence/README.md" # see ## Schema: protocol/YYYY-MM-DD.json + +[[steps]] +id = "deliver" +description = """ +Commit evidence/protocol/{date}.json to main. +Post a one-line summary comment to the originating issue (if any): + verdict, tvl_eth_formatted, accumulated_fees_eth_formatted, + position_count, rebalance_count_24h. +On "degraded" or "offline": highlight the failing dimension and its value. +""" + +# ── Products ─────────────────────────────────────────────────────────────────── + +[products.evidence_file] +path = "evidence/protocol/{date}.json" +delivery = "commit to main" +schema = "evidence/README.md" # see ## Schema: protocol/YYYY-MM-DD.json + +[products.issue_comment] +delivery = "post to originating issue (if any)" +content = "verdict, tvl_eth_formatted, accumulated_fees_eth_formatted, position_count, rebalance_count_24h" +on_degraded = "highlight failing dimension and its current value" + +# ── Resources ────────────────────────────────────────────────────────────────── + +[resources] +profile = "light" +compute = "local — forge script + cast calls only; no Anvil or Docker startup required" +rpc = "Base network RPC ({rpc_url}) — read-only calls" +concurrency = "safe to run in parallel with other formulas" + +# ── Notes ────────────────────────────────────────────────────────────────────── + +[notes] +tvl_metric = """ +TVL is measured as LiquidityManager total ETH: free ETH + free WETH + ETH +locked across all three Uniswap V3 positions (floor, anchor, discovery). +Uses the same LmTotalEth.s.sol forge script as run-red-team to ensure +consistent measurement methodology. +""" + +rebalance_staleness = """ +A zero rebalance_count_24h on an established deployment indicates the +recenter() upkeep bot (services/txnBot) has stalled. 
The "degraded" +verdict triggers a planner alert. On a fresh deployment (< 1 day old) +zero rebalances is expected and does not trigger degraded. +""" + +fees_fallback = """ +accumulated_fees_eth falls back to 0 for deployments without fee tracking. +The verdict is not affected by a zero fee value alone — only TVL, +position_count, and rebalance_count_24h drive the verdict. +""" diff --git a/formulas/run-resources.toml b/formulas/run-resources.toml new file mode 100644 index 0000000..6731e6d --- /dev/null +++ b/formulas/run-resources.toml @@ -0,0 +1,155 @@ +# formulas/run-resources.toml +# +# Infrastructure resource snapshot — collect disk usage, RAM usage, +# Anthropic API call counts and budget burn, and Woodpecker CI queue depth. +# Write a structured JSON evidence file for planner and predictor consumption. +# +# Type: sense. Read-only — produces metrics only, no git artifacts. +# +# Staleness threshold: 1 day (matches evidence/resources/ schema). +# Cron: daily at 06:00 UTC. + +[formula] +id = "run-resources" +name = "Infrastructure Resource Snapshot" +description = "Collect disk, RAM, API usage, Anthropic budget burn, and CI queue depth; write evidence/resources/{date}.json." +type = "sense" +# "sense" → read-only, produces metrics only +# "act" → produces git artifacts (cf. run-evolution, run-red-team) + +# ── Cron ─────────────────────────────────────────────────────────────────────── + +[cron] +schedule = "0 6 * * *" # daily at 06:00 UTC +description = "Matches 1-day staleness threshold — one snapshot per day keeps the record fresh." + +# ── Inputs ───────────────────────────────────────────────────────────────────── + +[inputs.disk_path] +type = "string" +required = false +default = "/" +description = "Filesystem path to measure disk usage for (passed to df)." + +[inputs.anthropic_budget_usd_limit] +type = "number" +required = false +default = 50.0 +description = "Configured Anthropic budget ceiling in USD. Used to compute anthropic_budget_pct in the evidence record." 
+ +[inputs.woodpecker_api_url] +type = "string" +required = false +default = "http://localhost:8090" +description = "Base URL of the Woodpecker CI API. Set to empty string to skip CI metrics." + +# ── Execution ────────────────────────────────────────────────────────────────── + +[execution] +script = "scripts/harb-evaluator/run-resources.sh" +invocation = "DISK_PATH={disk_path} ANTHROPIC_BUDGET_USD_LIMIT={anthropic_budget_usd_limit} WOODPECKER_API_URL={woodpecker_api_url} bash scripts/harb-evaluator/run-resources.sh" + +# Exit codes: +# 0 snapshot written successfully +# 2 infrastructure error (disk command unavailable, JSON write failed, etc.) + +# ── Steps ────────────────────────────────────────────────────────────────────── + +[[steps]] +id = "collect-disk" +description = """ +Measure disk usage on {disk_path} via `df -B1 {disk_path}`. +Extract used_bytes, total_bytes, and used_pct. +""" + +[[steps]] +id = "collect-ram" +description = """ +Measure RAM usage via `free -b` (Linux) or `vm_stat` (macOS). +Extract used_bytes, total_bytes, and used_pct. +""" + +[[steps]] +id = "collect-api" +description = """ +Collect Anthropic API metrics: + - anthropic_calls_24h: count of API calls in the past 24 hours (read from + tmp/anthropic-call-log.jsonl if present; 0 if absent). + - anthropic_budget_usd_used: sum of cost_usd entries in the call log for + the current calendar day (UTC); 0 if log absent. + - anthropic_budget_usd_limit: from {anthropic_budget_usd_limit} input. + - anthropic_budget_pct: used / limit * 100 (0 if limit = 0). +""" +call_log = "tmp/anthropic-call-log.jsonl" + +[[steps]] +id = "collect-ci" +description = """ +Query Woodpecker CI API for queue state. +GET {woodpecker_api_url}/api/queue/info: + - woodpecker_queue_depth: length of the waiting queue. + - woodpecker_running: count of currently running jobs. +Skipped gracefully (fields set to null) when {woodpecker_api_url} is empty +or the endpoint is unreachable. 
+""" + +[[steps]] +id = "collect" +description = """ +Assemble all collected metrics into evidence/resources/{date}.json. +Compute verdict: + - "critical" if disk_used_pct ≥ 95, ram_used_pct ≥ 95, + or anthropic_budget_pct ≥ 95. + - "warn" if disk_used_pct ≥ 80, ram_used_pct ≥ 80, + or anthropic_budget_pct ≥ 80. + - "ok" otherwise. +Write the file conforming to the schema in evidence/README.md +## Schema: resources/YYYY-MM-DD.json. +""" +output = "evidence/resources/{date}.json" +schema = "evidence/README.md" # see ## Schema: resources/YYYY-MM-DD.json + +[[steps]] +id = "deliver" +description = """ +Commit evidence/resources/{date}.json to main. +Post a one-line summary comment to the originating issue (if any): + verdict, disk_used_pct, ram_used_pct, anthropic_budget_pct, ci queue depth. +On "warn" or "critical": highlight the breaching dimensions. +""" + +# ── Products ─────────────────────────────────────────────────────────────────── + +[products.evidence_file] +path = "evidence/resources/{date}.json" +delivery = "commit to main" +schema = "evidence/README.md" # see ## Schema: resources/YYYY-MM-DD.json + +[products.issue_comment] +delivery = "post to originating issue (if any)" +content = "verdict, disk_used_pct, ram_used_pct, anthropic_budget_pct, ci queue depth" +on_warn = "highlight breaching dimensions and current values" + +# ── Resources ────────────────────────────────────────────────────────────────── + +[resources] +profile = "light" +compute = "local — shell commands only (df, free, curl); no Docker or Anvil required" +concurrency = "safe to run in parallel with other formulas" + +# ── Notes ────────────────────────────────────────────────────────────────────── + +[notes] +call_log = """ +tmp/anthropic-call-log.jsonl is expected to have one JSON object per line, +each with at minimum: + { "ts": "<ISO-8601 UTC timestamp>", "cost_usd": <number> } +The file is written by the dark-factory agent loop. 
When absent, the API +metrics default to 0 — the snapshot is still written rather than failing. +""" + +disk_warn = """ +Planner MEMORY.md (2026-03-20) notes disk at 79%. The "warn" threshold +(≥80%) is not yet crossed but is likely to fire on an early run-resources pass. Monitor trajectory; +evidence pipeline data accumulation will increase disk pressure. +"""