Merge pull request 'fix: Holdout evaluator: Playwright browser-based scenario scripts (#381)' (#388) from fix/issue-381 into master

This commit is contained in:
johba 2026-03-01 13:32:41 +01:00
commit 6c4ede16ab
3 changed files with 289 additions and 29 deletions

View file

@ -147,6 +147,22 @@ log "Building kraiken-lib..."
(cd "$WORKTREE_DIR" && ./scripts/build-kraiken-lib.sh) \
|| infra_error "kraiken-lib build failed"
# ── Install root npm dependencies (needed for npx playwright test) ─────
# --ignore-scripts: prevents husky from touching the permanent repo's .git/hooks
# from inside this ephemeral worktree.
# --quiet: suppresses normal npm output while still printing errors.
log "Installing root npm dependencies..."
(cd "$WORKTREE_DIR" && npm install --no-audit --no-fund --ignore-scripts --quiet) \
|| infra_error "npm install failed"
# ── Install Playwright browser binaries ────────────────────────────────
# Browser binaries are version-pinned per Playwright revision. If the
# revision resolved by ^1.55.1 is not already cached on this host,
# playwright test aborts immediately with a cryptic "Executable doesn't exist" error.
log "Installing Playwright browser binaries..."
(cd "$WORKTREE_DIR" && npx playwright install chromium) \
|| infra_error "playwright install chromium failed"
# ── Boot the stack ─────────────────────────────────────────────────────
cd "$WORKTREE_DIR"
log "Starting containerised stack (project: $COMPOSE_PROJECT)..."
@ -249,7 +265,7 @@ if [[ "$ponder_ready" != "true" ]]; then
infra_error "Ponder did not finish indexing within ${PONDER_READY_TIMEOUT}s"
fi
# ── Export stack endpoints for scenario scripts ────────────────────────
# ── Export stack endpoints ─────────────────────────────────────────────
export EVAL_PR_NUMBER="$PR_NUMBER"
export EVAL_BRANCH="$PR_BRANCH"
export EVAL_WORKTREE="$WORKTREE_DIR"
@ -260,38 +276,24 @@ export EVAL_KRAIKEN="$KRAIKEN"
export EVAL_STAKE="$STAKE"
export EVAL_LIQUIDITY_MANAGER="$LIQUIDITY_MANAGER"
# Alias as STACK_* so getStackConfig() in tests/setup/stack.ts resolves correctly
export STACK_RPC_URL="$EVAL_RPC_URL"
export STACK_GRAPHQL_URL="$EVAL_GRAPHQL_URL"
export STACK_WEBAPP_URL="$EVAL_WEBAPP_URL"
log "Stack ready. Endpoints:"
log " RPC: $EVAL_RPC_URL"
log " GraphQL: $EVAL_GRAPHQL_URL"
log " WebApp: $EVAL_WEBAPP_URL"
# ── Run scenario scripts ───────────────────────────────────────────────
shopt -s nullglob
scenario_scripts=("$SCENARIOS_DIR"/*.sh)
shopt -u nullglob
if [[ ${#scenario_scripts[@]} -eq 0 ]]; then
log "No scenario scripts found in $SCENARIOS_DIR"
log "Gate PASSED (no scenarios)"
# ── Run holdout Playwright scenarios ──────────────────────────────────
# CI=true triggers forbidOnly in holdout.config.ts so accidental test.only()
# in any scenario file causes an immediate failure rather than a silent partial run.
log "Running holdout scenarios via Playwright..."
cd "$WORKTREE_DIR"
if CI=true npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then
log "Gate PASSED"
exit 0
else
gate_fail "One or more holdout scenarios failed"
fi
failures=0
for scenario in "${scenario_scripts[@]}"; do
[[ -f "$scenario" ]] || continue
scenario_name="$(basename "$scenario")"
log "--- Running scenario: $scenario_name ---"
if bash "$scenario"; then
log " PASSED: $scenario_name"
else
log " FAILED: $scenario_name"
failures=$((failures + 1))
fi
done
if (( failures > 0 )); then
gate_fail "$failures of ${#scenario_scripts[@]} scenario(s) failed"
fi
log "Gate PASSED (${#scenario_scripts[@]} scenario(s))"
exit 0

View file

@ -0,0 +1,46 @@
import { defineConfig, devices } from '@playwright/test';
/**
* Playwright config for holdout scenarios.
*
* Holdout specs live under scripts/harb-evaluator/scenarios/ and reuse the
* existing tests/setup/ infrastructure (wallet-provider, stack, navigate).
*
* The evaluator boots the stack first, then runs:
* npx playwright test --config scripts/harb-evaluator/holdout.config.ts
*
* Required env vars (set by evaluate.sh):
* STACK_RPC_URL Anvil JSON-RPC endpoint
* STACK_WEBAPP_URL Vite dev server URL
* STACK_GRAPHQL_URL Ponder GraphQL endpoint
*/
// Desktop-sized window shared by the viewport and the emulated screen.
const DESKTOP_SIZE = { width: 1280, height: 720 };

// Per-test budget: five minutes, because scenarios submit on-chain transactions.
const SCENARIO_TIMEOUT_MS = 5 * 60 * 1000;

// Chromium flags passed at launch.
const CHROMIUM_ARGS = ['--disable-dev-shm-usage', '--no-sandbox'];

export default defineConfig({
  testDir: './scenarios',

  // Scenarios run strictly one at a time: a single worker, no intra-file
  // parallelism, and no retries.
  workers: 1,
  fullyParallel: false,
  retries: 0,

  // The evaluator exports CI=true before invoking Playwright, which turns
  // forbidOnly on: a stray test.only() then fails the run instead of
  // silently skipping the other scenarios.
  forbidOnly: !!process.env.CI,

  reporter: 'list',
  timeout: SCENARIO_TIMEOUT_MS,
  expect: {
    timeout: 30_000,
  },

  use: {
    headless: true,
    viewport: { ...DESKTOP_SIZE },
    screen: { ...DESKTOP_SIZE },
    actionTimeout: 60_000,
    launchOptions: {
      args: CHROMIUM_ARGS,
    },
  },

  projects: [
    {
      name: 'chromium',
      use: { ...devices['Desktop Chrome'] },
    },
  ],
});

View file

@ -0,0 +1,212 @@
/**
* Holdout scenario: sovereign-exit / always-leave
*
* Verifies the core protocol invariant: a user can ALWAYS exit their position
* by buying KRK through the in-app swap widget and then selling it back.
*
* Reuses tests/setup/ infrastructure – no new wallet or navigation helpers.
*
* Account 5 from the Anvil test mnemonic is used so it never collides with
* the deploy scripts (which use accounts 0–1).
*/
import { expect, test } from '@playwright/test';
import { Interface, Wallet } from 'ethers';
import { createWalletContext } from '../../../../tests/setup/wallet-provider';
import { getStackConfig } from '../../../../tests/setup/stack';
import { navigateSPA } from '../../../../tests/setup/navigate';
// Anvil account 5 — never used by deploy or txnBot.
// This key comes from the Anvil test mnemonic (see the file header), so it is
// a test-only credential and must never hold real funds.
const PK = '0x8b3a350cf5c34c9194ca85829a2df0ec3153be0318b5e2d3348e872092edffba';
// Wallet object built from PK; in this file it is only used to derive the address.
const ACCOUNT = new Wallet(PK);
// Address used as the sender, the swap recipient and the balance owner below.
const ACCOUNT_ADDRESS = ACCOUNT.address;
// Infrastructure addresses that are stable across Anvil forks of Base Sepolia
// Router that receives the KRK approval and executes the exit swap.
const SWAP_ROUTER = '0x94cC0AaC535CCDB3C01d6787D6413C739ae12bc4';
// Output token of the exit swap (KRK → WETH).
const WETH = '0x4200000000000000000000000000000000000006';
// Passed as the `fee` field of exactInputSingle.
const POOL_FEE = 10_000; // 1% tier used by KRAIKEN pool
// ── RPC helpers (Node.js context) ──────────────────────────────────────────
/**
 * Send one JSON-RPC 2.0 request over HTTP POST and return its `result` member.
 *
 * @param rpcUrl  HTTP endpoint of the JSON-RPC server.
 * @param method  JSON-RPC method name, e.g. `eth_call`.
 * @param params  Positional parameters for the method.
 * @returns The response's `result`, untyped; callers narrow it themselves.
 * @throws Error if the body is not a JSON object, if the response carries an
 *   `error` member, or if the HTTP status is not 2xx.
 */
async function rpcCall(rpcUrl: string, method: string, params: unknown[]): Promise<unknown> {
  const resp = await fetch(rpcUrl, {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ jsonrpc: '2.0', id: Date.now(), method, params }),
  });

  // Parse first: some servers attach a JSON-RPC error body to a non-2xx reply,
  // and that message is more useful than the bare status code.
  let payload: unknown;
  try {
    payload = await resp.json();
  } catch {
    throw new Error(`RPC ${method}: HTTP ${resp.status} with a non-JSON body`);
  }
  if (typeof payload !== 'object' || payload === null) {
    throw new Error(`RPC ${method}: malformed response ${JSON.stringify(payload)}`);
  }

  const { error, result } = payload as { error?: unknown; result?: unknown };
  if (error) {
    // Fall back to the serialized error so a missing `message` never prints "undefined".
    const message = (error as { message?: unknown }).message ?? JSON.stringify(error);
    throw new Error(`RPC ${method}: ${String(message)}`);
  }
  if (!resp.ok) {
    throw new Error(`RPC ${method}: HTTP ${resp.status}`);
  }
  return result;
}
/**
 * Read an ERC-20 balance by issuing `balanceOf(account)` as an `eth_call`
 * against the latest block.
 *
 * @param rpcUrl      JSON-RPC endpoint.
 * @param krkAddress  Token contract to query.
 * @param account     Holder address, with or without a `0x` prefix.
 * @returns The balance as returned by the contract, in the token's smallest unit.
 * @throws Error if the call does not return a non-empty hex string, for
 *   example a bare `0x`, which would otherwise surface as an opaque
 *   `SyntaxError` from `BigInt`.
 */
async function getKrkBalance(rpcUrl: string, krkAddress: string, account: string): Promise<bigint> {
  const selector = '0x70a08231'; // balanceOf(address)
  // ABI-encode the single address argument: left-pad to one 32-byte word.
  const encodedAccount = account.replace(/^0x/i, '').padStart(64, '0');
  const data = selector + encodedAccount;

  const result = await rpcCall(rpcUrl, 'eth_call', [{ to: krkAddress, data }, 'latest']);
  if (typeof result !== 'string' || !/^0x[0-9a-fA-F]+$/.test(result)) {
    throw new Error(
      `balanceOf(${account}) on ${krkAddress} returned unexpected data: ${JSON.stringify(result)}`,
    );
  }
  return BigInt(result);
}
/**
 * Poll `eth_getTransactionReceipt` until the transaction is mined
 * successfully, sleeping 500 ms after every unsuccessful poll.
 *
 * The original note for this helper says Anvil with automine mines before
 * returning the tx hash, so the first poll normally succeeds; the loop exists
 * for nodes configured with block intervals or high RPC latency.
 *
 * @param rpcUrl       JSON-RPC endpoint.
 * @param txHash       Hash of the transaction to wait for.
 * @param maxAttempts  Number of polls before giving up (default 20, about 10 s).
 * @throws Error if the receipt reports status `0x0` (reverted), or if no
 *   receipt appears within `maxAttempts` polls.
 */
async function waitForReceipt(rpcUrl: string, txHash: string, maxAttempts = 20): Promise<void> {
  const pollIntervalMs = 500;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const receipt = await rpcCall(rpcUrl, 'eth_getTransactionReceipt', [txHash]);
    // `!= null` also rejects `undefined`, so a response without a result keeps polling.
    if (receipt != null) {
      // Mined is not the same as succeeded: fail loudly on an explicit revert.
      if ((receipt as { status?: unknown }).status === '0x0') {
        throw new Error(`Transaction ${txHash} was mined but reverted (status 0x0)`);
      }
      return;
    }
    await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
  }
  throw new Error(`Transaction ${txHash} not mined after ${maxAttempts * pollIntervalMs}ms`);
}
// ── ABI helpers for the sell path ──────────────────────────────────────────
// Human-readable ABI fragments; the test wraps each in an ethers Interface to
// encode calldata for the two sell-path transactions.
// ERC-20 approve: used to grant the swap router an allowance on the KRK token.
const ERC20_ABI = ['function approve(address spender, uint256 amount) returns (bool)'];
// Router exactInputSingle: takes one struct argument describing the swap
// (token pair, fee tier, recipient, input amount, minimum output, price limit).
const ROUTER_ABI = [
'function exactInputSingle((address tokenIn, address tokenOut, uint24 fee, address recipient, uint256 amountIn, uint256 amountOutMinimum, uint160 sqrtPriceLimitX96) params) payable returns (uint256 amountOut)',
];
// ── Test ───────────────────────────────────────────────────────────────────
/**
 * Wait up to `timeoutMs` for `target` to become visible.
 *
 * `locator.isVisible()` returns immediately and ignores its `timeout` option,
 * so it cannot be used to wait for an element; `waitFor` can. Any failure to
 * become visible in time is reported as `false` rather than thrown, so the
 * caller can treat the element as optional.
 */
async function appearsWithin(
  target: { waitFor(options: { state: 'visible'; timeout: number }): Promise<void> },
  timeoutMs: number,
): Promise<boolean> {
  try {
    await target.waitFor({ state: 'visible', timeout: timeoutMs });
    return true;
  } catch {
    return false;
  }
}

/**
 * Scenario: buy KRK through the in-app swap widget, then sell the whole
 * balance back to WETH through the router, asserting the balance rises after
 * the buy and falls after the sell.
 *
 * The buy goes through the UI; the sell is sent as raw transactions through
 * the wallet provider injected into the page as `window.ethereum`.
 */
test('I can always leave', async ({ browser }) => {
  const config = getStackConfig();
  const ctx = await createWalletContext(browser, {
    privateKey: PK,
    rpcUrl: config.rpcUrl,
  });

  try {
    // Created inside the try so the context is closed even if this throws.
    const page = await ctx.newPage();
    page.on('console', msg => console.log(`[BROWSER] ${msg.type()}: ${msg.text()}`));
    page.on('pageerror', err => console.log(`[BROWSER ERROR] ${err.message}`));

    // ── 1. Load the web app ──────────────────────────────────────────────
    console.log('[TEST] Loading web app...');
    await page.goto(`${config.webAppUrl}/app/`, { waitUntil: 'domcontentloaded' });
    await expect(page.locator('.navbar-title').first()).toBeVisible({ timeout: 30_000 });

    // Force desktop-mode recalculation (wallet-provider sets screen.width = 1280)
    await page.evaluate(() => window.dispatchEvent(new Event('resize')));
    await page.waitForTimeout(2_000);

    // ── 2. Connect wallet via the UI ────────────────────────────────────
    // Both lookups are best-effort: a missing element is logged, and the
    // address assertion below is what decides whether the wallet is connected.
    console.log('[TEST] Connecting wallet...');
    const connectButton = page.locator('.connect-button--disconnected').first();
    if (await appearsWithin(connectButton, 5_000)) {
      await connectButton.click();
      await page.waitForTimeout(1_000);
      const connector = page.locator('.connectors-element').first();
      if (await appearsWithin(connector, 5_000)) {
        await connector.click();
        await page.waitForTimeout(2_000);
      } else {
        console.log('[TEST] WARNING: wallet connector panel not found after clicking Connect');
      }
    } else {
      console.log('[TEST] Connect button not found — wallet may already be connected or UI class changed');
    }

    // Confirm wallet address is displayed in the navbar (app shows first ~6 chars)
    const addrPrefix = ACCOUNT_ADDRESS.slice(0, 8); // e.g. "0x996550"
    await expect(page.getByText(new RegExp(addrPrefix, 'i')).first()).toBeVisible({ timeout: 15_000 });
    console.log('[TEST] Wallet connected');

    // ── 3. Navigate to get-krk and buy KRK via the swap widget ──────────
    console.log('[TEST] Navigating to get-krk...');
    await navigateSPA(page, '/app/get-krk');

    // The LocalSwapWidget is rendered when VITE_ENABLE_LOCAL_SWAP=true
    const swapInput = page.locator('#local-swap-amount');
    await expect(swapInput).toBeVisible({ timeout: 15_000 });
    console.log('[TEST] Swap widget visible');

    const krkBefore = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
    console.log(`[TEST] KRK balance before buy: ${krkBefore}`);

    await swapInput.fill('0.1');
    const buyButton = page.locator('.local-swap-widget .swap-button');
    await expect(buyButton).toBeVisible();
    console.log('[TEST] Clicking Buy KRK...');
    await buyButton.click();

    // Wait for the swap to complete (button cycles through "Submitting…" → "Buy KRK")
    try {
      await buyButton.filter({ hasText: /Submitting/i }).waitFor({ state: 'visible', timeout: 5_000 });
      console.log('[TEST] Swap in progress...');
      await buyButton.filter({ hasText: /Buy KRK/i }).waitFor({ state: 'visible', timeout: 60_000 });
      console.log('[TEST] Swap completed');
    } catch (err) {
      // On a fast Anvil node the button may cycle too quickly to observe.
      // Deliberately non-fatal: the balance assertion in step 4 is the real check.
      // Log the caught value so the root cause is visible if a real error occurred.
      console.log(`[TEST] Button state not observed (may be instant): ${err}`);
    }
    await page.waitForTimeout(2_000);

    // ── 4. Verify KRK was received ────────────────────────────────────
    const krkAfterBuy = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
    console.log(`[TEST] KRK balance after buy: ${krkAfterBuy}`);
    expect(krkAfterBuy).toBeGreaterThan(krkBefore);
    console.log('[TEST] ✅ KRK received');

    // ── 5. Sell all KRK back (sovereign exit) ───────────────────────────
    // Encode approve + exactInputSingle calldata in Node.js, then send via
    // the injected window.ethereum wallet provider (tests/setup/wallet-provider).
    console.log('[TEST] Encoding sell transactions...');
    const erc20Iface = new Interface(ERC20_ABI);
    const routerIface = new Interface(ROUTER_ABI);

    // Unlimited allowance (2^256 - 1) so the approval covers the full balance.
    const approveData = erc20Iface.encodeFunctionData('approve', [
      SWAP_ROUTER,
      BigInt('0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff'),
    ]);

    // No minimum output and no price limit: the scenario asserts that an exit
    // is possible, not that it happens at any particular price.
    const swapData = routerIface.encodeFunctionData('exactInputSingle', [
      {
        tokenIn: config.contracts.Kraiken,
        tokenOut: WETH,
        fee: POOL_FEE,
        recipient: ACCOUNT_ADDRESS,
        amountIn: krkAfterBuy,
        amountOutMinimum: 0n,
        sqrtPriceLimitX96: 0n,
      },
    ]);

    // Step 5a: approve KRK to the Uniswap router; wait for on-chain confirmation
    console.log('[TEST] Approving KRK to router...');
    const approveTxHash = await page.evaluate(
      ({ krkAddr, data, from }: { krkAddr: string; data: string; from: string }) =>
        (window.ethereum as any).request({
          method: 'eth_sendTransaction',
          params: [{ from, to: krkAddr, data, gas: '0x30000' }],
        }) as Promise<string>,
      { krkAddr: config.contracts.Kraiken, data: approveData, from: ACCOUNT_ADDRESS },
    );
    await waitForReceipt(config.rpcUrl, approveTxHash);
    console.log('[TEST] Approve mined');

    // Step 5b: swap KRK → WETH; wait for on-chain confirmation
    console.log('[TEST] Swapping KRK → WETH (exit)...');
    const swapTxHash = await page.evaluate(
      ({ routerAddr, data, from }: { routerAddr: string; data: string; from: string }) =>
        (window.ethereum as any).request({
          method: 'eth_sendTransaction',
          params: [{ from, to: routerAddr, data, gas: '0x80000' }],
        }) as Promise<string>,
      { routerAddr: SWAP_ROUTER, data: swapData, from: ACCOUNT_ADDRESS },
    );
    await waitForReceipt(config.rpcUrl, swapTxHash);
    console.log('[TEST] Swap mined');

    // ── 6. Assert KRK was sold ────────────────────────────────────────
    const krkAfterSell = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
    console.log(`[TEST] KRK balance after sell: ${krkAfterSell}`);
    expect(krkAfterSell).toBeLessThan(krkAfterBuy);
    console.log('[TEST] ✅ Sovereign exit confirmed: KRK sold back to WETH');
  } finally {
    await ctx.close();
  }
});