fix: address review feedback on holdout evaluator (#381)

- evaluate.sh: add --ignore-scripts to npm install (prevents husky from
  writing to permanent repo .git/hooks from the ephemeral worktree)
- evaluate.sh: change --silent to --quiet (errors still printed on failure)
- evaluate.sh: add `npx playwright install chromium` step so browser
  binaries are present even when the cached revision doesn't match ^1.55.1
- evaluate.sh: set CI=true inline on the playwright invocation so
  forbidOnly activates and accidental test.only() causes a gate failure
- holdout.config.ts: document that CI=true is supplied by evaluate.sh
- always-leave.spec.ts: add waitForReceipt() helper; replace fixed
  waitForTimeout(2000) after eth_sendTransaction with proper receipt
  polling so tx confirmation is not a timing assumption
- always-leave.spec.ts: log the caught error in the button-cycling
  try/catch so contract reverts surface in the output
- always-leave.spec.ts: add console.log when connect button or connector
  panel is not found to make silent-skip cases diagnosable

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-01 12:04:35 +00:00
parent 2ddd8e9ed2
commit cce3c92120
3 changed files with 55 additions and 19 deletions

View file

@ -148,10 +148,21 @@ log "Building kraiken-lib..."
|| infra_error "kraiken-lib build failed"
# ── Install root npm dependencies (needed for npx playwright test) ─────
# --ignore-scripts: prevents husky from touching the permanent repo's .git/hooks
# from inside this ephemeral worktree.
# --quiet: suppresses normal npm output while still printing errors.
log "Installing root npm dependencies..."
(cd "$WORKTREE_DIR" && npm install --no-audit --no-fund --silent) \
(cd "$WORKTREE_DIR" && npm install --no-audit --no-fund --ignore-scripts --quiet) \
|| infra_error "npm install failed"
# ── Install Playwright browser binaries ────────────────────────────────
# Browser binaries are version-pinned per Playwright revision. If the
# revision resolved by ^1.55.1 is not already cached on this host,
# playwright test aborts immediately with a cryptic "Executable doesn't exist" error.
log "Installing Playwright browser binaries..."
(cd "$WORKTREE_DIR" && npx playwright install chromium) \
|| infra_error "playwright install chromium failed"
# ── Boot the stack ─────────────────────────────────────────────────────
cd "$WORKTREE_DIR"
log "Starting containerised stack (project: $COMPOSE_PROJECT)..."
@ -276,9 +287,11 @@ log " GraphQL: $EVAL_GRAPHQL_URL"
log " WebApp: $EVAL_WEBAPP_URL"
# ── Run holdout Playwright scenarios ──────────────────────────────────
# CI=true triggers forbidOnly in holdout.config.ts so accidental test.only()
# in any scenario file causes an immediate failure rather than a silent partial run.
log "Running holdout scenarios via Playwright..."
cd "$WORKTREE_DIR"
if npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then
if CI=true npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then
log "Gate PASSED"
exit 0
else

View file

@ -17,6 +17,9 @@ import { defineConfig, devices } from '@playwright/test';
export default defineConfig({
testDir: './scenarios',
fullyParallel: false,
// evaluate.sh sets CI=true before invoking playwright, so forbidOnly is always
// active in the evaluator context. Accidental test.only() in any scenario file
// causes an immediate failure rather than a silent partial run.
forbidOnly: !!process.env.CI,
retries: 0,
workers: 1,

View file

@ -45,6 +45,21 @@ async function getKrkBalance(rpcUrl: string, krkAddress: string, account: string
return BigInt(result);
}
/**
* Poll eth_getTransactionReceipt until the tx is mined or maxAttempts exceeded.
* Anvil with automine mines synchronously before returning the tx hash, so this
* resolves almost immediately. The explicit check guards against Anvil instances
* configured with block intervals or high RPC latency.
*/
async function waitForReceipt(rpcUrl: string, txHash: string, maxAttempts = 20): Promise<void> {
for (let i = 0; i < maxAttempts; i++) {
const receipt = await rpcCall(rpcUrl, 'eth_getTransactionReceipt', [txHash]);
if (receipt !== null) return;
await new Promise(r => setTimeout(r, 500));
}
throw new Error(`Transaction ${txHash} not mined after ${maxAttempts * 500}ms`);
}
// ── ABI helpers for the sell path ──────────────────────────────────────────
const ERC20_ABI = ['function approve(address spender, uint256 amount) returns (bool)'];
@ -86,7 +101,11 @@ test('I can always leave', async ({ browser }) => {
if (await connector.isVisible({ timeout: 5_000 })) {
await connector.click();
await page.waitForTimeout(2_000);
} else {
console.log('[TEST] WARNING: wallet connector panel not found after clicking Connect');
}
} else {
console.log('[TEST] Connect button not found — wallet may already be connected or UI class changed');
}
// Confirm wallet address is displayed in the navbar (app shows first ~6 chars)
@ -119,9 +138,10 @@ test('I can always leave', async ({ browser }) => {
console.log('[TEST] Swap in progress...');
await buyButton.filter({ hasText: /Buy KRK/i }).waitFor({ state: 'visible', timeout: 60_000 });
console.log('[TEST] Swap completed');
} catch {
// Swap may have been instant on a fast Anvil node
console.log('[TEST] Swap completed (no intermediate state observed)');
} catch (err) {
// On a fast Anvil node the button may cycle too quickly to observe.
// Log the caught value so the root cause is visible if a real error occurred.
console.log(`[TEST] Button state not observed (may be instant): ${err}`);
}
await page.waitForTimeout(2_000);
@ -155,31 +175,31 @@ test('I can always leave', async ({ browser }) => {
},
]);
// Step 5a: approve KRK to the Uniswap router
// Step 5a: approve KRK to the Uniswap router; wait for on-chain confirmation
console.log('[TEST] Approving KRK to router...');
await page.evaluate(
async ({ krkAddr, data, from }: { krkAddr: string; data: string; from: string }) => {
await (window.ethereum as any).request({
const approveTxHash = await page.evaluate(
({ krkAddr, data, from }: { krkAddr: string; data: string; from: string }) =>
(window.ethereum as any).request({
method: 'eth_sendTransaction',
params: [{ from, to: krkAddr, data, gas: '0x30000' }],
});
},
}) as Promise<string>,
{ krkAddr: config.contracts.Kraiken, data: approveData, from: ACCOUNT_ADDRESS },
);
await page.waitForTimeout(2_000);
await waitForReceipt(config.rpcUrl, approveTxHash);
console.log('[TEST] Approve mined');
// Step 5b: swap KRK → WETH
// Step 5b: swap KRK → WETH; wait for on-chain confirmation
console.log('[TEST] Swapping KRK → WETH (exit)...');
await page.evaluate(
async ({ routerAddr, data, from }: { routerAddr: string; data: string; from: string }) => {
await (window.ethereum as any).request({
const swapTxHash = await page.evaluate(
({ routerAddr, data, from }: { routerAddr: string; data: string; from: string }) =>
(window.ethereum as any).request({
method: 'eth_sendTransaction',
params: [{ from, to: routerAddr, data, gas: '0x80000' }],
});
},
}) as Promise<string>,
{ routerAddr: SWAP_ROUTER, data: swapData, from: ACCOUNT_ADDRESS },
);
await page.waitForTimeout(2_000);
await waitForReceipt(config.rpcUrl, swapTxHash);
console.log('[TEST] Swap mined');
// ── 6. Assert KRK was sold ────────────────────────────────────────
const krkAfterSell = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);