diff --git a/services/ponder/eslint.config.js b/services/ponder/eslint.config.js
index 77fb894..e6d4f05 100644
--- a/services/ponder/eslint.config.js
+++ b/services/ponder/eslint.config.js
@@ -17,6 +17,8 @@ export default [
         process: 'readonly',
         console: 'readonly',
         Context: 'readonly',
+        setInterval: 'readonly',
+        clearInterval: 'readonly',
       },
     },
     plugins: {
diff --git a/services/ponder/src/api/index.ts b/services/ponder/src/api/index.ts
index 362c323..725ed5e 100644
--- a/services/ponder/src/api/index.ts
+++ b/services/ponder/src/api/index.ts
@@ -1,4 +1,5 @@
 import { Hono } from 'hono';
+import type { Context, Next } from 'hono';
 import { cors } from 'hono/cors';
 import { client, graphql } from 'ponder';
 import { db } from 'ponder:api';
@@ -19,11 +20,120 @@ app.use(
   })
 );
 
+// Server-side GraphQL response cache.
+//
+// Without this, every polling client (O(users × 1/poll_interval)) generates a
+// separate DB query. With this 5 s cache + in-flight coalescing, the effective
+// DB hit rate is capped at O(1/5s) regardless of how many clients poll.
+//
+// The TTL matches the Caddy `Cache-Control: public, max-age=5` header so that
+// if a caching CDN or proxy is added later the two layers stay in sync.
+
+const GRAPHQL_CACHE_TTL_MS = 5_000; // 5 seconds – matches Caddy max-age=5
+const MAX_CACHE_ENTRIES = 500; // guard against unbounded growth from unique variable sets
+
+const responseCache = new Map<string, { body: string; expiresAt: number }>();
+
+// In-flight map: when concurrent requests arrive for the same query before the
+// first response is ready, they all await the same DB hit instead of each
+// issuing their own.
+const inFlight = new Map<string, Promise<string | null>>();
+
+// Evict expired entries every 30 s so queries that are never repeated don't
+// accumulate in memory indefinitely.
+const evictInterval = setInterval(() => {
+  const now = Date.now();
+  for (const [k, v] of responseCache) {
+    if (v.expiresAt <= now) responseCache.delete(k);
+  }
+}, 30_000);
+// Don't keep the process alive just for eviction.
+(evictInterval as unknown as { unref?: () => void }).unref?.();
+
+async function graphqlCache(c: Context, next: Next): Promise<Response | void> {
+  if (c.req.method !== 'GET' && c.req.method !== 'POST') return next();
+
+  // Build a stable cache key without consuming the original request body so
+  // the downstream graphql() handler can still read it.
+  // For GET: extract the query string via string operations to avoid `new URL()`
+  // which can throw a TypeError if the URL is relative (no scheme/host).
+  const rawKey =
+    c.req.method === 'POST'
+      ? await c.req.raw.clone().text()
+      : (c.req.url.includes('?') ? c.req.url.slice(c.req.url.indexOf('?')) : '');
+  // Prefix with the route path so /graphql and / never share cache entries,
+  // even though both currently serve identical content.
+  const cacheKey = `${c.req.path}:${rawKey}`;
+
+  const now = Date.now();
+
+  // 1. Cache hit – serve immediately, no DB involved.
+  // Evict lazily: delete the entry if it has already expired.
+  const hit = responseCache.get(cacheKey);
+  if (hit) {
+    if (hit.expiresAt > now) {
+      return c.body(hit.body, 200, { 'Content-Type': 'application/json' });
+    }
+    responseCache.delete(cacheKey);
+  }
+
+  // 2. In-flight coalescing – N concurrent identical queries share one DB hit.
+  const flying = inFlight.get(cacheKey);
+  if (flying) {
+    const body = await flying;
+    if (body !== null) {
+      return c.body(body, 200, { 'Content-Type': 'application/json' });
+    }
+    // The in-flight request errored; fall through and try again fresh.
+  }
+
+  // 3. Cache miss – run the real graphql() handler and cache a successful result.
+  const promise = (async (): Promise<string | null> => {
+    await next();
+    if (c.res.status !== 200) return null;
+    const body = await c.res.clone().text();
+    try {
+      const parsed = JSON.parse(body) as { errors?: unknown };
+      if (!parsed.errors) {
+        // Evict the oldest entry when the cache is at capacity to prevent
+        // unbounded growth from callers with many unique variable sets.
+        if (responseCache.size >= MAX_CACHE_ENTRIES) {
+          const oldestKey = responseCache.keys().next().value;
+          if (oldestKey !== undefined) responseCache.delete(oldestKey);
+        }
+        responseCache.set(cacheKey, { body, expiresAt: now + GRAPHQL_CACHE_TTL_MS });
+        return body;
+      }
+    } catch {
+      // Non-JSON response; don't cache.
+    }
+    return null;
+  })();
+
+  inFlight.set(cacheKey, promise);
+  // Only delete the key if our promise is still the one registered. A waiting
+  // request may have replaced it with its own promise before our .finally()
+  // fires (microtask ordering), and we must not evict that replacement.
+  promise.finally(() => {
+    if (inFlight.get(cacheKey) === promise) {
+      inFlight.delete(cacheKey);
+    }
+  });
+
+  const body = await promise;
+  if (body !== null) {
+    return c.body(body, 200, { 'Content-Type': 'application/json' });
+  }
+  // Error path: graphql() already populated c.res; let Hono send it as-is.
+}
+
 // SQL endpoint
 app.use('/sql/*', client({ db, schema }));
 
-// GraphQL endpoints
+// GraphQL endpoints with server-side caching + in-flight coalescing
+app.use('/graphql', graphqlCache);
 app.use('/graphql', graphql({ db, schema }));
+app.use('/', graphqlCache);
 app.use('/', graphql({ db, schema }));
 
 export default app;