From fee75932370e3fb17992903c4da9d201d3096d40 Mon Sep 17 00:00:00 2001 From: Nishant Bangarwa Date: Thu, 25 Jun 2026 12:30:50 +0530 Subject: [PATCH 1/4] test(e2e): reuse one runtime per worker + shared readiness helper (Tier 2) Reuse the rill binary across tests instead of respawning per test: - web-common rillDev fixture now starts rill once per Playwright worker (scope: worker) and resets the project between tests rather than killing the process. Reset clears the previous project's source files (preserving rill.yaml and the runtime's tmp/ DB dir, since deleting rill.yaml orphans resources and an open DuckDB can't be wiped), waits for the old resources to tear down, copies the new project, then waits for full reconciliation. - Tests needing an uninitialized/pristine project (welcome flow: init, rill-yaml empty-project, motherduck) opt out via a new freshInstance option. - Promote waitForReconciliation into web-common/tests/utils and add gotoWhenReady; the fixture now gates every test start on reconciliation instead of a fixed waitForTimeout(1500). Repoint the 5 importers. - Delete the unused startRuntimeForEachTest.ts. workers stays 1 (Tier 2 sharding deferred). Local validation: a 16-test subset across Blank/AdBids transitions with file mutation ran with 1 binary spawn (was ~16) and zero controller-closed/readonly-database teardown races. --- .../tests/fixtures/rill-dev-fixtures.ts | 466 ++++++++++++++---- .../tests/utils/wait-for-reconciliation.ts | 15 + .../connectors/motherduck-connector.spec.ts | 6 +- web-local/tests/explores/annotations.spec.ts | 2 +- web-local/tests/explores/open-query.spec.ts | 2 +- .../explores/time-grain-derivation.spec.ts | 2 +- .../explores/visual-explore-editing.spec.ts | 2 +- web-local/tests/init.spec.ts | 7 +- web-local/tests/rill-yaml.spec.ts | 4 + .../tests/utils/startRuntimeForEachTest.ts | 84 ---- 10 files changed, 401 insertions(+), 189 deletions(-) rename {web-local => web-common}/tests/utils/wait-for-reconciliation.ts (83%) delete mode 100644 web-local/tests/utils/startRuntimeForEachTest.ts diff --git a/web-common/tests/fixtures/rill-dev-fixtures.ts b/web-common/tests/fixtures/rill-dev-fixtures.ts index c34160b10f45..d241b80b84f9 100644 --- a/web-common/tests/fixtures/rill-dev-fixtures.ts +++ b/web-common/tests/fixtures/rill-dev-fixtures.ts @@ -1,8 +1,19 @@ import type { Page } from "@playwright/test"; import { asyncWaitUntil } from "@rilldata/web-common/lib/waitUtils.ts"; +import { + V1ReconcileStatus, + type V1Resource, +} from "@rilldata/web-common/runtime-client/gen/index.schemas"; import axios from "axios"; -import { spawn } from "node:child_process"; -import { cpSync, existsSync, mkdirSync, rmSync } from "node:fs"; +import { spawn, type ChildProcess } from "node:child_process"; +import { + cpSync, + existsSync, + mkdirSync, + readdirSync, + rmSync, + writeFileSync, +} from "node:fs"; import { join } from "node:path"; import { test as base, expect } from "playwright/test"; import treeKill from "tree-kill"; @@ -10,15 +21,37 @@ import { getOpenPort } from "@rilldata/web-common/tests/utils/get-open-port.ts"; import { makeTempDir } from "@rilldata/web-common/tests/utils/make-temp-dir.ts"; import { spawnAndMatch } from "@rilldata/web-common/tests/utils/spawn.ts"; +// Matches cli/pkg/local.DefaultDBDir: the directory inside the project where the +// running runtime keeps its DuckDB and catalog. It must be preserved when +// resetting a reused runtime's project, or the open database is corrupted. +const DB_DIR = "tmp"; +const RILL_YAML = "rill.yaml"; +const ProjectParserKind = "rill.runtime.v1.ProjectParser"; + +// The long-lived runtime shared by every test on a worker. +type SharedRuntime = { + port: number; + projectDir: string; + homeDir: string; +}; + type MyFixtures = { cliHomeDir: string; project: string | undefined; projectDir: string | undefined; - rillDevPage: Page; rillDevBrowserState: string | undefined; + // Opt out of the shared per-worker runtime and get a pristine instance for + // this test. Use for tests that mutate global runtime state (env changes, + // controller restarts) and can't tolerate a reused process. + freshInstance: boolean; + rillDevPage: Page; }; -export const rillDev = base.extend({ +type MyWorkerFixtures = { + sharedRuntime: SharedRuntime; +}; + +export const rillDev = base.extend({ // Add a default home if cliHome is not provided so that tests always have a different home than the user's home. // This will make sure that login status won't conflicts with dev's login status when run locally. cliHomeDir: [makeTempDir("home"), { option: true }], @@ -29,83 +62,86 @@ export const rillDev = base.extend({ // If set, used to create the context used to create the rillDevPage. // A fresh context is used if not provided. rillDevBrowserState: [undefined, { option: true }], + freshInstance: [false, { option: true }], + + // Start the rill binary once per worker and reuse it across every test the + // worker runs. Between tests the project is reset (see resetSharedProject) + // rather than killing the process, which avoids ~one binary spawn per test + // and the DuckDB teardown races that came with it. + sharedRuntime: [ + // eslint-disable-next-line no-empty-pattern -- Playwright requires the fixtures arg even when unused + async ({}, use) => { + const port = await getOpenPort(); + const grpcPort = await getOpenPort(); + const homeDir = makeTempDir("home"); + const projectDir = makeTempDir("project"); + + const childProcess = await startRuntime({ + port, + grpcPort, + projectDir, + homeDir, + }); + + try { + await use({ port, projectDir, homeDir }); + } finally { + await stopRuntime(childProcess, projectDir); + } + }, + { scope: "worker" }, + ], rillDevPage: async ( { browser, + sharedRuntime, project, projectDir, cliHomeDir, rillDevBrowserState, + freshInstance, timezoneId, locale, }, use, ) => { - const TEST_PORT = await getOpenPort(); - const TEST_GRPC_PORT = await getOpenPort(); - const TEST_PROJECT_DIRECTORY = - projectDir ?? makeTempDir(`projects-${TEST_PORT}`); - - // Switch env to "dev" so that this points to the locally started rill cloud. - // For tests that involve a local cloud this will point to it. - // Otherwise, when running in a dev's machine, it will avoid pointing to prod cloud and bombard prod. - await spawnAndMatch( - "../rill", - "devtool switch-env dev".split(" "), - /Set default env to "dev"/, - { - additionalEnv: { - // Override home so that the instance is isolated for the provided cliHome. - HOME: cliHomeDir, - }, - }, - ); + // A test needs its own instance when it explicitly opts out, or when it + // pins a specific project directory that the shared runtime doesn't watch. + const needsOwnInstance = freshInstance || projectDir !== undefined; - rmSync(TEST_PROJECT_DIRECTORY, { force: true, recursive: true }); + let port: number; + let ownProcess: ChildProcess | undefined; + let ownProjectDir: string | undefined; - if (!existsSync(TEST_PROJECT_DIRECTORY)) { - mkdirSync(TEST_PROJECT_DIRECTORY, { recursive: true }); - } + if (needsOwnInstance) { + port = await getOpenPort(); + const grpcPort = await getOpenPort(); + ownProjectDir = projectDir ?? makeTempDir(`projects-${port}`); + + rmSync(ownProjectDir, { force: true, recursive: true }); + mkdirSync(ownProjectDir, { recursive: true }); + if (project) { + cpSync(projectFixtureDir(project), ownProjectDir, { + recursive: true, + force: true, + }); + } - if (project) { - const sourceProjectDir = join( - import.meta.dirname, - "../projects", - project, - ); - cpSync(sourceProjectDir, TEST_PROJECT_DIRECTORY, { - recursive: true, - force: true, + ownProcess = await startRuntime({ + port, + grpcPort, + projectDir: ownProjectDir, + homeDir: cliHomeDir, }); + } else { + port = sharedRuntime.port; + await resetSharedProject(sharedRuntime, project); } - const cmd = `start --no-open --port ${TEST_PORT} --port-grpc ${TEST_GRPC_PORT} ${TEST_PROJECT_DIRECTORY}`; - - const childProcess = spawn("../rill", cmd.split(" "), { - stdio: "inherit", - shell: true, - env: { - ...process.env, - // Override home so that the instance is isolated for the provided cliHome. - // Login status will be siloed for tests using the same cliHome. - HOME: cliHomeDir, - }, - }); - - childProcess.on("error", console.log); - - // Ping runtime until it's ready - await asyncWaitUntil(async () => { - try { - const response = await axios.get( - `http://localhost:${TEST_PORT}/v1/ping`, - ); - return response.status === 200; - } catch { - return false; - } - }); + // Wait for the project to fully reconcile before any test interaction, so + // tests never navigate to an explore/canvas that doesn't exist yet. + await waitForProjectReady(port, projectHasResources(project)); const context = await browser.newContext({ storageState: rillDevBrowserState ?? { cookies: [], origins: [] }, @@ -114,47 +150,281 @@ export const rillDev = base.extend({ }); const page = await context.newPage(); - await page.goto(`http://localhost:${TEST_PORT}`); - - // Give the runtime time to reconcile initial resources. Tests that - // navigate directly to explore URLs (via page.goto) need the explore - // to exist before navigation. - await page.waitForTimeout(1500); + await page.goto(`http://localhost:${port}`); await use(page); - // Close browser context to release any connections/resources first + // Close browser context to release any connections/resources first. await context.close(); - const processExit = new Promise((resolve) => { - childProcess.on("exit", resolve); + // Only per-test instances are torn down here; the shared runtime lives for + // the lifetime of the worker. + if (ownProcess) await stopRuntime(ownProcess, ownProjectDir!); + }, +}); + +function projectFixtureDir(project: string) { + return join(import.meta.dirname, "../projects", project); +} + +/** Whether a project fixture defines resources (anything beyond rill.yaml). */ +function projectHasResources(project: string | undefined): boolean { + if (!project) return false; + const stack = [projectFixtureDir(project)]; + while (stack.length > 0) { + const dir = stack.pop()!; + for (const entry of readdirSync(dir, { withFileTypes: true })) { + if (entry.isDirectory()) { + stack.push(join(dir, entry.name)); + continue; + } + if (entry.name === "rill.yaml") continue; + if (/\.(sql|yaml|yml)$/.test(entry.name)) return true; + } + } + return false; +} + +type StartRuntimeOptions = { + port: number; + grpcPort: number; + projectDir: string; + homeDir: string; +}; + +async function startRuntime({ + port, + grpcPort, + projectDir, + homeDir, +}: StartRuntimeOptions): Promise { + // Switch env to "dev" so that this points to the locally started rill cloud. + // For tests that involve a local cloud this will point to it. Otherwise, when + // running on a dev's machine, it avoids pointing to prod cloud. + await spawnAndMatch( + "../rill", + "devtool switch-env dev".split(" "), + /Set default env to "dev"/, + { + // Override home so that the instance is isolated for the provided home. + additionalEnv: { HOME: homeDir }, + }, + ); + + rmSync(projectDir, { force: true, recursive: true }); + mkdirSync(projectDir, { recursive: true }); + + const cmd = `start --no-open --port ${port} --port-grpc ${grpcPort} ${projectDir}`; + const childProcess = spawn("../rill", cmd.split(" "), { + stdio: "inherit", + shell: true, + env: { + ...process.env, + // Override home so that the instance is isolated for the provided home. + HOME: homeDir, + }, + }); + childProcess.on("error", console.log); + + // Ping runtime until it's ready. + await asyncWaitUntil(async () => { + try { + const response = await axios.get(`http://localhost:${port}/v1/ping`); + return response.status === 200; + } catch { + return false; + } + }); + + return childProcess; +} + +async function stopRuntime(childProcess: ChildProcess, projectDir: string) { + const processExit = new Promise((resolve) => { + childProcess.on("exit", resolve); + }); + + if (childProcess.pid) treeKill(childProcess.pid); + + await processExit; + + // Remove the project directory after the dev process has fully exited. + // Use expect.poll with exponential intervals to handle transient FS errors. + await expect + .poll( + () => { + try { + rmSync(projectDir, { force: true, recursive: true }); + return true; + } catch (err) { + const code = (err as NodeJS.ErrnoException)?.code; + const isTransient = + code === "ENOTEMPTY" || code === "EBUSY" || code === "EPERM"; + if (isTransient) return false; + throw err; + } + }, + { + intervals: [200, 400, 800, 1600, 3200], + timeout: 7000, + }, + ) + .toBe(true); +} + +/** + * Reset a reused runtime's project to the given fixture. Clears the previous + * project's source files (preserving the runtime's open DB dir), waits for the + * runtime to tear the old resources down, then copies in the new fixture. The + * explicit empty barrier avoids racing the file watcher: without it, readiness + * could observe the previous project still idle and return stale. + */ +async function resetSharedProject( + runtime: SharedRuntime, + project: string | undefined, +) { + // Keep a rill.yaml present at all times. Deleting it puts the parser into an + // error state where it stops removing the previous project's resources, so + // the clear barrier below would never observe them disappear. + const rillYamlPath = join(runtime.projectDir, RILL_YAML); + if (!existsSync(rillYamlPath)) { + writeFileSync(rillYamlPath, "compiler: rillv1\n"); + } + + // Remove the previous project's source files, preserving rill.yaml and the + // running runtime's DB dir, then wait for its resources to be torn down. + for (const entry of readdirSync(runtime.projectDir)) { + if (entry === DB_DIR || entry === RILL_YAML) continue; + rmSync(join(runtime.projectDir, entry), { force: true, recursive: true }); + } + + await waitForNoDataResources(runtime.port); + + // Copy the new project in; its own rill.yaml overwrites the placeholder. + if (project) { + cpSync(projectFixtureDir(project), runtime.projectDir, { + recursive: true, + force: true, }); + } +} + +async function fetchResources(port: number): Promise { + const response = await axios.get( + `http://localhost:${port}/v1/instances/default/resources`, + ); + return (response.data?.resources ?? []) as V1Resource[]; +} + +function dataResourcesOf(resources: V1Resource[]) { + return resources.filter((r) => r.meta?.name?.kind !== ProjectParserKind); +} - if (childProcess.pid) treeKill(childProcess.pid); - - await processExit; - - // Remove the test project directory after the dev process has fully exited. - // Use expect.poll with exponential intervals to handle transient FS errors. - await expect - .poll( - () => { - try { - rmSync(TEST_PROJECT_DIRECTORY, { force: true, recursive: true }); - return true; - } catch (err) { - const code = (err as NodeJS.ErrnoException)?.code; - const isTransient = - code === "ENOTEMPTY" || code === "EBUSY" || code === "EPERM"; - if (isTransient) return false; - throw err; - } - }, - { - intervals: [200, 400, 800, 1600, 3200], - timeout: 7000, - }, +function isIdle(resource: V1Resource) { + return ( + resource.meta?.reconcileStatus === V1ReconcileStatus.RECONCILE_STATUS_IDLE + ); +} + +/** + * Wait until the previous project's data resources have been torn down. The + * ProjectParser is intentionally ignored: it stays RUNNING while watching the + * repo and never settles to idle. + */ +async function waitForNoDataResources(port: number, timeoutMs = 30_000) { + const cleared = await asyncWaitUntil(async () => { + try { + const resources = await fetchResources(port); + return dataResourcesOf(resources).length === 0; + } catch { + return false; + } + }, timeoutMs); + + if (!cleared) { + throw new Error( + `Project did not clear on port ${port} within ${timeoutMs}ms`, + ); + } +} + +/** + * Wait until the project has fully reconciled: every data resource is idle and + * the resource set is stable across consecutive polls (so we don't return + * mid-reconcile while resources are still being created). When the project + * defines resources, requires at least one to be present. The ProjectParser is + * ignored: it stays RUNNING while watching the repo and never settles to idle. + */ +async function waitForProjectReady( + port: number, + expectResources: boolean, + timeoutMs = 60_000, +) { + let prevSignature = ""; + let stablePolls = 0; + let lastResources: V1Resource[] = []; + + const ready = await asyncWaitUntil(async () => { + let resources: V1Resource[]; + try { + resources = await fetchResources(port); + } catch { + stablePolls = 0; + return false; + } + lastResources = resources; + + const data = dataResourcesOf(resources); + if (expectResources && data.length === 0) { + stablePolls = 0; + return false; + } + if (!data.every(isIdle)) { + stablePolls = 0; + return false; + } + + const signature = data + .map( + (r) => + `${r.meta?.name?.kind}/${r.meta?.name?.name}@${r.meta?.stateVersion}`, ) - .toBe(true); - }, -}); + .sort() + .join("|"); + if (signature === prevSignature) { + stablePolls += 1; + } else { + prevSignature = signature; + stablePolls = 1; + } + // Several consecutive identical polls (~250ms apart) means reconciliation + // has settled rather than still creating resources. + return stablePolls >= 3; + }, timeoutMs); + + if (!ready) { + const pending = dataResourcesOf(lastResources) + .filter((r) => !isIdle(r)) + .map( + (r) => + `${r.meta?.name?.kind}/${r.meta?.name?.name}: ${r.meta?.reconcileStatus}`, + ) + .join("\n"); + throw new Error( + `Project did not become ready on port ${port}. Still pending:\n${pending || "(none)"}`, + ); + } + + const errors = dataResourcesOf(lastResources).filter( + (r) => r.meta?.reconcileError, + ); + if (errors.length > 0) { + const details = errors + .map( + (r) => + `${r.meta?.name?.kind}/${r.meta?.name?.name}: ${r.meta?.reconcileError}`, + ) + .join("\n"); + throw new Error(`Reconciliation errors:\n${details}`); + } +} diff --git a/web-local/tests/utils/wait-for-reconciliation.ts b/web-common/tests/utils/wait-for-reconciliation.ts similarity index 83% rename from web-local/tests/utils/wait-for-reconciliation.ts rename to web-common/tests/utils/wait-for-reconciliation.ts index 733c6c907883..2677e5040109 100644 --- a/web-local/tests/utils/wait-for-reconciliation.ts +++ b/web-common/tests/utils/wait-for-reconciliation.ts @@ -74,3 +74,18 @@ export async function waitForReconciliation(page: Page, timeoutMs = 60_000) { return dataResources; } + +/** + * Navigates to a URL that depends on freshly created or edited resources, then + * waits for the project to finish reconciling before returning. Use this in + * place of a bare `page.goto(...)` whenever the destination needs resources to + * be ready (the recurring cause of navigate-before-reconcile flakiness). + */ +export async function gotoWhenReady( + page: Page, + url: string, + timeoutMs = 60_000, +) { + await page.goto(url); + await waitForReconciliation(page, timeoutMs); +} diff --git a/web-local/tests/connectors/motherduck-connector.spec.ts b/web-local/tests/connectors/motherduck-connector.spec.ts index 683efa7cd5c7..1e67b591dd5e 100644 --- a/web-local/tests/connectors/motherduck-connector.spec.ts +++ b/web-local/tests/connectors/motherduck-connector.spec.ts @@ -3,8 +3,10 @@ import { test } from "../setup/base"; import { updateCodeEditor } from "../utils/commonHelpers"; test.describe("MotherDuck welcome flow", () => { - // Start from an empty workspace to exercise the onboarding path - test.use({ project: undefined }); + // Start from an empty workspace to exercise the onboarding path. The welcome + // flow needs an uninitialized project, so use a pristine instance rather than + // the shared per-worker runtime (which keeps a rill.yaml present). + test.use({ project: undefined, freshInstance: true }); test("initializes MotherDuck from welcome screen and persists secrets before connector", async ({ page, diff --git a/web-local/tests/explores/annotations.spec.ts b/web-local/tests/explores/annotations.spec.ts index 60a184398fb4..de7750705948 100644 --- a/web-local/tests/explores/annotations.spec.ts +++ b/web-local/tests/explores/annotations.spec.ts @@ -4,7 +4,7 @@ import { V1TimeGrain } from "@rilldata/web-common/runtime-client/gen/index.schem import { interactWithTimeRangeMenu } from "@rilldata/web-common/tests/utils/explore-interactions"; import { DateTime } from "luxon"; import { test } from "../setup/base"; -import { waitForReconciliation } from "../utils/wait-for-reconciliation"; +import { waitForReconciliation } from "@rilldata/web-common/tests/utils/wait-for-reconciliation"; // Annotation timestamps as they'll be serialized from DuckDB (UTC). // All annotations that may be visible at day grain in "Last 7 days": diff --git a/web-local/tests/explores/open-query.spec.ts b/web-local/tests/explores/open-query.spec.ts index 7f645b48d505..a5bc16c1545d 100644 --- a/web-local/tests/explores/open-query.spec.ts +++ b/web-local/tests/explores/open-query.spec.ts @@ -1,5 +1,5 @@ import { test } from "../setup/base"; -import { waitForReconciliation } from "../utils/wait-for-reconciliation.ts"; +import { waitForReconciliation } from "@rilldata/web-common/tests/utils/wait-for-reconciliation"; test.describe("Query-to-Explore routing", () => { test.use({ project: "AdBids" }); diff --git a/web-local/tests/explores/time-grain-derivation.spec.ts b/web-local/tests/explores/time-grain-derivation.spec.ts index 811f9284aef9..ee83c0939382 100644 --- a/web-local/tests/explores/time-grain-derivation.spec.ts +++ b/web-local/tests/explores/time-grain-derivation.spec.ts @@ -1,6 +1,6 @@ import { expect, type Page } from "@playwright/test"; import { test } from "../setup/base"; -import { waitForReconciliation } from "../utils/wait-for-reconciliation.ts"; +import { waitForReconciliation } from "@rilldata/web-common/tests/utils/wait-for-reconciliation"; test.describe("Time grain derivation from URL", () => { test.use({ project: "AdBids" }); diff --git a/web-local/tests/explores/visual-explore-editing.spec.ts b/web-local/tests/explores/visual-explore-editing.spec.ts index 78737fd0125b..4fb2fc6b1dbd 100644 --- a/web-local/tests/explores/visual-explore-editing.spec.ts +++ b/web-local/tests/explores/visual-explore-editing.spec.ts @@ -1,6 +1,6 @@ import { expect } from "@playwright/test"; import { test } from "../setup/base"; -import { waitForReconciliation } from "../utils/wait-for-reconciliation"; +import { waitForReconciliation } from "@rilldata/web-common/tests/utils/wait-for-reconciliation"; import { gotoNavEntry } from "../utils/waitHelpers"; test.describe("visual explore editing", () => { diff --git a/web-local/tests/init.spec.ts b/web-local/tests/init.spec.ts index 46b389f8fabc..b2dec6570a25 100644 --- a/web-local/tests/init.spec.ts +++ b/web-local/tests/init.spec.ts @@ -2,7 +2,12 @@ import { EXAMPLES } from "@rilldata/web-common/features/welcome/constants"; import { expect } from "playwright/test"; import { test } from "./setup/base"; import { splitFolderAndFileName } from "@rilldata/web-common/features/entity-management/file-path-utils.ts"; -import { waitForReconciliation } from "./utils/wait-for-reconciliation"; +import { waitForReconciliation } from "@rilldata/web-common/tests/utils/wait-for-reconciliation"; + +// These tests drive the welcome / project-initialization flow, which only +// appears for an uninitialized project. The shared per-worker runtime always +// keeps a rill.yaml present, so these need their own pristine instance. +test.use({ freshInstance: true }); test.describe("Example project initialization", () => { EXAMPLES.forEach((example) => { diff --git a/web-local/tests/rill-yaml.spec.ts b/web-local/tests/rill-yaml.spec.ts index 6f866b79d016..f0468c4572a6 100644 --- a/web-local/tests/rill-yaml.spec.ts +++ b/web-local/tests/rill-yaml.spec.ts @@ -13,6 +13,10 @@ async function expectRillYAMLToContainOlapConnector(page: Page, text: string) { } test.describe("Default olap_connector behavior", () => { + // Exercises the empty/uninitialized project flow, which needs a pristine + // instance rather than the shared per-worker runtime (which keeps a rill.yaml). + test.use({ freshInstance: true }); + test("Should set default olap_connector to duckdb for empty project", async ({ page, }) => { diff --git a/web-local/tests/utils/startRuntimeForEachTest.ts b/web-local/tests/utils/startRuntimeForEachTest.ts deleted file mode 100644 index 0011f8e6cab5..000000000000 --- a/web-local/tests/utils/startRuntimeForEachTest.ts +++ /dev/null @@ -1,84 +0,0 @@ -import { test } from "@playwright/test"; -import { rmSync, writeFileSync, existsSync, mkdirSync } from "fs"; -import { spawn } from "node:child_process"; -import type { ChildProcess } from "node:child_process"; -import treeKill from "tree-kill"; -import { isPortOpen } from "@rilldata/web-local/lib/util/isPortOpen"; -import { asyncWaitUntil, waitUntil } from "@rilldata/web-common/lib/waitUtils"; -import axios from "axios"; - -const TEST_PROJECT_DIRECTORY = "temp/test-project"; -const TEST_PORT = 8083; -const TEST_PORT_GRPC = 9083; - -export function startRuntimeForEachTest() { - let childProcess: ChildProcess; - let rillShutdown = false; - - test.beforeEach(async () => { - rmSync(TEST_PROJECT_DIRECTORY, { - force: true, - recursive: true, - }); - if (!existsSync(TEST_PROJECT_DIRECTORY)) { - mkdirSync(TEST_PROJECT_DIRECTORY, { recursive: true }); - } - // Add `rill.yaml` file to the project repo - writeFileSync( - `${TEST_PROJECT_DIRECTORY}/rill.yaml`, - 'compiler: rill-beta\ntitle: "Test Project"', - ); - - const cmd = `start --no-open --port ${TEST_PORT} --port-grpc ${TEST_PORT_GRPC} --db ${TEST_PROJECT_DIRECTORY}/stage.db?rill_pool_size=4 ${TEST_PROJECT_DIRECTORY} --env connector.duckdb.external_table_storage=false`; - - childProcess = spawn("../rill", cmd.split(" "), { - stdio: "pipe", - shell: true, - }); - childProcess.on("error", console.log); - // Runtime sometimes ends the process but still hasnt released closed the duckdb connection. - // So wait for the stdio to close. We also need to set `stdio: pipe` and forward the io - childProcess.on("close", () => { - rillShutdown = true; - }); - childProcess.stdout?.on("data", (chunk: Uint8Array) => { - process.stdout?.write(chunk); - }); - childProcess.stderr?.on("data", (chunk: Uint8Array) => { - process.stdout?.write(chunk); - }); - - // Ping runtime until it's ready - await asyncWaitUntil(async () => { - try { - const response = await axios.get( - `http://localhost:${TEST_PORT}/v1/ping`, - ); - return response.status === 200; - } catch { - return false; - } - }); - }); - - test.afterEach(async () => { - const processExit = new Promise((resolve) => { - if (childProcess.pid) - treeKill(childProcess.pid, () => { - resolve(); - }); - else { - resolve(); - } - }); - await asyncWaitUntil(async () => !(await isPortOpen(TEST_PORT))); - await processExit; - - await waitUntil(() => rillShutdown, 5000); - - rmSync(TEST_PROJECT_DIRECTORY, { - force: true, - recursive: true, - }); - }); -} From cf706df8ff4227870a191549e0818da598fb8153 Mon Sep 17 00:00:00 2001 From: Nishant Bangarwa Date: Thu, 25 Jun 2026 13:21:20 +0530 Subject: [PATCH 2/4] test(e2e): fix CI failures from runtime reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - clickhouse/postgres connectors: mark freshInstance. They drive the welcome flow (needs an uninitialized project) and clickhouse restarts the controller by changing olap_connector — both incompatible with the shared per-worker runtime. Fixes the two web-local failures. - waitForProjectReady: key stability on resource names only, not stateVersion. A resource that re-reconciles back to idle (common during a deploy) was resetting the stability counter and stalling readiness, which failed web-integration with 'did not become ready' and no pending resources. - cliHomeDir: default to undefined and route any test that sets it to its own instance. The deploy journey relies on CLI auth state in a specific home, which the shared runtime's worker home would not provide. --- .../tests/fixtures/rill-dev-fixtures.ts | 29 +++++++++++-------- .../connectors/clickhouse-connector.spec.ts | 6 ++++ .../connectors/postgres-connector.spec.ts | 4 +++ 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/web-common/tests/fixtures/rill-dev-fixtures.ts b/web-common/tests/fixtures/rill-dev-fixtures.ts index d241b80b84f9..05f41f732510 100644 --- a/web-common/tests/fixtures/rill-dev-fixtures.ts +++ b/web-common/tests/fixtures/rill-dev-fixtures.ts @@ -36,7 +36,7 @@ type SharedRuntime = { }; type MyFixtures = { - cliHomeDir: string; + cliHomeDir: string | undefined; project: string | undefined; projectDir: string | undefined; rillDevBrowserState: string | undefined; @@ -52,9 +52,11 @@ type MyWorkerFixtures = { }; export const rillDev = base.extend({ - // Add a default home if cliHome is not provided so that tests always have a different home than the user's home. - // This will make sure that login status won't conflicts with dev's login status when run locally. - cliHomeDir: [makeTempDir("home"), { option: true }], + // When set, the test gets its own pristine runtime using this home (rather + // than the shared per-worker runtime). Tests that depend on CLI login/auth + // state living in a specific home (e.g. the deploy journey) set this. When + // unset, the shared runtime's isolated worker home is used. + cliHomeDir: [undefined, { option: true }], project: [undefined, { option: true }], // We default to using a randomly created temporary directory for project. // This can be used to get a consistent @@ -106,9 +108,11 @@ export const rillDev = base.extend({ }, use, ) => { - // A test needs its own instance when it explicitly opts out, or when it - // pins a specific project directory that the shared runtime doesn't watch. - const needsOwnInstance = freshInstance || projectDir !== undefined; + // A test needs its own instance when it explicitly opts out, pins a + // specific project directory the shared runtime doesn't watch, or depends on + // a specific CLI home (the shared runtime uses its own isolated worker home). + const needsOwnInstance = + freshInstance || projectDir !== undefined || cliHomeDir !== undefined; let port: number; let ownProcess: ChildProcess | undefined; @@ -132,7 +136,7 @@ export const rillDev = base.extend({ port, grpcPort, projectDir: ownProjectDir, - homeDir: cliHomeDir, + homeDir: cliHomeDir ?? makeTempDir("home"), }); } else { port = sharedRuntime.port; @@ -384,11 +388,12 @@ async function waitForProjectReady( return false; } + // Track the set of resource names only, not their stateVersion: a resource + // that re-reconciles and returns to idle (common while a project is being + // deployed) shouldn't reset stability and stall readiness. We only need the + // resource set to stop growing/shrinking while everything is idle. const signature = data - .map( - (r) => - `${r.meta?.name?.kind}/${r.meta?.name?.name}@${r.meta?.stateVersion}`, - ) + .map((r) => `${r.meta?.name?.kind}/${r.meta?.name?.name}`) .sort() .join("|"); if (signature === prevSignature) { diff --git a/web-local/tests/connectors/clickhouse-connector.spec.ts b/web-local/tests/connectors/clickhouse-connector.spec.ts index 7f19d6483fb9..d9ee557f07d8 100644 --- a/web-local/tests/connectors/clickhouse-connector.spec.ts +++ b/web-local/tests/connectors/clickhouse-connector.spec.ts @@ -11,6 +11,12 @@ test.describe("ClickHouse connector", () => { // TODO: fix FileAndResourceWatcher to be more robust. test.describe.configure({ retries: 3 }); + // These tests drive the welcome flow (which needs an uninitialized project) + // and change the olap_connector, which restarts the controller. Both are + // incompatible with the shared per-worker runtime, so each test gets its own + // pristine instance. + test.use({ freshInstance: true }); + const clickhouseOne = new ClickHouseTestContainer(); const clickhouseTwo = new ClickHouseTestContainer(); diff --git a/web-local/tests/connectors/postgres-connector.spec.ts b/web-local/tests/connectors/postgres-connector.spec.ts index d86fa0b4be43..ef6842ed81ca 100644 --- a/web-local/tests/connectors/postgres-connector.spec.ts +++ b/web-local/tests/connectors/postgres-connector.spec.ts @@ -9,6 +9,10 @@ test.describe.skip("Postgres connector", () => { // So retry to not make the tests flaky. test.describe.configure({ retries: 3 }); + // The welcome-screen tests need an uninitialized project, so each test gets + // its own pristine instance rather than the shared per-worker runtime. + test.use({ freshInstance: true }); + const postgresOne = new PostgresTestContainer(); const postgresTwo = new PostgresTestContainer(); From ddbf5bc6827ff2f52576e07095b6dacdf3799cc8 Mon Sep 17 00:00:00 2001 From: Nishant Bangarwa Date: Thu, 25 Jun 2026 14:05:55 +0530 Subject: [PATCH 3/4] test(e2e): scope connector freshInstance to welcome screens; tolerate deploy churn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - clickhouse/postgres: move freshInstance onto only the 'Welcome screen' blocks (which need an uninitialized project). The 'Home page' blocks stay on the shared runtime — they passed there, and the per-test reset recovers the controller restart that clickhouse's olap_connector change triggers. Marking the whole describe freshInstance had regressed the Home page tests. - waitForProjectReady: if the resource set never stabilizes within the timeout but the project did reach a fully-idle state, proceed instead of failing. The deploy tests' background cloud sync churns the resource set continuously, which previously failed with 'did not become ready (none pending)'. Validated locally: clickhouse welcome (fresh) + home page (shared) + a trailing shared spec all pass, confirming the olap change doesn't corrupt the reused runtime. --- .../tests/fixtures/rill-dev-fixtures.ts | 54 +++++++++++-------- .../connectors/clickhouse-connector.spec.ts | 11 ++-- .../connectors/postgres-connector.spec.ts | 9 ++-- 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/web-common/tests/fixtures/rill-dev-fixtures.ts b/web-common/tests/fixtures/rill-dev-fixtures.ts index 05f41f732510..6d66d97d72c8 100644 --- a/web-common/tests/fixtures/rill-dev-fixtures.ts +++ b/web-common/tests/fixtures/rill-dev-fixtures.ts @@ -366,6 +366,7 @@ async function waitForProjectReady( ) { let prevSignature = ""; let stablePolls = 0; + let reachedIdle = false; let lastResources: V1Resource[] = []; const ready = await asyncWaitUntil(async () => { @@ -388,6 +389,11 @@ async function waitForProjectReady( return false; } + // The project has fully reconciled at least once. Used as a fallback below + // if the resource set never stabilizes (e.g. background cloud sync in the + // deploy tests keeps churning it). + reachedIdle = true; + // Track the set of resource names only, not their stateVersion: a resource // that re-reconciles and returns to idle (common while a project is being // deployed) shouldn't reset stability and stall readiness. We only need the @@ -407,29 +413,35 @@ async function waitForProjectReady( return stablePolls >= 3; }, timeoutMs); - if (!ready) { - const pending = dataResourcesOf(lastResources) - .filter((r) => !isIdle(r)) - .map( - (r) => - `${r.meta?.name?.kind}/${r.meta?.name?.name}: ${r.meta?.reconcileStatus}`, - ) - .join("\n"); - throw new Error( - `Project did not become ready on port ${port}. Still pending:\n${pending || "(none)"}`, + if (ready) { + const errors = dataResourcesOf(lastResources).filter( + (r) => r.meta?.reconcileError, ); + if (errors.length > 0) { + const details = errors + .map( + (r) => + `${r.meta?.name?.kind}/${r.meta?.name?.name}: ${r.meta?.reconcileError}`, + ) + .join("\n"); + throw new Error(`Reconciliation errors:\n${details}`); + } + return; } - const errors = dataResourcesOf(lastResources).filter( - (r) => r.meta?.reconcileError, + // The set never stabilized within the timeout. If the project did fully + // reconcile at some point, proceed: the churn is background activity (e.g. + // cloud sync), not an unready project. Only fail if it never reached idle. + if (reachedIdle) return; + + const pending = dataResourcesOf(lastResources) + .filter((r) => !isIdle(r)) + .map( + (r) => + `${r.meta?.name?.kind}/${r.meta?.name?.name}: ${r.meta?.reconcileStatus}`, + ) + .join("\n"); + throw new Error( + `Project did not become ready on port ${port}. Still pending:\n${pending || "(none)"}`, ); - if (errors.length > 0) { - const details = errors - .map( - (r) => - `${r.meta?.name?.kind}/${r.meta?.name?.name}: ${r.meta?.reconcileError}`, - ) - .join("\n"); - throw new Error(`Reconciliation errors:\n${details}`); - } } diff --git a/web-local/tests/connectors/clickhouse-connector.spec.ts b/web-local/tests/connectors/clickhouse-connector.spec.ts index d9ee557f07d8..7fc4779f2442 100644 --- a/web-local/tests/connectors/clickhouse-connector.spec.ts +++ b/web-local/tests/connectors/clickhouse-connector.spec.ts @@ -11,12 +11,6 @@ test.describe("ClickHouse connector", () => { // TODO: fix FileAndResourceWatcher to be more robust. test.describe.configure({ retries: 3 }); - // These tests drive the welcome flow (which needs an uninitialized project) - // and change the olap_connector, which restarts the controller. Both are - // incompatible with the shared per-worker runtime, so each test gets its own - // pristine instance. - test.use({ freshInstance: true }); - const clickhouseOne = new ClickHouseTestContainer(); const clickhouseTwo = new ClickHouseTestContainer(); @@ -33,6 +27,11 @@ test.describe("ClickHouse connector", () => { }); test.describe("Welcome screen", () => { + // The welcome flow needs an uninitialized project, so use a pristine + // instance rather than the shared per-worker runtime (which keeps a + // rill.yaml present). The Home page block below stays on the shared runtime. + test.use({ freshInstance: true }); + test("Create connector using individual fields", async ({ page }) => { // Open the connect to clickhouse modal await page.getByLabel("Connect to clickhouse").click(); diff --git a/web-local/tests/connectors/postgres-connector.spec.ts b/web-local/tests/connectors/postgres-connector.spec.ts index ef6842ed81ca..3aa647b3ef37 100644 --- a/web-local/tests/connectors/postgres-connector.spec.ts +++ b/web-local/tests/connectors/postgres-connector.spec.ts @@ -9,10 +9,6 @@ test.describe.skip("Postgres connector", () => { // So retry to not make the tests flaky. test.describe.configure({ retries: 3 }); - // The welcome-screen tests need an uninitialized project, so each test gets - // its own pristine instance rather than the shared per-worker runtime. - test.use({ freshInstance: true }); - const postgresOne = new PostgresTestContainer(); const postgresTwo = new PostgresTestContainer(); @@ -29,6 +25,11 @@ test.describe.skip("Postgres connector", () => { }); test.describe("Welcome screen", () => { + // The welcome flow needs an uninitialized project, so use a pristine + // instance rather than the shared per-worker runtime. The Home page block + // below stays on the shared runtime. + test.use({ freshInstance: true }); + test("Create connector using individual fields", async ({ page }) => { await page.getByLabel("See more connectors").click(); await enterPostgresCredentials(page, postgresOne); From f8b2d88ae47b74d88891946dfc3aafb313956962 Mon Sep 17 00:00:00 2001 From: Nishant Bangarwa Date: Thu, 25 Jun 2026 14:30:59 +0530 Subject: [PATCH 4/4] test(e2e): don't wipe a caller-prepared project dir in startRuntime The per-test (freshInstance) path copies the project into its dir, but startRuntime then rmSync'd that dir before spawning, leaving the runtime with an empty project. This only surfaced for freshInstance tests that use a project (the deploy journey: freshInstance via cliHomeDir + project=AdBids); the welcome-flow freshInstance tests use no project so an empty dir was correct. startRuntime now assumes the caller prepared projectDir. Validated locally: a freshInstance + project:AdBids instance now loads the project's resources (waitForProjectReady no longer times out on an empty project). --- web-common/tests/fixtures/rill-dev-fixtures.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/web-common/tests/fixtures/rill-dev-fixtures.ts b/web-common/tests/fixtures/rill-dev-fixtures.ts index 6d66d97d72c8..56ee5f292744 100644 --- a/web-common/tests/fixtures/rill-dev-fixtures.ts +++ b/web-common/tests/fixtures/rill-dev-fixtures.ts @@ -215,8 +215,9 @@ async function startRuntime({ }, ); - rmSync(projectDir, { force: true, recursive: true }); - mkdirSync(projectDir, { recursive: true }); + // The caller is responsible for preparing projectDir (an empty dir for the + // shared runtime, or a copied project for a per-test instance). Do not wipe + // it here, or a copied project would be deleted before the runtime starts. const cmd = `start --no-open --port ${port} --port-grpc ${grpcPort} ${projectDir}`; const childProcess = spawn("../rill", cmd.split(" "), {