From f1fa89c696e82546d73e762f0e7c78b404da63dd Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Tue, 2 Jun 2026 08:58:21 +0200 Subject: [PATCH 01/13] fix(appkit): don't crash typegen when the warehouse is unreachable Type generation threw an uncaught error whenever the SQL warehouse was down. Every DESCRIBE QUERY failed, all queries degraded to an unknown result, and generateFromEntryPoint unconditionally threw an aggregate "Type generation failed" error that escaped uncaught at the Vite plugin (un-awaited generate()) and the CLI (sync cmd.parse()) call sites. Distinguish connectivity failures from genuine SQL errors: - Connectivity (executeStatement rejects): degrade silently. Reuse the last-known-good cached type if present, otherwise emit an unknown result. Never fatal, so a transient outage no longer fails a build. - SQL error (reachable warehouse, DESCRIBE FAILED): surface via a typed TypegenSyntaxError so the existing prod-throws / dev-warns gate applies. Eligible to fail prod builds only. Also stop caching unknown results: only successful describes with a result schema are persisted, so a transient outage never poisons the cache and a fixed query recovers on the next run. PR1 of 2 (user-visible behavior). PR2 will await the Vite buildStart/watcher, use parseAsync().catch() in the CLI, and add degrade/throw regression tests. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/type-generator/index.ts | 67 ++++++--- .../src/type-generator/query-registry.ts | 132 +++++++++++++----- .../tests/generate-queries.test.ts | 24 ++-- packages/appkit/src/type-generator/types.ts | 27 ++++ 4 files changed, 185 insertions(+), 65 deletions(-) diff --git a/packages/appkit/src/type-generator/index.ts b/packages/appkit/src/type-generator/index.ts index c9a528fe7..e0379152a 100644 --- a/packages/appkit/src/type-generator/index.ts +++ b/packages/appkit/src/type-generator/index.ts @@ -9,12 +9,39 @@ import { } from "./migration"; import { generateQueriesFromDescribe } from "./query-registry"; import { generateServingTypes as generateServingTypesImpl } from "./serving/generator"; -import type { QuerySchema } from "./types"; +import type { QuerySchema, QuerySyntaxError } from "./types"; dotenv.config(); const logger = createLogger("type-generator"); +/** + * Thrown when one or more queries fail `DESCRIBE QUERY` against a *reachable* + * warehouse — i.e. genuine SQL errors (bad table, syntax, incompatible type), + * as opposed to a connectivity failure (warehouse unreachable), which degrades + * silently. Whether this is fatal is the caller's decision: the Vite plugin and + * CLI fail the build in production and warn-only in development. + */ +export class TypegenSyntaxError extends Error { + readonly queries: QuerySyntaxError[]; + + constructor(queries: QuerySyntaxError[], warehouseId?: string) { + const names = queries.map((q) => q.name).join(", "); + super( + [ + `Type generation failed: ${queries.length} ${queries.length === 1 ? "query" : "queries"} could not be described: ${names}.`, + `DESCRIBE QUERY failed for these queries — see the error codes above for details.`, + `Common causes: SQL syntax errors, missing tables/views, or warehouse format incompatibilities.`, + warehouseId + ? `To debug: run the failing query directly in a SQL editor against warehouse ${warehouseId}.` + : `To debug: run the failing query directly in a SQL editor.`, + ].join("\n"), + ); + this.name = "TypegenSyntaxError"; + this.queries = queries; + } +} + /** * Generate type declarations for QueryRegistry * Create the d.ts file from the plugin routes and query schemas @@ -64,28 +91,13 @@ export async function generateFromEntryPoint(options: { logger.debug("Starting type generation..."); let queryRegistry: QuerySchema[] = []; - if (queryFolder) - queryRegistry = await generateQueriesFromDescribe( - queryFolder, - warehouseId, - { - noCache, - }, - ); - - const failedQueries = queryRegistry.filter((q) => - q.type.includes("result: unknown"), - ); - if (failedQueries.length > 0) { - const names = failedQueries.map((q) => q.name).join(", "); - throw new Error( - [ - `Type generation failed: ${failedQueries.length} ${failedQueries.length === 1 ? "query" : "queries"} could not be described: ${names}.`, - `DESCRIBE QUERY failed for these queries — see the error codes above for details.`, - `Common causes: SQL syntax errors, missing tables/views, or warehouse format incompatibilities.`, - `To debug: run the failing query directly in a SQL editor against warehouse ${warehouseId}.`, - ].join("\n"), - ); + let syntaxErrors: QuerySyntaxError[] = []; + if (queryFolder) { + const result = await generateQueriesFromDescribe(queryFolder, warehouseId, { + noCache, + }); + queryRegistry = result.schemas; + syntaxErrors = result.syntaxErrors; } const typeDeclarations = generateTypeDeclarations(queryRegistry); @@ -97,6 +109,15 @@ export async function generateFromEntryPoint(options: { await removeOldGeneratedTypes(projectRoot, "appKitTypes.d.ts"); await migrateProjectConfig(projectRoot); + // Types are always written above — including `result: unknown` for any query + // that could not be described — so a transient warehouse outage never blocks a + // build. Only a genuine SQL error against a REACHABLE warehouse is surfaced as + // a throw; the Vite plugin / CLI apply the prod-fails / dev-warns gate. + // Connectivity failures are absent from `syntaxErrors`, so they pass silently. + if (syntaxErrors.length > 0) { + throw new TypegenSyntaxError(syntaxErrors, warehouseId); + } + logger.debug("Type generation complete!"); } diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts index 06ee64bac..3ab4c678f 100644 --- a/packages/appkit/src/type-generator/query-registry.ts +++ b/packages/appkit/src/type-generator/query-registry.ts @@ -7,7 +7,9 @@ import { CACHE_VERSION, hashSQL, loadCache, saveCache } from "./cache"; import { Spinner } from "./spinner"; import { type DatabricksStatementExecutionResponse, + type QueryGenerationResult, type QuerySchema, + type QuerySyntaxError, sqlTypeToHelper, sqlTypeToMarker, } from "./types"; @@ -272,7 +274,7 @@ export async function generateQueriesFromDescribe( queryFolder: string, warehouseId: string, options: { noCache?: boolean; concurrency?: number } = {}, -): Promise { +): Promise { const { noCache = false, concurrency: rawConcurrency = 10 } = options; const concurrency = typeof rawConcurrency === "number" && Number.isFinite(rawConcurrency) @@ -314,7 +316,9 @@ export async function generateQueriesFromDescribe( const logEntries: Array<{ queryName: string; status: "HIT" | "MISS"; - failed?: boolean; + // Absent for clean hits/misses. "syntax" = bad SQL on a reachable warehouse; + // "connectivity" = warehouse unreachable; "empty" = described but no columns. + kind?: "syntax" | "connectivity" | "empty"; error?: { code?: string; message: string }; }> = []; @@ -369,20 +373,32 @@ export async function generateQueriesFromDescribe( // Phase 2: Execute all uncached DESCRIBE calls in parallel type DescribeResult = | { + // Described successfully with a result schema — the only case we cache. status: "ok"; index: number; schema: QuerySchema; cacheEntry: { hash: string; type: string; retry: boolean }; } | { - status: "fail"; + // Reachable warehouse ran DESCRIBE and rejected the statement — a + // genuine SQL error. Eligible to fail the build; never cached. + status: "syntax"; index: number; schema: QuerySchema; - cacheEntry: { hash: string; type: string; retry: boolean }; error: { code?: string; message: string }; + } + | { + // DESCRIBE succeeded but returned no columns — soft `unknown`. Not a + // failure, not cached, retried next run. + status: "empty"; + index: number; + schema: QuerySchema; }; const freshResults: Array<{ index: number; schema: QuerySchema }> = []; + // Genuine SQL errors (reachable warehouse). Connectivity failures are NOT + // recorded here — they degrade silently so a transient outage isn't fatal. + const syntaxErrors: QuerySyntaxError[] = []; if (uncachedQueries.length > 0) { let completed = 0; @@ -416,25 +432,31 @@ export async function generateQueriesFromDescribe( ); if (result.status.state === "FAILED") { + // The warehouse was reachable and ran DESCRIBE, but the statement + // failed — a genuine SQL error (bad table, syntax, incompatible type). const sqlError = result.status.error?.message || "Query execution failed"; logger.warn("DESCRIBE failed for %s: %s", queryName, sqlError); const type = generateUnknownResultQuery(sql, queryName); return { - status: "fail", + status: "syntax", index, schema: { name: queryName, type }, - cacheEntry: { hash: sqlHash, type, retry: true }, error: parseError(sqlError), }; } const { type, hasResults } = convertToQueryType(result, sql, queryName); + if (!hasResults) { + // Described, but no result columns. Emit `unknown` and retry next run; + // do not cache (we never persist `result: unknown`). + return { status: "empty", index, schema: { name: queryName, type } }; + } return { status: "ok", index, schema: { name: queryName, type }, - cacheEntry: { hash: sqlHash, type, retry: !hasResults }, + cacheEntry: { hash: sqlHash, type, retry: false }, }; }; @@ -450,27 +472,50 @@ export async function generateQueriesFromDescribe( if (entry.status === "fulfilled") { const res = entry.value; freshResults.push({ index: res.index, schema: res.schema }); - cache.queries[queryName] = res.cacheEntry; - logEntries.push({ - queryName, - status: "MISS", - failed: res.status === "fail", - error: res.status === "fail" ? res.error : undefined, - }); + + if (res.status === "ok") { + // Only a successful describe with a result schema is cached. + cache.queries[queryName] = res.cacheEntry; + logEntries.push({ queryName, status: "MISS" }); + } else if (res.status === "syntax") { + // Genuine SQL error — record it for the caller's prod/dev gate. + // Not cached: re-described next run so a fixed query recovers. + syntaxErrors.push({ name: queryName, message: res.error.message }); + logEntries.push({ + queryName, + status: "MISS", + kind: "syntax", + error: res.error, + }); + } else { + // status === "empty": described, no columns. Soft unknown, not cached. + logEntries.push({ queryName, status: "MISS", kind: "empty" }); + } } else { - const { sql, sqlHash, index } = uncachedQueries[batchOffset + i]; + // executeStatement rejected → the warehouse was unreachable (down, + // network, timeout, auth). This is NOT a query error: reuse the + // last-known-good cached type if we have one (the cache only ever + // holds good types now), otherwise emit `unknown`. Never cached, + // never fatal — so a transient outage can't fail the build. + const { sql, index } = uncachedQueries[batchOffset + i]; const reason = entry.reason instanceof Error ? entry.reason.message : String(entry.reason); - logger.warn("DESCRIBE rejected for %s: %s", queryName, reason); - const type = generateUnknownResultQuery(sql, queryName); + const prior = cache.queries[queryName]; + const type = + prior?.type ?? generateUnknownResultQuery(sql, queryName); + logger.warn( + "DESCRIBE unreachable for %s: %s — %s", + queryName, + reason, + prior ? "reusing last cached type" : "emitting unknown (no cache)", + ); freshResults.push({ index, schema: { name: queryName, type } }); - cache.queries[queryName] = { hash: sqlHash, type, retry: true }; logEntries.push({ queryName, status: "MISS", - failed: true, + kind: "connectivity", error: parseError(reason), }); } @@ -507,36 +552,59 @@ export async function generateQueriesFromDescribe( ); console.log(` ${separator}`); for (const entry of logEntries) { - const tag = entry.failed - ? pc.bold(pc.red("ERROR")) - : entry.status === "HIT" - ? `cache ${pc.bold(pc.green("HIT "))}` - : `cache ${pc.bold(pc.yellow("MISS "))}`; + let tag: string; + switch (entry.kind) { + case "syntax": + tag = pc.bold(pc.red("SQL ERR")); + break; + case "connectivity": + tag = pc.bold(pc.yellow("OFFLINE")); + break; + case "empty": + tag = pc.dim("EMPTY "); + break; + default: + tag = + entry.status === "HIT" + ? `cache ${pc.bold(pc.green("HIT "))}` + : `cache ${pc.bold(pc.yellow("MISS "))}`; + } const rawName = entry.queryName.padEnd(maxNameLen); - const name = entry.failed ? pc.dim(pc.strikethrough(rawName)) : rawName; + // Only genuine SQL errors are struck through. Connectivity/empty kept a + // usable type (reused or unknown), so they read as degraded, not broken. + const name = + entry.kind === "syntax" ? pc.dim(pc.strikethrough(rawName)) : rawName; const errorCode = entry.error?.message.match(/\[([^\]]+)\]/)?.[1]; const reason = errorCode ? ` ${pc.dim(errorCode)}` : ""; console.log(` ${tag} ${name}${reason}`); } const newCount = logEntries.filter( - (e) => e.status === "MISS" && !e.failed, + (e) => e.status === "MISS" && !e.kind, ).length; - const cacheCount = logEntries.filter( - (e) => e.status === "HIT" && !e.failed, + const cacheCount = logEntries.filter((e) => e.status === "HIT").length; + const syntaxCount = logEntries.filter((e) => e.kind === "syntax").length; + const offlineCount = logEntries.filter( + (e) => e.kind === "connectivity", ).length; - const errorCount = logEntries.filter((e) => e.failed).length; + const emptyCount = logEntries.filter((e) => e.kind === "empty").length; console.log(` ${separator}`); const parts = [`${newCount} new`, `${cacheCount} from cache`]; - if (errorCount > 0) - parts.push(`${errorCount} ${errorCount === 1 ? "error" : "errors"}`); + if (syntaxCount > 0) + parts.push( + `${syntaxCount} SQL ${syntaxCount === 1 ? "error" : "errors"}`, + ); + if (offlineCount > 0) parts.push(`${offlineCount} unreachable`); + if (emptyCount > 0) parts.push(`${emptyCount} empty`); console.log(` ${parts.join(", ")}. ${pc.dim(`${elapsed}s`)}`); console.log(""); } // Merge and sort by original file index for deterministic output - return [...cachedResults, ...freshResults] + const schemas = [...cachedResults, ...freshResults] .sort((a, b) => a.index - b.index) .map((r) => r.schema); + + return { schemas, syntaxErrors }; } /** diff --git a/packages/appkit/src/type-generator/tests/generate-queries.test.ts b/packages/appkit/src/type-generator/tests/generate-queries.test.ts index ac43ef9e2..0a707bbc9 100644 --- a/packages/appkit/src/type-generator/tests/generate-queries.test.ts +++ b/packages/appkit/src/type-generator/tests/generate-queries.test.ts @@ -64,7 +64,7 @@ describe("generateQueriesFromDescribe", () => { ]), ); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("users"); @@ -85,7 +85,7 @@ describe("generateQueriesFromDescribe", () => { }, }); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("bad_table"); @@ -102,7 +102,7 @@ describe("generateQueriesFromDescribe", () => { status: { state: "FAILED" }, }); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("query"); @@ -127,7 +127,7 @@ describe("generateQueriesFromDescribe", () => { }, }); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); expect(schemas).toHaveLength(2); @@ -143,7 +143,7 @@ describe("generateQueriesFromDescribe", () => { expect(mocks.saveCache).toHaveBeenCalledTimes(1); }); - test("all queries fail — caches with retry flag, all unknown result types", async () => { + test("all queries fail (connectivity + syntax) — all produce unknown result types", async () => { mocks.readdir.mockResolvedValue(["a.sql", "b.sql"]); mocks.readFile .mockResolvedValueOnce("SELECT * FROM table_a") @@ -156,7 +156,7 @@ describe("generateQueriesFromDescribe", () => { status: { state: "FAILED", error: { message: "Table not found" } }, }); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); expect(schemas).toHaveLength(2); expect(schemas[0].name).toBe("a"); @@ -181,9 +181,13 @@ describe("generateQueriesFromDescribe", () => { .mockResolvedValueOnce(succeededResult([["id", "INT", null]])) .mockResolvedValueOnce(succeededResult([["id", "INT", null]])); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123", { - concurrency: 2, - }); + const { schemas } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + { + concurrency: 2, + }, + ); expect(schemas).toHaveLength(3); expect(schemas[0].name).toBe("q1"); @@ -201,7 +205,7 @@ describe("generateQueriesFromDescribe", () => { ); mocks.executeStatement.mockRejectedValueOnce(new Error("timeout")); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].type).toContain("status: SQLStringMarker"); diff --git a/packages/appkit/src/type-generator/types.ts b/packages/appkit/src/type-generator/types.ts index f54176a8c..aefa42823 100644 --- a/packages/appkit/src/type-generator/types.ts +++ b/packages/appkit/src/type-generator/types.ts @@ -76,3 +76,30 @@ export interface QuerySchema { name: string; type: string; } + +/** + * A genuine SQL error: `DESCRIBE QUERY` ran against a *reachable* warehouse and + * the warehouse reported the statement as FAILED (bad table, syntax error, + * incompatible type, …). Distinct from a connectivity failure (warehouse + * unreachable), which is non-fatal and never recorded here. + * @property name - the query name + * @property message - the SQL error message reported by the warehouse + */ +export interface QuerySyntaxError { + name: string; + message: string; +} + +/** + * Result of describing a folder of queries. + * @property schemas - one schema per query, in original file order. Queries that + * could not be described carry `result: unknown` so output stays valid. + * @property syntaxErrors - queries whose DESCRIBE failed against a reachable + * warehouse (genuine SQL errors). Connectivity failures are deliberately NOT + * included: they degrade silently (reuse last-known-good type or emit + * `unknown`) so a transient outage never fails a build. + */ +export interface QueryGenerationResult { + schemas: QuerySchema[]; + syntaxErrors: QuerySyntaxError[]; +} From bea1bb17c795365fac83ccd7a3d7f636dc6f19d7 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Tue, 2 Jun 2026 09:25:17 +0200 Subject: [PATCH 02/13] test(appkit): cover typegen warehouse-down classification and throw seam Add the regression coverage the warehouse-down crash slipped through: the prior suite tested rejection->unknown as graceful but never connected it to the aggregate throw in generateFromEntryPoint. query-registry (generate-queries.test.ts): - connectivity reuses the last-known-good cached type - empty result (described, no columns) -> unknown, not syntax, not cached - syntax (FAILED) -> recorded in syntaxErrors, not cached - cache HIT serves the stored type without a warehouse call - legacy retry-flagged entry is re-described, not reused - mixed run records only the syntax failure; failures are not persisted generateFromEntryPoint (index.test.ts): - syntax errors throw TypegenSyntaxError - connectivity-only failures do NOT throw (the warehouse-down regression) - the .d.ts is written before the throw Layers 1+2 of the test plan; Layer 3 (analytics vite-plugin) and Layer 4 (CLI exit codes) land in PR2 with their await/parseAsync refactors. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- .../tests/generate-queries.test.ts | 145 +++++++++++++++++- .../src/type-generator/tests/index.test.ts | 105 ++++++++++++- 2 files changed, 246 insertions(+), 4 deletions(-) diff --git a/packages/appkit/src/type-generator/tests/generate-queries.test.ts b/packages/appkit/src/type-generator/tests/generate-queries.test.ts index 0a707bbc9..5d032fb33 100644 --- a/packages/appkit/src/type-generator/tests/generate-queries.test.ts +++ b/packages/appkit/src/type-generator/tests/generate-queries.test.ts @@ -38,6 +38,20 @@ vi.mock("../cache", async (importOriginal) => { }); const { generateQueriesFromDescribe } = await import("../query-registry"); +const { CACHE_VERSION, hashSQL } = await import("../cache"); + +// Sentinel for a previously-generated good type. The code passes cached types +// through verbatim, so equality proves reuse rather than regeneration. +const CACHED_GOOD_TYPE = "RESULT_REUSED_FROM_CACHE"; + +// The `queries` map of the cache object last handed to saveCache — i.e. what +// actually got persisted this run. +const lastSavedQueries = () => + ( + mocks.saveCache.mock.calls.at(-1)?.[0] as + | { queries: Record } + | undefined + )?.queries; function succeededResult(columns: [string, string, string | null][]) { return { @@ -64,7 +78,10 @@ describe("generateQueriesFromDescribe", () => { ]), ); - const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas, syntaxErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("users"); @@ -72,6 +89,9 @@ describe("generateQueriesFromDescribe", () => { expect(schemas[0].type).toContain("name: string"); expect(mocks.spinnerStop).toHaveBeenCalledWith(""); expect(mocks.saveCache).toHaveBeenCalledTimes(1); + // clean success: cached, and not flagged as a syntax error + expect(syntaxErrors).toEqual([]); + expect(lastSavedQueries()?.users.type).toContain("id: number"); }); test("FAILED status with error message — reports SQL error and produces unknown result type", async () => { @@ -156,7 +176,10 @@ describe("generateQueriesFromDescribe", () => { status: { state: "FAILED", error: { message: "Table not found" } }, }); - const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas, syntaxErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); expect(schemas).toHaveLength(2); expect(schemas[0].name).toBe("a"); @@ -166,6 +189,11 @@ describe("generateQueriesFromDescribe", () => { // saveCache called once after all parallel queries complete expect(mocks.saveCache).toHaveBeenCalledTimes(1); + // a = connectivity (rejected) → NOT a syntax error; b = FAILED → syntax error + expect(syntaxErrors).toEqual([{ name: "b", message: "Table not found" }]); + // neither failure is persisted to the cache + expect(lastSavedQueries()).not.toHaveProperty("a"); + expect(lastSavedQueries()).not.toHaveProperty("b"); }); test("concurrency batching — saves cache after each batch", async () => { @@ -212,4 +240,117 @@ describe("generateQueriesFromDescribe", () => { expect(schemas[0].type).toContain("org: SQLTypeMarker"); expect(schemas[0].type).toContain("result: unknown"); }); + + test("connectivity failure reuses the last-known-good cached type", async () => { + const sql = "SELECT id FROM users"; + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue(sql); + // A prior good type cached under a STALE hash: the query is a cache MISS + // (so DESCRIBE is attempted) but a known-good type still exists to reuse. + mocks.loadCache.mockReturnValueOnce({ + version: CACHE_VERSION, + queries: { + users: { hash: "stale-hash", type: CACHED_GOOD_TYPE, retry: false }, + }, + }); + mocks.executeStatement.mockRejectedValueOnce(new Error("ECONNREFUSED")); + + const { schemas, syntaxErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + // reused the cached type instead of clobbering it with `result: unknown` + expect(schemas[0].type).toBe(CACHED_GOOD_TYPE); + expect(schemas[0].type).not.toContain("result: unknown"); + // connectivity is never recorded as a syntax error + expect(syntaxErrors).toEqual([]); + // the existing good entry is left intact (not overwritten) + expect(lastSavedQueries()?.users).toEqual({ + hash: "stale-hash", + type: CACHED_GOOD_TYPE, + retry: false, + }); + }); + + test("empty result (described, no columns) is unknown, not a syntax error, not cached", async () => { + mocks.readdir.mockResolvedValue(["empty.sql"]); + mocks.readFile.mockResolvedValue("SELECT 1"); + mocks.executeStatement.mockResolvedValue(succeededResult([])); + + const { schemas, syntaxErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(syntaxErrors).toEqual([]); + expect(lastSavedQueries()).not.toHaveProperty("empty"); + }); + + test("syntax error (FAILED) is recorded in syntaxErrors and not cached", async () => { + mocks.readdir.mockResolvedValue(["broken.sql"]); + mocks.readFile.mockResolvedValue("SELECT * FROM missing"); + mocks.executeStatement.mockResolvedValue({ + statement_id: "stmt", + status: { + state: "FAILED", + error: { message: "Table or view not found: missing" }, + }, + }); + + const { schemas, syntaxErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(syntaxErrors).toEqual([ + { name: "broken", message: "Table or view not found: missing" }, + ]); + expect(lastSavedQueries()).not.toHaveProperty("broken"); + }); + + test("cache HIT serves the stored type without calling the warehouse", async () => { + const sql = "SELECT id FROM t"; + mocks.readdir.mockResolvedValue(["t.sql"]); + mocks.readFile.mockResolvedValue(sql); + mocks.loadCache.mockReturnValueOnce({ + version: CACHE_VERSION, + queries: { + t: { hash: hashSQL(sql), type: CACHED_GOOD_TYPE, retry: false }, + }, + }); + + const { schemas, syntaxErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(schemas[0].type).toBe(CACHED_GOOD_TYPE); + expect(syntaxErrors).toEqual([]); + }); + + test("stale retry-flagged cache entry is re-described, not reused", async () => { + const sql = "SELECT id FROM t"; + mocks.readdir.mockResolvedValue(["t.sql"]); + mocks.readFile.mockResolvedValue(sql); + // Matching hash but retry:true (legacy poisoned entry) → must NOT be a HIT. + mocks.loadCache.mockReturnValueOnce({ + version: CACHE_VERSION, + queries: { + t: { hash: hashSQL(sql), type: "STALE_UNKNOWN", retry: true }, + }, + }); + mocks.executeStatement.mockResolvedValue( + succeededResult([["id", "INT", null]]), + ); + + const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); + + expect(mocks.executeStatement).toHaveBeenCalledTimes(1); + expect(schemas[0].type).toContain("id: number"); + expect(schemas[0].type).not.toBe("STALE_UNKNOWN"); + }); }); diff --git a/packages/appkit/src/type-generator/tests/index.test.ts b/packages/appkit/src/type-generator/tests/index.test.ts index bd2052273..02e149df2 100644 --- a/packages/appkit/src/type-generator/tests/index.test.ts +++ b/packages/appkit/src/type-generator/tests/index.test.ts @@ -1,7 +1,30 @@ import fs from "node:fs"; import path from "node:path"; -import { afterAll, beforeAll, describe, expect, test } from "vitest"; -import { generateFromEntryPoint } from "../index"; +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + test, + vi, +} from "vitest"; + +const mocks = vi.hoisted(() => ({ + generateQueriesFromDescribe: vi.fn(), +})); + +// Mock only the warehouse-describe step; index.ts owns the throw decision we +// want to exercise (syntax errors fatal, connectivity failures non-fatal). +vi.mock("../query-registry", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + generateQueriesFromDescribe: mocks.generateQueriesFromDescribe, + }; +}); + +const { generateFromEntryPoint, TypegenSyntaxError } = await import("../index"); const outputDir = path.join(__dirname, "__output__"); @@ -52,3 +75,81 @@ describe("generateFromEntryPoint", () => { expect(content).toContain("interface QueryRegistry {}"); }); }); + +describe("generateFromEntryPoint — query failure handling", () => { + const failuresDir = path.join(__dirname, "__output_failures__"); + const outFile = path.join(failuresDir, "analytics.d.ts"); + + const unknownSchema = (name: string) => ({ + name, + type: `{ name: "${name}"; parameters: Record; result: unknown; }`, + }); + + beforeAll(() => { + if (!fs.existsSync(failuresDir)) { + fs.mkdirSync(failuresDir, { recursive: true }); + } + }); + + afterAll(() => { + if (fs.existsSync(failuresDir)) { + fs.rmSync(failuresDir, { recursive: true }); + } + }); + + beforeEach(() => { + vi.clearAllMocks(); + }); + + test("throws TypegenSyntaxError when a query has a genuine SQL error", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("bad")], + syntaxErrors: [{ name: "bad", message: "Table not found: bad" }], + }); + + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + ).rejects.toThrow(TypegenSyntaxError); + }); + + test("does not throw when only connectivity failures occurred (warehouse down)", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("a"), unknownSchema("b")], + syntaxErrors: [], + }); + + // The reported bug: a down warehouse must NOT crash type generation. + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + ).resolves.toBeUndefined(); + }); + + test("writes the .d.ts before throwing on a syntax error", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("bad")], + syntaxErrors: [{ name: "bad", message: "Table not found: bad" }], + }); + + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + ).rejects.toThrow(TypegenSyntaxError); + + // Types are emitted even on failure so the build/dev still has a valid file. + expect(fs.existsSync(outFile)).toBe(true); + expect(fs.readFileSync(outFile, "utf-8")).toContain( + "interface QueryRegistry", + ); + }); +}); From 1e6121695e038dfd1a86546f6ea5d95ece329bb4 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Wed, 3 Jun 2026 14:17:42 +0200 Subject: [PATCH 03/13] fix: classify typegen describe rejections Signed-off-by: Atila Fassina --- .../src/type-generator/query-registry.ts | 41 ++++++++++++++----- .../tests/generate-queries.test.ts | 28 ++++++++++--- 2 files changed, 53 insertions(+), 16 deletions(-) diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts index 3ab4c678f..a413ee192 100644 --- a/packages/appkit/src/type-generator/query-registry.ts +++ b/packages/appkit/src/type-generator/query-registry.ts @@ -84,6 +84,17 @@ function parseError(raw: string): { code?: string; message: string } { return { message: raw }; } +function isConnectivityError(raw: string): boolean { + return ( + /\b(ECONNREFUSED|ECONNRESET|ENOTFOUND|ETIMEDOUT|EAI_AGAIN|EHOSTUNREACH|ENETUNREACH)\b/i.test( + raw, + ) || + /\b(connection refused|connection reset|fetch failed|network error|socket hang up|timed? ?out|timeout)\b/i.test( + raw, + ) + ); +} + /** * Extract parameters from a SQL query * @param sql - the SQL query to extract parameters from @@ -492,31 +503,41 @@ export async function generateQueriesFromDescribe( logEntries.push({ queryName, status: "MISS", kind: "empty" }); } } else { - // executeStatement rejected → the warehouse was unreachable (down, - // network, timeout, auth). This is NOT a query error: reuse the - // last-known-good cached type if we have one (the cache only ever - // holds good types now), otherwise emit `unknown`. Never cached, - // never fatal — so a transient outage can't fail the build. - const { sql, index } = uncachedQueries[batchOffset + i]; + // executeStatement rejected before the warehouse returned a statement + // result. Only clear transport failures are treated as offline; auth, + // bad warehouse IDs, malformed requests, and SDK/config failures stay + // fatal so users fix the underlying setup issue. + const { sql, sqlHash, index } = uncachedQueries[batchOffset + i]; const reason = entry.reason instanceof Error ? entry.reason.message : String(entry.reason); + const error = parseError(reason); + if (!isConnectivityError(reason)) { + spinner.stop(""); + throw new Error( + `DESCRIBE request failed for ${queryName}: ${error.message}`, + ); + } const prior = cache.queries[queryName]; - const type = - prior?.type ?? generateUnknownResultQuery(sql, queryName); + const canReusePrior = prior?.hash === sqlHash && !prior.retry; + const type = canReusePrior + ? prior.type + : generateUnknownResultQuery(sql, queryName); logger.warn( "DESCRIBE unreachable for %s: %s — %s", queryName, reason, - prior ? "reusing last cached type" : "emitting unknown (no cache)", + canReusePrior + ? "reusing last cached type" + : "emitting unknown (no matching cache)", ); freshResults.push({ index, schema: { name: queryName, type } }); logEntries.push({ queryName, status: "MISS", kind: "connectivity", - error: parseError(reason), + error, }); } } diff --git a/packages/appkit/src/type-generator/tests/generate-queries.test.ts b/packages/appkit/src/type-generator/tests/generate-queries.test.ts index 5d032fb33..e6b2cd081 100644 --- a/packages/appkit/src/type-generator/tests/generate-queries.test.ts +++ b/packages/appkit/src/type-generator/tests/generate-queries.test.ts @@ -241,12 +241,13 @@ describe("generateQueriesFromDescribe", () => { expect(schemas[0].type).toContain("result: unknown"); }); - test("connectivity failure reuses the last-known-good cached type", async () => { + test("connectivity failure with stale cache emits unknown for the current SQL", async () => { const sql = "SELECT id FROM users"; mocks.readdir.mockResolvedValue(["users.sql"]); mocks.readFile.mockResolvedValue(sql); // A prior good type cached under a STALE hash: the query is a cache MISS - // (so DESCRIBE is attempted) but a known-good type still exists to reuse. + // (so DESCRIBE is attempted). If the warehouse is unreachable, do not + // publish the stale result columns for different SQL text. mocks.loadCache.mockReturnValueOnce({ version: CACHE_VERSION, queries: { @@ -260,12 +261,11 @@ describe("generateQueriesFromDescribe", () => { "wh-123", ); - // reused the cached type instead of clobbering it with `result: unknown` - expect(schemas[0].type).toBe(CACHED_GOOD_TYPE); - expect(schemas[0].type).not.toContain("result: unknown"); + expect(schemas[0].type).not.toBe(CACHED_GOOD_TYPE); + expect(schemas[0].type).toContain("result: unknown"); // connectivity is never recorded as a syntax error expect(syntaxErrors).toEqual([]); - // the existing good entry is left intact (not overwritten) + // the existing good entry is left intact (not overwritten with unknown) expect(lastSavedQueries()?.users).toEqual({ hash: "stale-hash", type: CACHED_GOOD_TYPE, @@ -273,6 +273,22 @@ describe("generateQueriesFromDescribe", () => { }); }); + test("fatal rejected DESCRIBE request is not downgraded to offline", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce( + new Error("PERMISSION_DENIED: missing warehouse permission"), + ); + + await expect( + generateQueriesFromDescribe("/queries", "wh-123"), + ).rejects.toThrow( + "DESCRIBE request failed for users: PERMISSION_DENIED: missing warehouse permission", + ); + + expect(mocks.saveCache).not.toHaveBeenCalled(); + }); + test("empty result (described, no columns) is unknown, not a syntax error, not cached", async () => { mocks.readdir.mockResolvedValue(["empty.sql"]); mocks.readFile.mockResolvedValue("SELECT 1"); From dc7b902d63a8569c09fea208098da32b3c592259 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Thu, 4 Jun 2026 11:19:40 +0200 Subject: [PATCH 04/13] fix(appkit): harden typegen describe error handling Signed-off-by: Atila Fassina --- packages/appkit/src/type-generator/index.ts | 60 ++++- .../src/type-generator/query-registry.ts | 182 ++++++++++++--- .../tests/generate-queries.test.ts | 209 ++++++++++++++++-- .../src/type-generator/tests/index.test.ts | 47 +++- packages/appkit/src/type-generator/types.ts | 16 ++ .../appkit/src/type-generator/vite-plugin.ts | 2 +- .../src/cli/commands/type-generator.d.ts | 9 + packages/shared/src/cli/index.ts | 2 +- 8 files changed, 471 insertions(+), 56 deletions(-) diff --git a/packages/appkit/src/type-generator/index.ts b/packages/appkit/src/type-generator/index.ts index e0379152a..8432cdb07 100644 --- a/packages/appkit/src/type-generator/index.ts +++ b/packages/appkit/src/type-generator/index.ts @@ -9,7 +9,7 @@ import { } from "./migration"; import { generateQueriesFromDescribe } from "./query-registry"; import { generateServingTypes as generateServingTypesImpl } from "./serving/generator"; -import type { QuerySchema, QuerySyntaxError } from "./types"; +import type { QueryFatalError, QuerySchema, QuerySyntaxError } from "./types"; dotenv.config(); @@ -24,21 +24,59 @@ const logger = createLogger("type-generator"); */ export class TypegenSyntaxError extends Error { readonly queries: QuerySyntaxError[]; + readonly fatalQueries: QueryFatalError[]; - constructor(queries: QuerySyntaxError[], warehouseId?: string) { + constructor( + queries: QuerySyntaxError[], + warehouseId?: string, + fatalQueries: QueryFatalError[] = [], + ) { const names = queries.map((q) => q.name).join(", "); + const fatalNames = fatalQueries.map((q) => q.name).join(", "); super( [ `Type generation failed: ${queries.length} ${queries.length === 1 ? "query" : "queries"} could not be described: ${names}.`, `DESCRIBE QUERY failed for these queries — see the error codes above for details.`, + fatalQueries.length > 0 + ? `Additionally, ${fatalQueries.length} ${fatalQueries.length === 1 ? "query" : "queries"} could not request DESCRIBE QUERY because of non-SQL fatal errors: ${fatalNames}.` + : undefined, `Common causes: SQL syntax errors, missing tables/views, or warehouse format incompatibilities.`, warehouseId ? `To debug: run the failing query directly in a SQL editor against warehouse ${warehouseId}.` : `To debug: run the failing query directly in a SQL editor.`, - ].join("\n"), + ] + .filter(Boolean) + .join("\n"), ); this.name = "TypegenSyntaxError"; this.queries = queries; + this.fatalQueries = fatalQueries; + } +} + +/** + * Thrown when DESCRIBE QUERY could not be requested because of a non-SQL fatal + * setup/request problem, such as missing permissions, invalid warehouse IDs, or + * malformed SDK configuration. Like TypegenSyntaxError, this is thrown only + * after the declaration file has been written with `result: unknown` entries. + */ +export class TypegenFatalError extends Error { + readonly queries: QueryFatalError[]; + + constructor(queries: QueryFatalError[], warehouseId?: string) { + const names = queries.map((q) => q.name).join(", "); + super( + [ + `Type generation failed: ${queries.length} ${queries.length === 1 ? "query" : "queries"} could not be described: ${names}.`, + `DESCRIBE QUERY could not be requested for these queries — see the error details above.`, + `Common causes: missing warehouse permissions, invalid warehouse ID, authentication failure, or SDK configuration errors.`, + warehouseId + ? `To debug: verify access to warehouse ${warehouseId} and rerun type generation.` + : `To debug: verify warehouse access and rerun type generation.`, + ].join("\n"), + ); + this.name = "TypegenFatalError"; + this.queries = queries; } } @@ -92,12 +130,14 @@ export async function generateFromEntryPoint(options: { let queryRegistry: QuerySchema[] = []; let syntaxErrors: QuerySyntaxError[] = []; + let fatalErrors: QueryFatalError[] = []; if (queryFolder) { const result = await generateQueriesFromDescribe(queryFolder, warehouseId, { noCache, }); queryRegistry = result.schemas; - syntaxErrors = result.syntaxErrors; + syntaxErrors = result.syntaxErrors ?? []; + fatalErrors = result.fatalErrors ?? []; } const typeDeclarations = generateTypeDeclarations(queryRegistry); @@ -110,12 +150,14 @@ export async function generateFromEntryPoint(options: { await migrateProjectConfig(projectRoot); // Types are always written above — including `result: unknown` for any query - // that could not be described — so a transient warehouse outage never blocks a - // build. Only a genuine SQL error against a REACHABLE warehouse is surfaced as - // a throw; the Vite plugin / CLI apply the prod-fails / dev-warns gate. - // Connectivity failures are absent from `syntaxErrors`, so they pass silently. + // that could not be described. Connectivity failures pass silently so a + // transient warehouse outage never blocks a build; genuine SQL errors and + // non-connectivity fatal request failures surface after the file write. if (syntaxErrors.length > 0) { - throw new TypegenSyntaxError(syntaxErrors, warehouseId); + throw new TypegenSyntaxError(syntaxErrors, warehouseId, fatalErrors); + } + if (fatalErrors.length > 0) { + throw new TypegenFatalError(fatalErrors, warehouseId); } logger.debug("Type generation complete!"); diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts index a413ee192..95da18dad 100644 --- a/packages/appkit/src/type-generator/query-registry.ts +++ b/packages/appkit/src/type-generator/query-registry.ts @@ -7,6 +7,7 @@ import { CACHE_VERSION, hashSQL, loadCache, saveCache } from "./cache"; import { Spinner } from "./spinner"; import { type DatabricksStatementExecutionResponse, + type QueryFatalError, type QueryGenerationResult, type QuerySchema, type QuerySyntaxError, @@ -84,17 +85,128 @@ function parseError(raw: string): { code?: string; message: string } { return { message: raw }; } -function isConnectivityError(raw: string): boolean { +function isObject(value: unknown): value is Record { + return typeof value === "object" && value !== null; +} + +function getErrorMessage(error: unknown): string { + if (error instanceof Error) return error.message; + if (isObject(error) && typeof error.message === "string") { + return error.message; + } + return String(error); +} + +function getErrorDiagnostic(error: unknown): string { + const seen = new Set(); + const messages: string[] = []; + const stack = [error]; + + while (stack.length > 0) { + const current = stack.pop(); + if (current === undefined || seen.has(current)) continue; + seen.add(current); + + const message = getErrorMessage(current); + if ( + message && + message !== "[object Object]" && + !messages.includes(message) + ) { + messages.push(message); + } + + const code = getErrorCode(current); + if (code && !messages.includes(code)) messages.push(code); + + stack.push(...getErrorChildren(current)); + } + + return messages.length > 0 ? messages.join(": ") : getErrorMessage(error); +} + +function getErrorCode(error: unknown): string | undefined { + if (!isObject(error)) return undefined; + const code = error.code ?? error.errno; + return typeof code === "string" ? code : undefined; +} + +function getErrorStatus(error: unknown): number | undefined { + if (!isObject(error)) return undefined; + const direct = error.status ?? error.statusCode; + if (typeof direct === "number") return direct; + if (isObject(error.response) && typeof error.response.status === "number") { + return error.response.status; + } + return undefined; +} + +function getErrorChildren(error: unknown): unknown[] { + if (!isObject(error)) return []; + const children: unknown[] = []; + if ("cause" in error) children.push(error.cause); + if (error instanceof AggregateError) children.push(...error.errors); + return children; +} + +const CONNECTIVITY_ERROR_CODES = new Set([ + "ECONNREFUSED", + "ECONNRESET", + "ENOTFOUND", + "ETIMEDOUT", + "EAI_AGAIN", + "EAI_NODATA", + "EAI_NONAME", + "EHOSTUNREACH", + "ENETUNREACH", + "CERT_HAS_EXPIRED", + "DEPTH_ZERO_SELF_SIGNED_CERT", + "ERR_TLS_CERT_ALTNAME_INVALID", + "SELF_SIGNED_CERT_IN_CHAIN", + "UNABLE_TO_VERIFY_LEAF_SIGNATURE", +]); + +function isConnectivityMessage(message: string): boolean { return ( - /\b(ECONNREFUSED|ECONNRESET|ENOTFOUND|ETIMEDOUT|EAI_AGAIN|EHOSTUNREACH|ENETUNREACH)\b/i.test( - raw, - ) || - /\b(connection refused|connection reset|fetch failed|network error|socket hang up|timed? ?out|timeout)\b/i.test( - raw, + /\bconnection (?:refused|reset|timed out)\b/i.test(message) || + /\bsocket hang up\b/i.test(message) || + /\bnetwork error\b/i.test(message) || + /\bcertificate has expired\b/i.test(message) || + /\bunable to verify the first certificate\b/i.test(message) || + /\bupstream connect error or disconnect\/reset before headers\b/i.test( + message, ) ); } +function isConnectivityError(error: unknown): boolean { + const seen = new Set(); + const stack = [error]; + + while (stack.length > 0) { + const current = stack.pop(); + if (current === undefined || seen.has(current)) continue; + seen.add(current); + + const code = getErrorCode(current); + if ( + code && + (CONNECTIVITY_ERROR_CODES.has(code) || code.startsWith("UND_ERR_")) + ) { + return true; + } + + const status = getErrorStatus(current); + if (status === 502 || status === 503 || status === 504) return true; + + if (isConnectivityMessage(getErrorMessage(current))) return true; + + stack.push(...getErrorChildren(current)); + } + + return false; +} + /** * Extract parameters from a SQL query * @param sql - the SQL query to extract parameters from @@ -328,8 +440,9 @@ export async function generateQueriesFromDescribe( queryName: string; status: "HIT" | "MISS"; // Absent for clean hits/misses. "syntax" = bad SQL on a reachable warehouse; - // "connectivity" = warehouse unreachable; "empty" = described but no columns. - kind?: "syntax" | "connectivity" | "empty"; + // "connectivity" = warehouse unreachable; "empty" = described but no columns; + // "fatal" = non-SQL setup/request failure surfaced after .d.ts emission. + kind?: "syntax" | "connectivity" | "empty" | "fatal"; error?: { code?: string; message: string }; }> = []; @@ -410,6 +523,7 @@ export async function generateQueriesFromDescribe( // Genuine SQL errors (reachable warehouse). Connectivity failures are NOT // recorded here — they degrade silently so a transient outage isn't fatal. const syntaxErrors: QuerySyntaxError[] = []; + const fatalErrors: QueryFatalError[] = []; if (uncachedQueries.length > 0) { let completed = 0; @@ -503,27 +617,36 @@ export async function generateQueriesFromDescribe( logEntries.push({ queryName, status: "MISS", kind: "empty" }); } } else { - // executeStatement rejected before the warehouse returned a statement - // result. Only clear transport failures are treated as offline; auth, - // bad warehouse IDs, malformed requests, and SDK/config failures stay - // fatal so users fix the underlying setup issue. + // executeStatement rejected without a normal StatementExecution result. + // Only structured transport/connectivity failures are treated as + // offline; auth, bad warehouse IDs, malformed requests, and SDK/config + // failures stay fatal so users fix the underlying setup issue. + completed++; + spinner.update( + `Describing ${total} ${total === 1 ? "query" : "queries"} (${completed}/${total})`, + ); + const { sql, sqlHash, index } = uncachedQueries[batchOffset + i]; - const reason = - entry.reason instanceof Error - ? entry.reason.message - : String(entry.reason); + const reason = getErrorDiagnostic(entry.reason); const error = parseError(reason); - if (!isConnectivityError(reason)) { - spinner.stop(""); - throw new Error( - `DESCRIBE request failed for ${queryName}: ${error.message}`, - ); - } const prior = cache.queries[queryName]; const canReusePrior = prior?.hash === sqlHash && !prior.retry; const type = canReusePrior ? prior.type : generateUnknownResultQuery(sql, queryName); + freshResults.push({ index, schema: { name: queryName, type } }); + + if (!isConnectivityError(entry.reason)) { + fatalErrors.push({ name: queryName, message: error.message }); + logEntries.push({ + queryName, + status: "MISS", + kind: "fatal", + error, + }); + continue; + } + logger.warn( "DESCRIBE unreachable for %s: %s — %s", queryName, @@ -532,7 +655,6 @@ export async function generateQueriesFromDescribe( ? "reusing last cached type" : "emitting unknown (no matching cache)", ); - freshResults.push({ index, schema: { name: queryName, type } }); logEntries.push({ queryName, status: "MISS", @@ -584,6 +706,9 @@ export async function generateQueriesFromDescribe( case "empty": tag = pc.dim("EMPTY "); break; + case "fatal": + tag = pc.bold(pc.red("FATAL ")); + break; default: tag = entry.status === "HIT" @@ -594,7 +719,9 @@ export async function generateQueriesFromDescribe( // Only genuine SQL errors are struck through. Connectivity/empty kept a // usable type (reused or unknown), so they read as degraded, not broken. const name = - entry.kind === "syntax" ? pc.dim(pc.strikethrough(rawName)) : rawName; + entry.kind === "syntax" || entry.kind === "fatal" + ? pc.dim(pc.strikethrough(rawName)) + : rawName; const errorCode = entry.error?.message.match(/\[([^\]]+)\]/)?.[1]; const reason = errorCode ? ` ${pc.dim(errorCode)}` : ""; console.log(` ${tag} ${name}${reason}`); @@ -608,6 +735,7 @@ export async function generateQueriesFromDescribe( (e) => e.kind === "connectivity", ).length; const emptyCount = logEntries.filter((e) => e.kind === "empty").length; + const fatalCount = logEntries.filter((e) => e.kind === "fatal").length; console.log(` ${separator}`); const parts = [`${newCount} new`, `${cacheCount} from cache`]; if (syntaxCount > 0) @@ -616,6 +744,10 @@ export async function generateQueriesFromDescribe( ); if (offlineCount > 0) parts.push(`${offlineCount} unreachable`); if (emptyCount > 0) parts.push(`${emptyCount} empty`); + if (fatalCount > 0) + parts.push( + `${fatalCount} fatal ${fatalCount === 1 ? "error" : "errors"}`, + ); console.log(` ${parts.join(", ")}. ${pc.dim(`${elapsed}s`)}`); console.log(""); } @@ -625,7 +757,7 @@ export async function generateQueriesFromDescribe( .sort((a, b) => a.index - b.index) .map((r) => r.schema); - return { schemas, syntaxErrors }; + return { schemas, syntaxErrors, fatalErrors }; } /** diff --git a/packages/appkit/src/type-generator/tests/generate-queries.test.ts b/packages/appkit/src/type-generator/tests/generate-queries.test.ts index e6b2cd081..637bbc161 100644 --- a/packages/appkit/src/type-generator/tests/generate-queries.test.ts +++ b/packages/appkit/src/type-generator/tests/generate-queries.test.ts @@ -78,10 +78,8 @@ describe("generateQueriesFromDescribe", () => { ]), ); - const { schemas, syntaxErrors } = await generateQueriesFromDescribe( - "/queries", - "wh-123", - ); + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("users"); @@ -91,6 +89,7 @@ describe("generateQueriesFromDescribe", () => { expect(mocks.saveCache).toHaveBeenCalledTimes(1); // clean success: cached, and not flagged as a syntax error expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); expect(lastSavedQueries()?.users.type).toContain("id: number"); }); @@ -176,10 +175,8 @@ describe("generateQueriesFromDescribe", () => { status: { state: "FAILED", error: { message: "Table not found" } }, }); - const { schemas, syntaxErrors } = await generateQueriesFromDescribe( - "/queries", - "wh-123", - ); + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123"); expect(schemas).toHaveLength(2); expect(schemas[0].name).toBe("a"); @@ -231,7 +228,9 @@ describe("generateQueriesFromDescribe", () => { mocks.readFile.mockResolvedValue( "-- @param status STRING\nSELECT * FROM t WHERE status = :status AND org = :org", ); - mocks.executeStatement.mockRejectedValueOnce(new Error("timeout")); + mocks.executeStatement.mockRejectedValueOnce( + Object.assign(new Error("connect ETIMEDOUT"), { code: "ETIMEDOUT" }), + ); const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); @@ -254,17 +253,20 @@ describe("generateQueriesFromDescribe", () => { users: { hash: "stale-hash", type: CACHED_GOOD_TYPE, retry: false }, }, }); - mocks.executeStatement.mockRejectedValueOnce(new Error("ECONNREFUSED")); - - const { schemas, syntaxErrors } = await generateQueriesFromDescribe( - "/queries", - "wh-123", + mocks.executeStatement.mockRejectedValueOnce( + Object.assign(new Error("connect ECONNREFUSED"), { + code: "ECONNREFUSED", + }), ); + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123"); + expect(schemas[0].type).not.toBe(CACHED_GOOD_TYPE); expect(schemas[0].type).toContain("result: unknown"); // connectivity is never recorded as a syntax error expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); // the existing good entry is left intact (not overwritten with unknown) expect(lastSavedQueries()?.users).toEqual({ hash: "stale-hash", @@ -280,13 +282,182 @@ describe("generateQueriesFromDescribe", () => { new Error("PERMISSION_DENIED: missing warehouse permission"), ); - await expect( - generateQueriesFromDescribe("/queries", "wh-123"), - ).rejects.toThrow( - "DESCRIBE request failed for users: PERMISSION_DENIED: missing warehouse permission", + const { schemas, fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([ + { + name: "users", + message: "PERMISSION_DENIED: missing warehouse permission", + }, + ]); + expect(mocks.saveCache).toHaveBeenCalledTimes(1); + expect(lastSavedQueries()).not.toHaveProperty("users"); + }); + + test("HTTP 503 wrapper error is classified as connectivity", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce( + Object.assign(new Error("Service unavailable"), { statusCode: 503 }), + ); + + const { schemas, fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([]); + }); + + test.each([ + ["HTTP 502", Object.assign(new Error("Bad gateway"), { status: 502 })], + [ + "HTTP 504 response", + Object.assign(new Error("Gateway timeout"), { + response: { status: 504 }, + }), + ], + [ + "EAI_NODATA", + Object.assign(new Error("DNS lookup failed"), { code: "EAI_NODATA" }), + ], + [ + "Envoy upstream disconnect", + new Error("upstream connect error or disconnect/reset before headers"), + ], + ])("%s is classified as connectivity", async (_name, error) => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce(error); + + const { schemas, fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([]); + }); + + test("mixed syntax and fatal failures are both returned", async () => { + mocks.readdir.mockResolvedValue(["syntax.sql", "fatal.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT * FROM missing") + .mockResolvedValueOnce("SELECT * FROM auth_blocked"); + mocks.executeStatement + .mockResolvedValueOnce({ + statement_id: "stmt-syntax", + status: { + state: "FAILED", + error: { message: "Table not found" }, + }, + }) + .mockRejectedValueOnce(new Error("PERMISSION_DENIED")); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123"); + + expect(schemas).toHaveLength(2); + expect(syntaxErrors).toEqual([ + { name: "syntax", message: "Table not found" }, + ]); + expect(fatalErrors).toEqual([ + { name: "fatal", message: "PERMISSION_DENIED" }, + ]); + }); + + test("undici cause code is classified as connectivity even when wrapper message is generic fetch failed", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce( + Object.assign(new TypeError("fetch failed"), { + cause: { code: "UND_ERR_CONNECT_TIMEOUT" }, + }), + ); + + const { schemas, fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([]); + }); + + test("TLS certificate message is classified as connectivity", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce( + new Error("unable to verify the first certificate"), + ); + + const { fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", ); - expect(mocks.saveCache).not.toHaveBeenCalled(); + expect(fatalErrors).toEqual([]); + }); + + test("bare timeout and fetch failed messages are not overmatched as connectivity", async () => { + mocks.readdir.mockResolvedValue(["timeout.sql", "oauth.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT id FROM timeout") + .mockResolvedValueOnce("SELECT id FROM oauth"); + mocks.executeStatement + .mockRejectedValueOnce( + new Error("INVALID_PARAMETER_VALUE: timeout must be > 0"), + ) + .mockRejectedValueOnce( + Object.assign(new TypeError("fetch failed"), { + cause: { code: "EXPIRED_OAUTH_TOKEN", message: "token expired" }, + }), + ); + + const { schemas, fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(schemas).toHaveLength(2); + expect(fatalErrors).toEqual([ + { + name: "timeout", + message: "INVALID_PARAMETER_VALUE: timeout must be > 0", + }, + { + name: "oauth", + message: "fetch failed: token expired: EXPIRED_OAUTH_TOKEN", + }, + ]); + }); + + test("successful describes in a fatal batch are saved", async () => { + mocks.readdir.mockResolvedValue(["good.sql", "bad_auth.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT id FROM good") + .mockResolvedValueOnce("SELECT id FROM bad_auth"); + mocks.executeStatement + .mockResolvedValueOnce(succeededResult([["id", "INT", null]])) + .mockRejectedValueOnce(new Error("PERMISSION_DENIED")); + + const { schemas, fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("id: number"); + expect(schemas[1].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([ + { name: "bad_auth", message: "PERMISSION_DENIED" }, + ]); + expect(lastSavedQueries()?.good.type).toContain("id: number"); + expect(lastSavedQueries()).not.toHaveProperty("bad_auth"); }); test("empty result (described, no columns) is unknown, not a syntax error, not cached", async () => { diff --git a/packages/appkit/src/type-generator/tests/index.test.ts b/packages/appkit/src/type-generator/tests/index.test.ts index 02e149df2..4c37699ad 100644 --- a/packages/appkit/src/type-generator/tests/index.test.ts +++ b/packages/appkit/src/type-generator/tests/index.test.ts @@ -24,7 +24,8 @@ vi.mock("../query-registry", async (importOriginal) => { }; }); -const { generateFromEntryPoint, TypegenSyntaxError } = await import("../index"); +const { generateFromEntryPoint, TypegenFatalError, TypegenSyntaxError } = + await import("../index"); const outputDir = path.join(__dirname, "__output__"); @@ -105,6 +106,7 @@ describe("generateFromEntryPoint — query failure handling", () => { mocks.generateQueriesFromDescribe.mockResolvedValue({ schemas: [unknownSchema("bad")], syntaxErrors: [{ name: "bad", message: "Table not found: bad" }], + fatalErrors: [], }); await expect( @@ -116,10 +118,33 @@ describe("generateFromEntryPoint — query failure handling", () => { ).rejects.toThrow(TypegenSyntaxError); }); + test("TypegenSyntaxError includes fatal queries from a mixed failure", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("bad_sql"), unknownSchema("bad_auth")], + syntaxErrors: [{ name: "bad_sql", message: "Table not found" }], + fatalErrors: [{ name: "bad_auth", message: "PERMISSION_DENIED" }], + }); + + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + ).rejects.toMatchObject({ + name: "TypegenSyntaxError", + fatalQueries: [{ name: "bad_auth", message: "PERMISSION_DENIED" }], + }); + + expect(fs.existsSync(outFile)).toBe(true); + expect(fs.readFileSync(outFile, "utf-8")).toContain("bad_auth"); + }); + test("does not throw when only connectivity failures occurred (warehouse down)", async () => { mocks.generateQueriesFromDescribe.mockResolvedValue({ schemas: [unknownSchema("a"), unknownSchema("b")], syntaxErrors: [], + fatalErrors: [], }); // The reported bug: a down warehouse must NOT crash type generation. @@ -136,6 +161,7 @@ describe("generateFromEntryPoint — query failure handling", () => { mocks.generateQueriesFromDescribe.mockResolvedValue({ schemas: [unknownSchema("bad")], syntaxErrors: [{ name: "bad", message: "Table not found: bad" }], + fatalErrors: [], }); await expect( @@ -152,4 +178,23 @@ describe("generateFromEntryPoint — query failure handling", () => { "interface QueryRegistry", ); }); + + test("throws TypegenFatalError after writing the .d.ts for non-syntax fatal describe errors", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("bad_auth")], + syntaxErrors: [], + fatalErrors: [{ name: "bad_auth", message: "PERMISSION_DENIED" }], + }); + + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + ).rejects.toThrow(TypegenFatalError); + + expect(fs.existsSync(outFile)).toBe(true); + expect(fs.readFileSync(outFile, "utf-8")).toContain("bad_auth"); + }); }); diff --git a/packages/appkit/src/type-generator/types.ts b/packages/appkit/src/type-generator/types.ts index aefa42823..684b69045 100644 --- a/packages/appkit/src/type-generator/types.ts +++ b/packages/appkit/src/type-generator/types.ts @@ -90,6 +90,18 @@ export interface QuerySyntaxError { message: string; } +/** + * A non-SQL fatal error while attempting to describe a query: authentication, + * authorization, invalid warehouse/configuration, malformed SDK request, or + * any other setup problem that should not be treated as an offline warehouse. + * @property name - the query name + * @property message - the fatal error message + */ +export interface QueryFatalError { + name: string; + message: string; +} + /** * Result of describing a folder of queries. * @property schemas - one schema per query, in original file order. Queries that @@ -98,8 +110,12 @@ export interface QuerySyntaxError { * warehouse (genuine SQL errors). Connectivity failures are deliberately NOT * included: they degrade silently (reuse last-known-good type or emit * `unknown`) so a transient outage never fails a build. + * @property fatalErrors - non-SQL fatal describe request failures. These still + * produce `result: unknown` schemas so callers can write declarations before + * surfacing the error. */ export interface QueryGenerationResult { schemas: QuerySchema[]; syntaxErrors: QuerySyntaxError[]; + fatalErrors: QueryFatalError[]; } diff --git a/packages/appkit/src/type-generator/vite-plugin.ts b/packages/appkit/src/type-generator/vite-plugin.ts index 5f4a0d4b1..1a9117aed 100644 --- a/packages/appkit/src/type-generator/vite-plugin.ts +++ b/packages/appkit/src/type-generator/vite-plugin.ts @@ -84,7 +84,7 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { }, buildStart() { - generate(); + return generate(); }, configureServer(server) { diff --git a/packages/shared/src/cli/commands/type-generator.d.ts b/packages/shared/src/cli/commands/type-generator.d.ts index ce69781fa..5b373b855 100644 --- a/packages/shared/src/cli/commands/type-generator.d.ts +++ b/packages/shared/src/cli/commands/type-generator.d.ts @@ -7,6 +7,15 @@ declare module "@databricks/appkit/type-generator" { noCache?: boolean; }): Promise; + export class TypegenSyntaxError extends Error { + readonly queries: Array<{ name: string; message: string }>; + readonly fatalQueries: Array<{ name: string; message: string }>; + } + + export class TypegenFatalError extends Error { + readonly queries: Array<{ name: string; message: string }>; + } + export function generateServingTypes(options: { outFile: string; noCache?: boolean; diff --git a/packages/shared/src/cli/index.ts b/packages/shared/src/cli/index.ts index 4d0ed65b7..aa60157c8 100644 --- a/packages/shared/src/cli/index.ts +++ b/packages/shared/src/cli/index.ts @@ -29,4 +29,4 @@ cmd.addCommand(docsCommand); cmd.addCommand(pluginCommand); cmd.addCommand(codemodCommand); -cmd.parse(); +await cmd.parseAsync(); From 97c94a1220e2d25917c85856a4487ae5363ab60e Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Fri, 5 Jun 2026 18:29:30 +0200 Subject: [PATCH 05/13] feat(appkit): warehouse-aware typegen pre-flight Add a warehouse-status pre-flight to typegen and rework error handling so a stopped, starting, or unreachable warehouse degrades gracefully instead of crashing or emitting EMPTY types. - Classify connectivity (incl. the SDK's "Can't connect to "/code-500 DNS wrapper) as OFFLINE, and non-terminal describe states (PENDING/RUNNING) as degraded rather than EMPTY. - Surface typegen errors as their actionable message (no internal stack trace); format and de-duplicate the failure output. - Add a warehouse status probe + pure pre-flight policy; block in the CLI/build, roll forward (degrade) in dev. - Dev: regenerate types in the background once the warehouse reaches RUNNING (single-flight guarded); auto-start a stopped warehouse in dev. - CLI: --no-block degrades instead of describing so postinstall never blocks. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/type-generator/index.ts | 136 ++++-- .../appkit/src/type-generator/preflight.ts | 58 +++ .../src/type-generator/query-registry.ts | 419 ++++++++++++------ .../tests/generate-queries.test.ts | 257 +++++++++++ .../type-generator/tests/preflight.test.ts | 53 +++ .../type-generator/tests/vite-plugin.test.ts | 389 ++++++++++++++++ .../tests/warehouse-status.test.ts | 194 ++++++++ .../appkit/src/type-generator/vite-plugin.ts | 205 ++++++++- .../src/type-generator/warehouse-status.ts | 176 ++++++++ .../src/cli/commands/generate-types.test.ts | 21 + .../shared/src/cli/commands/generate-types.ts | 39 +- .../src/cli/commands/type-generator.d.ts | 5 + template/package.json | 5 +- 13 files changed, 1789 insertions(+), 168 deletions(-) create mode 100644 packages/appkit/src/type-generator/preflight.ts create mode 100644 packages/appkit/src/type-generator/tests/preflight.test.ts create mode 100644 packages/appkit/src/type-generator/tests/vite-plugin.test.ts create mode 100644 packages/appkit/src/type-generator/tests/warehouse-status.test.ts create mode 100644 packages/appkit/src/type-generator/warehouse-status.ts create mode 100644 packages/shared/src/cli/commands/generate-types.test.ts diff --git a/packages/appkit/src/type-generator/index.ts b/packages/appkit/src/type-generator/index.ts index 8432cdb07..0516c42d0 100644 --- a/packages/appkit/src/type-generator/index.ts +++ b/packages/appkit/src/type-generator/index.ts @@ -1,12 +1,14 @@ import fs from "node:fs/promises"; import path from "node:path"; import dotenv from "dotenv"; +import pc from "picocolors"; import { createLogger } from "../logging/logger"; import { migrateProjectConfig, removeOldGeneratedTypes, resolveProjectRoot, } from "./migration"; +import type { PreflightMode } from "./preflight"; import { generateQueriesFromDescribe } from "./query-registry"; import { generateServingTypes as generateServingTypesImpl } from "./serving/generator"; import type { QueryFatalError, QuerySchema, QuerySyntaxError } from "./types"; @@ -15,6 +17,83 @@ dotenv.config(); const logger = createLogger("type-generator"); +type TypegenFailure = QuerySyntaxError | QueryFatalError; + +function plural(count: number, singular: string, pluralForm = `${singular}s`) { + return count === 1 ? singular : pluralForm; +} + +function formatFailureRows( + label: string, + queries: TypegenFailure[], + color: (value: string) => string, +) { + if (queries.length === 0) return []; + + // Group by message so a shared failure — e.g. a warehouse-level fatal that + // hits every query identically — prints once instead of repeating per row. + const byMessage = new Map(); + for (const { name, message } of queries) { + const names = byMessage.get(message); + if (names) names.push(name); + else byMessage.set(message, [name]); + } + + const maxNameLen = Math.max(...queries.map((query) => query.name.length)); + const tag = color(label.padEnd(7)); + const rows: string[] = []; + for (const [message, names] of byMessage) { + // Unique message → keep the compact one-line `tag name message` form. + if (names.length === 1) { + rows.push( + ` ${tag} ${pc.bold(names[0].padEnd(maxNameLen))} ${pc.dim(message)}`, + ); + continue; + } + // Shared message → print it once, then list the affected query names. + rows.push( + ` ${tag} ${pc.dim(message)} ${pc.dim(`(${names.length} ${plural(names.length, "query", "queries")})`)}`, + ); + rows.push( + ` ${names.map((name) => pc.bold(name)).join(pc.dim(", "))}`, + ); + } + return rows; +} + +function formatTypegenFailureMessage(options: { + syntaxErrors: QuerySyntaxError[]; + fatalErrors?: QueryFatalError[]; + warehouseId?: string; + title: string; + causes: string[]; + nextStep: string; +}) { + const { syntaxErrors, fatalErrors = [], warehouseId, title } = options; + const total = syntaxErrors.length + fatalErrors.length; + const separator = pc.dim("─".repeat(60)); + const warehouse = warehouseId + ? ` against ${pc.dim(`warehouse ${warehouseId}`)}` + : ""; + + return [ + ` ${pc.bold(pc.red("Type generation failed"))}`, + ` ${separator}`, + ` ${title}: ${total} ${plural(total, "query", "queries")} could not be described${warehouse}.`, + ` AppKit wrote generated types with ${pc.bold("result: unknown")} for the failed ${plural(total, "query", "queries")}.`, + "", + ...formatFailureRows("SQL ERR", syntaxErrors, pc.red), + ...(syntaxErrors.length > 0 && fatalErrors.length > 0 ? [""] : []), + ...formatFailureRows("FATAL", fatalErrors, pc.red), + "", + ` ${pc.bold("Common causes")}`, + ...options.causes.map((cause) => ` - ${cause}`), + "", + ` ${pc.bold("Next step")}`, + ` ${options.nextStep}`, + ].join("\n"); +} + /** * Thrown when one or more queries fail `DESCRIBE QUERY` against a *reachable* * warehouse — i.e. genuine SQL errors (bad table, syntax, incompatible type), @@ -31,22 +110,21 @@ export class TypegenSyntaxError extends Error { warehouseId?: string, fatalQueries: QueryFatalError[] = [], ) { - const names = queries.map((q) => q.name).join(", "); - const fatalNames = fatalQueries.map((q) => q.name).join(", "); super( - [ - `Type generation failed: ${queries.length} ${queries.length === 1 ? "query" : "queries"} could not be described: ${names}.`, - `DESCRIBE QUERY failed for these queries — see the error codes above for details.`, - fatalQueries.length > 0 - ? `Additionally, ${fatalQueries.length} ${fatalQueries.length === 1 ? "query" : "queries"} could not request DESCRIBE QUERY because of non-SQL fatal errors: ${fatalNames}.` - : undefined, - `Common causes: SQL syntax errors, missing tables/views, or warehouse format incompatibilities.`, - warehouseId - ? `To debug: run the failing query directly in a SQL editor against warehouse ${warehouseId}.` - : `To debug: run the failing query directly in a SQL editor.`, - ] - .filter(Boolean) - .join("\n"), + formatTypegenFailureMessage({ + syntaxErrors: queries, + fatalErrors: fatalQueries, + warehouseId, + title: "DESCRIBE QUERY failed", + causes: [ + "SQL syntax errors", + "missing tables or views", + "warehouse format incompatibilities", + ], + nextStep: warehouseId + ? `Run each SQL ERR query directly in a Databricks SQL editor against warehouse ${pc.bold(warehouseId)}.` + : "Run each SQL ERR query directly in a Databricks SQL editor.", + }), ); this.name = "TypegenSyntaxError"; this.queries = queries; @@ -64,16 +142,22 @@ export class TypegenFatalError extends Error { readonly queries: QueryFatalError[]; constructor(queries: QueryFatalError[], warehouseId?: string) { - const names = queries.map((q) => q.name).join(", "); super( - [ - `Type generation failed: ${queries.length} ${queries.length === 1 ? "query" : "queries"} could not be described: ${names}.`, - `DESCRIBE QUERY could not be requested for these queries — see the error details above.`, - `Common causes: missing warehouse permissions, invalid warehouse ID, authentication failure, or SDK configuration errors.`, - warehouseId - ? `To debug: verify access to warehouse ${warehouseId} and rerun type generation.` - : `To debug: verify warehouse access and rerun type generation.`, - ].join("\n"), + formatTypegenFailureMessage({ + syntaxErrors: [], + fatalErrors: queries, + warehouseId, + title: "DESCRIBE QUERY could not be requested", + causes: [ + "missing warehouse permissions", + "invalid warehouse ID", + "authentication failure", + "SDK configuration errors", + ], + nextStep: warehouseId + ? `Verify access to warehouse ${pc.bold(warehouseId)} and rerun type generation.` + : "Verify warehouse access and rerun type generation.", + }), ); this.name = "TypegenFatalError"; this.queries = queries; @@ -122,8 +206,9 @@ export async function generateFromEntryPoint(options: { queryFolder?: string; warehouseId: string; noCache?: boolean; + mode?: PreflightMode; }) { - const { outFile, queryFolder, warehouseId, noCache } = options; + const { outFile, queryFolder, warehouseId, noCache, mode = "dev" } = options; const projectRoot = resolveProjectRoot(outFile); logger.debug("Starting type generation..."); @@ -134,6 +219,7 @@ export async function generateFromEntryPoint(options: { if (queryFolder) { const result = await generateQueriesFromDescribe(queryFolder, warehouseId, { noCache, + mode, }); queryRegistry = result.schemas; syntaxErrors = result.syntaxErrors ?? []; diff --git a/packages/appkit/src/type-generator/preflight.ts b/packages/appkit/src/type-generator/preflight.ts new file mode 100644 index 000000000..3bb67c5fd --- /dev/null +++ b/packages/appkit/src/type-generator/preflight.ts @@ -0,0 +1,58 @@ +import type { WarehouseState } from "./warehouse-status"; + +/** + * How aggressively typegen should react to a not-ready warehouse. + * - `dev`: never block the developer; degrade to `unknown`/cached types, but a + * RUNNING warehouse is still described (fast path / background fire-and-forget). + * - `blocking`: a startable warehouse is worth waiting for, and a stopped one + * is a hard failure. + * - `degrade`: never describe and never probe the warehouse — emit best-available + * types (cache where the SQL hash matches, else `unknown`) and return at once. + * For a one-shot CLI (`--no-block`) that can't describe in the background. + */ +export type PreflightMode = "dev" | "blocking" | "degrade"; + +/** + * What the caller should do given a warehouse state and mode. + * - `proceed`: run DESCRIBE now. + * - `degradeAll`: skip DESCRIBE; emit degraded (cached/`unknown`) types. + * - `waitThenProceed`: wait for the warehouse to start, then run DESCRIBE. + * - `fatal`: stop — the warehouse can't serve this run. + */ +export type PreflightDecision = + | "proceed" + | "degradeAll" + | "waitThenProceed" + | "fatal"; + +/** + * Pure policy mapping a warehouse state + mode to a preflight decision. + * + * Unknown/unexpected states fall through to `proceed`: the describe loop and + * its per-query backstop already degrade gracefully, so we don't want a new + * SDK state value to turn into a spurious `fatal`. + */ +export function decidePreflight( + state: WarehouseState, + mode: PreflightMode, +): PreflightDecision { + // `degrade` never describes regardless of state: emit cached/`unknown` types + // and return. The caller short-circuits before probing, so this is only a + // belt-and-suspenders mapping, but it keeps the policy total and self-contained. + if (mode === "degrade") return "degradeAll"; + + switch (state) { + case "RUNNING": + return "proceed"; + case "STARTING": + return mode === "blocking" ? "waitThenProceed" : "degradeAll"; + case "STOPPED": + case "STOPPING": + return mode === "blocking" ? "fatal" : "degradeAll"; + case "DELETED": + case "DELETING": + return "fatal"; + default: + return "proceed"; + } +} diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts index 95da18dad..da484ca86 100644 --- a/packages/appkit/src/type-generator/query-registry.ts +++ b/packages/appkit/src/type-generator/query-registry.ts @@ -4,6 +4,7 @@ import { WorkspaceClient } from "@databricks/sdk-experimental"; import pc from "picocolors"; import { createLogger } from "../logging/logger"; import { CACHE_VERSION, hashSQL, loadCache, saveCache } from "./cache"; +import { decidePreflight, type PreflightMode } from "./preflight"; import { Spinner } from "./spinner"; import { type DatabricksStatementExecutionResponse, @@ -14,9 +15,17 @@ import { sqlTypeToHelper, sqlTypeToMarker, } from "./types"; +import { getWarehouseState, waitUntilRunning } from "./warehouse-status"; const logger = createLogger("type-generator:query-registry"); +/** + * Upper bound on how long a `blocking`-mode preflight will wait for a starting + * warehouse to reach RUNNING before giving up (~5 min). Generous enough to ride + * out a cold start without hanging an interactive CLI invocation indefinitely. + */ +const PREFLIGHT_WAIT_MAX_MS = 300_000; + /** * Regex breakdown: * '(?:[^']|'')*' — matches a SQL string literal, including escaped '' pairs @@ -171,6 +180,7 @@ function isConnectivityMessage(message: string): boolean { /\bconnection (?:refused|reset|timed out)\b/i.test(message) || /\bsocket hang up\b/i.test(message) || /\bnetwork error\b/i.test(message) || + /\bcan'?t connect to\b/i.test(message) || /\bcertificate has expired\b/i.test(message) || /\bunable to verify the first certificate\b/i.test(message) || /\bupstream connect error or disconnect\/reset before headers\b/i.test( @@ -316,6 +326,25 @@ function generateUnknownResultQuery(sql: string, queryName: string): string { }`; } +/** + * Degrade gracefully when DESCRIBE can't produce a fresh schema (transient + * connectivity outage, or a warehouse that's reachable but not ready). Reuse + * the last-good cached type when the SQL hash is unchanged, otherwise emit + * `unknown` from SQL alone. Never persists `result: unknown`. + */ +function degradedType( + cache: Awaited>, + queryName: string, + sql: string, + sqlHash: string, +): string { + const prior = cache.queries[queryName]; + const canReusePrior = prior?.hash === sqlHash && !prior.retry; + return canReusePrior + ? prior.type + : generateUnknownResultQuery(sql, queryName); +} + export function extractParameterTypes(sql: string): Record { const paramTypes: Record = {}; // Alternation order matters: TIMESTAMP_NTZ must precede TIMESTAMP so the @@ -391,14 +420,27 @@ export function inferParameterTypes( * @param warehouseId - the warehouse id to use for schema analysis * @param options - options for the query generation * @param options.noCache - if true, skip the cache and regenerate all types + * @param options.mode - preflight policy: "dev" never blocks the developer + * (degrades a not-ready warehouse, but still describes a RUNNING one), + * "blocking" waits for a startable warehouse and treats a stopped one as fatal, + * "degrade" never probes the warehouse and never describes (emits cached/`unknown` + * types and returns immediately). Defaults to "dev". * @returns an array of query schemas */ export async function generateQueriesFromDescribe( queryFolder: string, warehouseId: string, - options: { noCache?: boolean; concurrency?: number } = {}, + options: { + noCache?: boolean; + concurrency?: number; + mode?: PreflightMode; + } = {}, ): Promise { - const { noCache = false, concurrency: rawConcurrency = 10 } = options; + const { + noCache = false, + concurrency: rawConcurrency = 10, + mode = "dev", + } = options; const concurrency = typeof rawConcurrency === "number" && Number.isFinite(rawConcurrency) ? Math.max(1, Math.floor(rawConcurrency)) @@ -517,6 +559,14 @@ export async function generateQueriesFromDescribe( status: "empty"; index: number; schema: QuerySchema; + } + | { + // Warehouse reachable but returned a non-terminal state (PENDING/ + // RUNNING) with no rows — stopped/cold-starting/busy. Degrade like a + // transient outage (reuse cache or `unknown`); not cached, not empty. + status: "unavailable"; + index: number; + schema: QuerySchema; }; const freshResults: Array<{ index: number; schema: QuerySchema }> = []; @@ -526,161 +576,260 @@ export async function generateQueriesFromDescribe( const fatalErrors: QueryFatalError[] = []; if (uncachedQueries.length > 0) { - let completed = 0; - const total = uncachedQueries.length; - spinner.start( - `Describing ${total} ${total === 1 ? "query" : "queries"} (0/${total})`, - ); + // One-time warehouse preflight (before issuing any DESCRIBE). A single + // warehouses.get classifies the warehouse so we can skip the whole describe + // batch when it can't serve this run, instead of letting every query fail + // (and re-fail next run). Reuses this file's degrade/classify helpers so a + // not-ready warehouse degrades exactly like a per-query outage. + let decision: ReturnType = "proceed"; + let fatalMessage = ""; + if (mode === "degrade") { + // `degrade` never describes and must make ZERO warehouse round-trips: skip + // the probe entirely (no getWarehouseState) and go straight to degradeAll. + // A one-shot CLI (`--no-block`) can't describe in the background, so it + // emits best-available types (reused cache or `unknown`) and returns now. + decision = "degradeAll"; + } else { + try { + const state = await getWarehouseState(client, warehouseId); + decision = decidePreflight(state, mode); + if (decision === "fatal") { + fatalMessage = `warehouse ${warehouseId} is ${state}`; + } + if (decision === "waitThenProceed") { + const final = await waitUntilRunning(client, warehouseId, { + maxMs: PREFLIGHT_WAIT_MAX_MS, + }); + if (final === "RUNNING") { + decision = "proceed"; + } else { + decision = "fatal"; + fatalMessage = `warehouse ${warehouseId} did not reach RUNNING (now ${final})`; + } + } + } catch (err) { + if (isConnectivityError(err)) { + // Warehouse unreachable (transient outage): degrade silently like a + // per-query connectivity failure — never fail a build on a blip. + decision = "degradeAll"; + } else { + // Auth, bad warehouse id, malformed config, or a timed-out wait: fatal. + decision = "fatal"; + fatalMessage = `warehouse ${warehouseId}: ${getErrorDiagnostic(err)}`; + } + } + } - const describeOne = async ({ - index, - queryName, - sql, - sqlHash, - cleanedSql, - }: (typeof uncachedQueries)[number]): Promise => { - const result = (await client.statementExecution.executeStatement({ - statement: `DESCRIBE QUERY ${cleanedSql}`, - warehouse_id: warehouseId, - })) as DatabricksStatementExecutionResponse; - - completed++; - spinner.update( - `Describing ${total} ${total === 1 ? "query" : "queries"} (${completed}/${total})`, + if (decision !== "proceed") { + // degradeAll or fatal: skip DESCRIBE entirely. Every uncached query gets a + // degraded schema (reused cache or `unknown`); fatal additionally records + // a fatalError per query so the caller fails the build after writing. + const kind = decision === "fatal" ? "fatal" : "connectivity"; + for (const { index, queryName, sql, sqlHash } of uncachedQueries) { + freshResults.push({ + index, + schema: { + name: queryName, + type: degradedType(cache, queryName, sql, sqlHash), + }, + }); + if (decision === "fatal") { + fatalErrors.push({ name: queryName, message: fatalMessage }); + logEntries.push({ + queryName, + status: "MISS", + kind, + error: { message: fatalMessage }, + }); + } else { + logEntries.push({ queryName, status: "MISS", kind }); + } + } + } else { + let completed = 0; + const total = uncachedQueries.length; + spinner.start( + `Describing ${total} ${total === 1 ? "query" : "queries"} (0/${total})`, ); - logger.debug( - "DESCRIBE result for %s: state=%s, rows=%d", + const describeOne = async ({ + index, queryName, - result.status.state, - result.result?.data_array?.length ?? 0, - ); + sql, + sqlHash, + cleanedSql, + }: (typeof uncachedQueries)[number]): Promise => { + const result = (await client.statementExecution.executeStatement({ + statement: `DESCRIBE QUERY ${cleanedSql}`, + warehouse_id: warehouseId, + })) as DatabricksStatementExecutionResponse; + + completed++; + spinner.update( + `Describing ${total} ${total === 1 ? "query" : "queries"} (${completed}/${total})`, + ); + + logger.debug( + "DESCRIBE result for %s: state=%s, rows=%d", + queryName, + result.status.state, + result.result?.data_array?.length ?? 0, + ); + + if (result.status.state === "FAILED") { + // The warehouse was reachable and ran DESCRIBE, but the statement + // failed — a genuine SQL error (bad table, syntax, incompatible type). + const sqlError = + result.status.error?.message || "Query execution failed"; + logger.warn("DESCRIBE failed for %s: %s", queryName, sqlError); + const type = generateUnknownResultQuery(sql, queryName); + return { + status: "syntax", + index, + schema: { name: queryName, type }, + error: parseError(sqlError), + }; + } - if (result.status.state === "FAILED") { - // The warehouse was reachable and ran DESCRIBE, but the statement - // failed — a genuine SQL error (bad table, syntax, incompatible type). - const sqlError = - result.status.error?.message || "Query execution failed"; - logger.warn("DESCRIBE failed for %s: %s", queryName, sqlError); - const type = generateUnknownResultQuery(sql, queryName); + if (result.status.state !== "SUCCEEDED") { + // Non-terminal state (PENDING/RUNNING) with no result rows: the + // warehouse is reachable but not ready (stopped, cold-starting, or + // busy). Degrade like a transient outage — reuse the last-good cached + // type when the SQL is unchanged, else emit `unknown`. Never "empty": + // treating this as empty would emit `result: unknown` AND discard the + // good cached type, silently throwing away working types. + return { + status: "unavailable", + index, + schema: { + name: queryName, + type: degradedType(cache, queryName, sql, sqlHash), + }, + }; + } + + const { type, hasResults } = convertToQueryType(result, sql, queryName); + if (!hasResults) { + // Described, but no result columns. Emit `unknown` and retry next run; + // do not cache (we never persist `result: unknown`). + return { status: "empty", index, schema: { name: queryName, type } }; + } return { - status: "syntax", + status: "ok", index, schema: { name: queryName, type }, - error: parseError(sqlError), + cacheEntry: { hash: sqlHash, type, retry: false }, }; - } - - const { type, hasResults } = convertToQueryType(result, sql, queryName); - if (!hasResults) { - // Described, but no result columns. Emit `unknown` and retry next run; - // do not cache (we never persist `result: unknown`). - return { status: "empty", index, schema: { name: queryName, type } }; - } - return { - status: "ok", - index, - schema: { name: queryName, type }, - cacheEntry: { hash: sqlHash, type, retry: false }, }; - }; - // Process in chunks, saving cache after each chunk - const processBatchResults = ( - settled: PromiseSettledResult[], - batchOffset: number, - ) => { - for (let i = 0; i < settled.length; i++) { - const entry = settled[i]; - const { queryName } = uncachedQueries[batchOffset + i]; - - if (entry.status === "fulfilled") { - const res = entry.value; - freshResults.push({ index: res.index, schema: res.schema }); - - if (res.status === "ok") { - // Only a successful describe with a result schema is cached. - cache.queries[queryName] = res.cacheEntry; - logEntries.push({ queryName, status: "MISS" }); - } else if (res.status === "syntax") { - // Genuine SQL error — record it for the caller's prod/dev gate. - // Not cached: re-described next run so a fixed query recovers. - syntaxErrors.push({ name: queryName, message: res.error.message }); - logEntries.push({ - queryName, - status: "MISS", - kind: "syntax", - error: res.error, - }); + // Process in chunks, saving cache after each chunk + const processBatchResults = ( + settled: PromiseSettledResult[], + batchOffset: number, + ) => { + for (let i = 0; i < settled.length; i++) { + const entry = settled[i]; + const { queryName } = uncachedQueries[batchOffset + i]; + + if (entry.status === "fulfilled") { + const res = entry.value; + freshResults.push({ index: res.index, schema: res.schema }); + + if (res.status === "ok") { + // Only a successful describe with a result schema is cached. + cache.queries[queryName] = res.cacheEntry; + logEntries.push({ queryName, status: "MISS" }); + } else if (res.status === "syntax") { + // Genuine SQL error — record it for the caller's prod/dev gate. + // Not cached: re-described next run so a fixed query recovers. + syntaxErrors.push({ + name: queryName, + message: res.error.message, + }); + logEntries.push({ + queryName, + status: "MISS", + kind: "syntax", + error: res.error, + }); + } else if (res.status === "empty") { + // status === "empty": described, no columns. Soft unknown, not cached. + logEntries.push({ queryName, status: "MISS", kind: "empty" }); + } else { + // status === "unavailable": non-terminal DESCRIBE (warehouse + // stopped/cold-starting/busy). Degrade like a transient outage: + // tag OFFLINE, count as degraded, never cache. + logEntries.push({ + queryName, + status: "MISS", + kind: "connectivity", + }); + } } else { - // status === "empty": described, no columns. Soft unknown, not cached. - logEntries.push({ queryName, status: "MISS", kind: "empty" }); - } - } else { - // executeStatement rejected without a normal StatementExecution result. - // Only structured transport/connectivity failures are treated as - // offline; auth, bad warehouse IDs, malformed requests, and SDK/config - // failures stay fatal so users fix the underlying setup issue. - completed++; - spinner.update( - `Describing ${total} ${total === 1 ? "query" : "queries"} (${completed}/${total})`, - ); - - const { sql, sqlHash, index } = uncachedQueries[batchOffset + i]; - const reason = getErrorDiagnostic(entry.reason); - const error = parseError(reason); - const prior = cache.queries[queryName]; - const canReusePrior = prior?.hash === sqlHash && !prior.retry; - const type = canReusePrior - ? prior.type - : generateUnknownResultQuery(sql, queryName); - freshResults.push({ index, schema: { name: queryName, type } }); - - if (!isConnectivityError(entry.reason)) { - fatalErrors.push({ name: queryName, message: error.message }); + // executeStatement rejected without a normal StatementExecution result. + // Only structured transport/connectivity failures are treated as + // offline; auth, bad warehouse IDs, malformed requests, and SDK/config + // failures stay fatal so users fix the underlying setup issue. + completed++; + spinner.update( + `Describing ${total} ${total === 1 ? "query" : "queries"} (${completed}/${total})`, + ); + + const { sql, sqlHash, index } = uncachedQueries[batchOffset + i]; + const reason = getErrorDiagnostic(entry.reason); + const error = parseError(reason); + const priorEntry = cache.queries[queryName]; + const canReusePrior = + priorEntry?.hash === sqlHash && !priorEntry.retry; + const type = degradedType(cache, queryName, sql, sqlHash); + freshResults.push({ index, schema: { name: queryName, type } }); + + if (!isConnectivityError(entry.reason)) { + fatalErrors.push({ name: queryName, message: error.message }); + logEntries.push({ + queryName, + status: "MISS", + kind: "fatal", + error, + }); + continue; + } + + logger.warn( + "DESCRIBE unreachable for %s: %s — %s", + queryName, + reason, + canReusePrior + ? "reusing last cached type" + : "emitting unknown (no matching cache)", + ); logEntries.push({ queryName, status: "MISS", - kind: "fatal", + kind: "connectivity", error, }); - continue; } - - logger.warn( - "DESCRIBE unreachable for %s: %s — %s", - queryName, - reason, - canReusePrior - ? "reusing last cached type" - : "emitting unknown (no matching cache)", - ); - logEntries.push({ - queryName, - status: "MISS", - kind: "connectivity", - error, - }); } - } - }; + }; - if (uncachedQueries.length > concurrency) { - for (let b = 0; b < uncachedQueries.length; b += concurrency) { - const batch = uncachedQueries.slice(b, b + concurrency); - const batchResults = await Promise.allSettled(batch.map(describeOne)); - processBatchResults(batchResults, b); + if (uncachedQueries.length > concurrency) { + for (let b = 0; b < uncachedQueries.length; b += concurrency) { + const batch = uncachedQueries.slice(b, b + concurrency); + const batchResults = await Promise.allSettled(batch.map(describeOne)); + processBatchResults(batchResults, b); + await saveCache(cache); + } + } else { + const settled = await Promise.allSettled( + uncachedQueries.map(describeOne), + ); + processBatchResults(settled, 0); await saveCache(cache); } - } else { - const settled = await Promise.allSettled( - uncachedQueries.map(describeOne), - ); - processBatchResults(settled, 0); - await saveCache(cache); - } - spinner.stop(""); + spinner.stop(""); + } } const elapsed = ((performance.now() - startTime) / 1000).toFixed(2); @@ -742,7 +891,7 @@ export async function generateQueriesFromDescribe( parts.push( `${syntaxCount} SQL ${syntaxCount === 1 ? "error" : "errors"}`, ); - if (offlineCount > 0) parts.push(`${offlineCount} unreachable`); + if (offlineCount > 0) parts.push(`${offlineCount} degraded`); if (emptyCount > 0) parts.push(`${emptyCount} empty`); if (fatalCount > 0) parts.push( diff --git a/packages/appkit/src/type-generator/tests/generate-queries.test.ts b/packages/appkit/src/type-generator/tests/generate-queries.test.ts index 637bbc161..ea46f03de 100644 --- a/packages/appkit/src/type-generator/tests/generate-queries.test.ts +++ b/packages/appkit/src/type-generator/tests/generate-queries.test.ts @@ -4,6 +4,10 @@ const mocks = vi.hoisted(() => ({ readdir: vi.fn(), readFile: vi.fn(), executeStatement: vi.fn(), + // Warehouse preflight probe. Defaults to RUNNING so every existing describe + // test takes the "proceed" path unchanged; override per-test to exercise + // stopped/starting/unreachable preflight branches. + getWarehouse: vi.fn(() => ({ state: "RUNNING" })), spinnerStop: vi.fn(), spinnerPrintDetail: vi.fn(), loadCache: vi.fn(() => ({ version: "2", queries: {} })), @@ -20,6 +24,7 @@ vi.mock("node:fs/promises", () => ({ vi.mock("@databricks/sdk-experimental", () => ({ WorkspaceClient: vi.fn(() => ({ statementExecution: { executeStatement: mocks.executeStatement }, + warehouses: { get: mocks.getWarehouse }, })), })); @@ -64,6 +69,9 @@ function succeededResult(columns: [string, string, string | null][]) { describe("generateQueriesFromDescribe", () => { beforeEach(() => { vi.clearAllMocks(); + // Re-establish the RUNNING default so a per-test preflight override (e.g. + // mockReturnValue/mockImplementation) never leaks into the next test. + mocks.getWarehouse.mockReturnValue({ state: "RUNNING" }); }); test("success path — returns query schema", async () => { @@ -389,6 +397,27 @@ describe("generateQueriesFromDescribe", () => { expect(fatalErrors).toEqual([]); }); + test("SDK DNS wrapper (Can't connect to ..., code 500) is classified as connectivity", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce( + Object.assign( + new Error( + "Can't connect to https://x.cloud.databricks.com/api/2.0/sql/statements", + ), + { code: 500 }, + ), + ); + + const { schemas, fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([]); + }); + test("TLS certificate message is classified as connectivity", async () => { mocks.readdir.mockResolvedValue(["users.sql"]); mocks.readFile.mockResolvedValue("SELECT id FROM users"); @@ -475,6 +504,61 @@ describe("generateQueriesFromDescribe", () => { expect(lastSavedQueries()).not.toHaveProperty("empty"); }); + test("PENDING (non-terminal, warehouse not ready) degrades to unknown, not empty, not cached", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + // Warehouse stopped/cold-starting: hybrid DESCRIBE returns a non-terminal + // state with no result rows. Must degrade like a transient outage, not be + // misreported as EMPTY (which would discard a good cached type). + mocks.executeStatement.mockResolvedValue({ + statement_id: "stmt-1", + status: { state: "PENDING" }, + }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123"); + + expect(schemas).toHaveLength(1); + expect(schemas[0].name).toBe("users"); + expect(schemas[0].type).toContain("result: unknown"); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + // a non-ready warehouse must never persist `result: unknown` + expect(lastSavedQueries()).not.toHaveProperty("users"); + }); + + test("PENDING reuses a prior good cached type when the SQL hash matches", async () => { + // `users.sql` and `users.obo.sql` normalize to the same query name and hold + // identical SQL (same hash). With concurrency=1 the first DESCRIBE SUCCEEDS + // and its batch commits a good cached type; the second batch comes back + // non-terminal (warehouse not ready) and must reuse that freshly-cached good + // type rather than overwrite it with unknown. + const sql = "SELECT id FROM users"; + mocks.readdir.mockResolvedValue(["users.sql", "users.obo.sql"]); + mocks.readFile.mockResolvedValue(sql); + mocks.executeStatement + .mockResolvedValueOnce(succeededResult([["id", "INT", null]])) + .mockResolvedValueOnce({ + statement_id: "stmt-pending", + status: { state: "RUNNING" }, + }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + concurrency: 1, + }); + + expect(schemas).toHaveLength(2); + // both entries resolve to the good type — the PENDING one reuses the cache + expect(schemas[0].type).toContain("id: number"); + expect(schemas[1].type).toContain("id: number"); + expect(schemas[1].type).not.toContain("result: unknown"); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + // the good cached type persists; PENDING never overwrites it with unknown + expect(lastSavedQueries()?.users.type).toContain("id: number"); + }); + test("syntax error (FAILED) is recorded in syntaxErrors and not cached", async () => { mocks.readdir.mockResolvedValue(["broken.sql"]); mocks.readFile.mockResolvedValue("SELECT * FROM missing"); @@ -540,4 +624,177 @@ describe("generateQueriesFromDescribe", () => { expect(schemas[0].type).toContain("id: number"); expect(schemas[0].type).not.toBe("STALE_UNKNOWN"); }); + + describe("warehouse preflight", () => { + test("STOPPED + blocking mode — fatal per query, never describes", async () => { + mocks.readdir.mockResolvedValue(["a.sql", "b.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT id FROM a") + .mockResolvedValueOnce("SELECT id FROM b"); + mocks.getWarehouse.mockReturnValue({ state: "STOPPED" }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + + // One fatal entry per uncached query; the whole describe batch is skipped. + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(fatalErrors).toEqual([ + { name: "a", message: "warehouse wh-123 is STOPPED" }, + { name: "b", message: "warehouse wh-123 is STOPPED" }, + ]); + expect(syntaxErrors).toEqual([]); + expect(schemas).toHaveLength(2); + expect(schemas[0].type).toContain("result: unknown"); + expect(schemas[1].type).toContain("result: unknown"); + }); + + test("STOPPED + dev mode — degrades silently, never describes", async () => { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + mocks.getWarehouse.mockReturnValue({ state: "STOPPED" }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "dev", + }); + + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(fatalErrors).toEqual([]); + expect(syntaxErrors).toEqual([]); + expect(schemas[0].type).toContain("result: unknown"); + // degraded, never a fatal failure + expect(lastSavedQueries()).toBeUndefined(); + }); + + test("STARTING + blocking — waits for RUNNING, then describes normally", async () => { + vi.useFakeTimers(); + try { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + // Preflight sees STARTING (→ waitThenProceed); waitUntilRunning polls + // STARTING once more, then RUNNING. After that, DESCRIBE runs. + mocks.getWarehouse + .mockReturnValueOnce({ state: "STARTING" }) + .mockReturnValueOnce({ state: "STARTING" }) + .mockReturnValue({ state: "RUNNING" }); + mocks.executeStatement.mockResolvedValue( + succeededResult([["id", "INT", null]]), + ); + + const promise = generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + // Drive the wait loop's backoff sleep(s) so it can re-poll and observe + // RUNNING. Run pending timers until the work settles. + await vi.runAllTimersAsync(); + const { schemas, syntaxErrors, fatalErrors } = await promise; + + expect(mocks.executeStatement).toHaveBeenCalledTimes(1); + expect(schemas).toHaveLength(1); + expect(schemas[0].type).toContain("id: number"); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + } finally { + vi.useRealTimers(); + } + }); + + test("preflight connectivity error — degradeAll, never describes", async () => { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + mocks.getWarehouse.mockImplementation(() => { + throw Object.assign( + new Error("Can't connect to https://x.cloud.databricks.com"), + { code: 500 }, + ); + }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + + // Unreachable warehouse degrades silently — even in blocking mode. + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(fatalErrors).toEqual([]); + expect(syntaxErrors).toEqual([]); + expect(schemas[0].type).toContain("result: unknown"); + }); + + test("RUNNING preflight — describes normally", async () => { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + mocks.getWarehouse.mockReturnValue({ state: "RUNNING" }); + mocks.executeStatement.mockResolvedValue( + succeededResult([["id", "INT", null]]), + ); + + const { schemas, fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + { mode: "blocking" }, + ); + + expect(mocks.executeStatement).toHaveBeenCalledTimes(1); + expect(schemas[0].type).toContain("id: number"); + expect(fatalErrors).toEqual([]); + }); + + test("degrade mode — skips probe + describes even when warehouse is RUNNING", async () => { + mocks.readdir.mockResolvedValue(["a.sql", "b.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT id FROM a") + .mockResolvedValueOnce("SELECT id FROM b"); + // A RUNNING warehouse would normally take the proceed path and describe + // every query. In `degrade` mode the warehouse is never even probed. + mocks.getWarehouse.mockReturnValue({ state: "RUNNING" }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "degrade", + }); + + // ZERO warehouse round-trips: no probe (getWarehouse) and no DESCRIBE. + expect(mocks.getWarehouse).not.toHaveBeenCalled(); + expect(mocks.executeStatement).not.toHaveBeenCalled(); + // Best-available types: no cache seeded → every query degrades to unknown. + expect(schemas).toHaveLength(2); + expect(schemas[0].name).toBe("a"); + expect(schemas[0].type).toContain("result: unknown"); + expect(schemas[1].name).toBe("b"); + expect(schemas[1].type).toContain("result: unknown"); + // Degraded, never a failure. + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + }); + + test("degrade mode — reuses the cached type when the SQL hash matches", async () => { + const sql = "SELECT id FROM users"; + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue(sql); + // Seed a last-good cached type under the current SQL hash. degrade mode + // serves it via the normal cache HIT path — still no probe, no DESCRIBE. + mocks.loadCache.mockReturnValueOnce({ + version: CACHE_VERSION, + queries: { + users: { hash: hashSQL(sql), type: CACHED_GOOD_TYPE, retry: false }, + }, + }); + mocks.getWarehouse.mockReturnValue({ state: "RUNNING" }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "degrade", + }); + + expect(mocks.getWarehouse).not.toHaveBeenCalled(); + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(schemas).toHaveLength(1); + expect(schemas[0].type).toBe(CACHED_GOOD_TYPE); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + }); + }); }); diff --git a/packages/appkit/src/type-generator/tests/preflight.test.ts b/packages/appkit/src/type-generator/tests/preflight.test.ts new file mode 100644 index 000000000..f68eaa513 --- /dev/null +++ b/packages/appkit/src/type-generator/tests/preflight.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, test } from "vitest"; +import { + decidePreflight, + type PreflightDecision, + type PreflightMode, +} from "../preflight"; +import type { WarehouseState } from "../warehouse-status"; + +// Every (state × mode) pair the policy must implement, plus an unknown-state +// case that must fall through to "proceed". +const cases: Array<{ + state: WarehouseState | "WEIRD_FUTURE_STATE"; + mode: PreflightMode; + expected: PreflightDecision; +}> = [ + { state: "RUNNING", mode: "dev", expected: "proceed" }, + { state: "RUNNING", mode: "blocking", expected: "proceed" }, + + { state: "STARTING", mode: "dev", expected: "degradeAll" }, + { state: "STARTING", mode: "blocking", expected: "waitThenProceed" }, + + { state: "STOPPED", mode: "dev", expected: "degradeAll" }, + { state: "STOPPED", mode: "blocking", expected: "fatal" }, + { state: "STOPPING", mode: "dev", expected: "degradeAll" }, + { state: "STOPPING", mode: "blocking", expected: "fatal" }, + + { state: "DELETED", mode: "dev", expected: "fatal" }, + { state: "DELETED", mode: "blocking", expected: "fatal" }, + { state: "DELETING", mode: "dev", expected: "fatal" }, + { state: "DELETING", mode: "blocking", expected: "fatal" }, + + // Unknown state: backstop is the describe loop, so don't block. + { state: "WEIRD_FUTURE_STATE", mode: "dev", expected: "proceed" }, + { state: "WEIRD_FUTURE_STATE", mode: "blocking", expected: "proceed" }, + + // `degrade` never describes: every state (even RUNNING) maps to degradeAll. + { state: "RUNNING", mode: "degrade", expected: "degradeAll" }, + { state: "STARTING", mode: "degrade", expected: "degradeAll" }, + { state: "STOPPED", mode: "degrade", expected: "degradeAll" }, + { state: "STOPPING", mode: "degrade", expected: "degradeAll" }, + { state: "DELETED", mode: "degrade", expected: "degradeAll" }, + { state: "DELETING", mode: "degrade", expected: "degradeAll" }, + { state: "WEIRD_FUTURE_STATE", mode: "degrade", expected: "degradeAll" }, +]; + +describe("decidePreflight", () => { + test.each(cases)( + "$state + $mode -> $expected", + ({ state, mode, expected }) => { + expect(decidePreflight(state as WarehouseState, mode)).toBe(expected); + }, + ); +}); diff --git a/packages/appkit/src/type-generator/tests/vite-plugin.test.ts b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts new file mode 100644 index 000000000..86e0a8bc8 --- /dev/null +++ b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts @@ -0,0 +1,389 @@ +import { EventEmitter } from "node:events"; +import path from "node:path"; +import type { Plugin, ViteDevServer } from "vite"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import type { WarehouseState } from "../warehouse-status"; + +const mocks = vi.hoisted(() => ({ + generateFromEntryPoint: vi.fn(), + getWarehouseState: vi.fn(), + startWarehouse: vi.fn(), + waitUntilRunning: vi.fn(), +})); + +// Mock the module vite-plugin.ts pulls generateFromEntryPoint from. The error +// classes are imported for `instanceof` checks in the catch block, so they must +// remain real constructors — only the warehouse-touching entry point is spied. +vi.mock("../index", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + generateFromEntryPoint: mocks.generateFromEntryPoint, + }; +}); + +// Mock the warehouse-status helpers so the background watch is fully driven by +// the test (no real WorkspaceClient / SDK calls). +vi.mock("../warehouse-status", () => ({ + getWarehouseState: mocks.getWarehouseState, + startWarehouse: mocks.startWarehouse, + waitUntilRunning: mocks.waitUntilRunning, +})); + +// armWarehouseWatch constructs `new WorkspaceClient({})`. Stub the SDK so that +// constructor is inert in unit tests. +vi.mock("@databricks/sdk-experimental", () => ({ + WorkspaceClient: class {}, +})); + +const { appKitTypesPlugin } = await import("../vite-plugin"); + +// The plugin hooks are loosely typed on Vite's Plugin; cast to the shapes we +// actually drive so we can call them directly without a Vite build. +type ConfigResolvedHook = (config: { root: string }) => void; +type BuildStartHook = () => unknown; +type ConfigureServerHook = (server: ViteDevServer) => void; + +function getHook( + plugin: Plugin, + name: "configResolved" | "buildStart" | "configureServer", +): T { + const hook = plugin[name]; + if (typeof hook !== "function") { + throw new Error(`expected ${name} to be a function hook`); + } + return hook as T; +} + +/** + * A deferred promise whose settlement the test controls — used to hold a + * generateFromEntryPoint call "in flight" while a second trigger arrives, so we + * can observe single-flight coalescing deterministically. + */ +function deferred() { + let resolve!: () => void; + const promise = new Promise((r) => { + resolve = r; + }); + return { promise, resolve }; +} + +/** Construct the plugin and drive configResolved so outFile/watchFolders set. */ +function makeConfiguredPlugin() { + const plugin = appKitTypesPlugin(); + const configResolved = getHook(plugin, "configResolved"); + // configResolved derives outFile/watchFolders from config.root; a client + // sub-folder mirrors the real layout (projectRoot = config.root/..). + configResolved({ root: path.join(process.cwd(), "client") }); + return plugin; +} + +/** Drive configResolved + buildStart so generate() runs to the spy. */ +async function runPlugin() { + const plugin = makeConfiguredPlugin(); + const buildStart = getHook(plugin, "buildStart"); + await buildStart(); +} + +/** + * Minimal ViteDevServer stand-in: a chokidar-like `watcher` (EventEmitter with + * a no-op `add`) plus an `httpServer` EventEmitter so the close-cleanup hook can + * register. Returns the doubles so tests can emit "change"/"close". + */ +function makeFakeServer() { + const watcher = Object.assign(new EventEmitter(), { add: vi.fn() }); + const httpServer = new EventEmitter(); + const server = { watcher, httpServer } as unknown as ViteDevServer; + return { server, watcher, httpServer }; +} + +/** + * Settle the microtask queue so awaited generate/watch chains progress. The + * background watch threads several awaits (getWarehouseState → waitUntilRunning + * → runGenerate → generateOnce → generateFromEntryPoint), so drain generously. + */ +async function flush() { + for (let i = 0; i < 12; i++) { + await Promise.resolve(); + } +} + +describe("appKitTypesPlugin — generation mode", () => { + const savedNodeEnv = process.env.NODE_ENV; + const savedWarehouseId = process.env.DATABRICKS_WAREHOUSE_ID; + + beforeEach(() => { + vi.clearAllMocks(); + mocks.generateFromEntryPoint.mockResolvedValue(undefined); + // Default the warehouse watch to a no-op (RUNNING => no wait) so tests that + // don't exercise it aren't perturbed by a background regenerate. + mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState); + mocks.startWarehouse.mockResolvedValue(undefined); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + // A non-empty warehouse ID is required or generate() short-circuits before + // ever calling generateFromEntryPoint. + process.env.DATABRICKS_WAREHOUSE_ID = "wh-test"; + }); + + afterEach(() => { + if (savedNodeEnv === undefined) delete process.env.NODE_ENV; + else process.env.NODE_ENV = savedNodeEnv; + + if (savedWarehouseId === undefined) + delete process.env.DATABRICKS_WAREHOUSE_ID; + else process.env.DATABRICKS_WAREHOUSE_ID = savedWarehouseId; + }); + + test('passes mode: "dev" when NODE_ENV is not production', async () => { + process.env.NODE_ENV = "development"; + + await runPlugin(); + + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "dev" }), + ); + }); + + test('passes mode: "blocking" when NODE_ENV is production', async () => { + process.env.NODE_ENV = "production"; + + await runPlugin(); + + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "blocking" }), + ); + }); + + test("skips generation when warehouse ID is absent", async () => { + delete process.env.DATABRICKS_WAREHOUSE_ID; + + await runPlugin(); + + expect(mocks.generateFromEntryPoint).not.toHaveBeenCalled(); + }); +}); + +describe("appKitTypesPlugin — single-flight generate", () => { + const savedNodeEnv = process.env.NODE_ENV; + const savedWarehouseId = process.env.DATABRICKS_WAREHOUSE_ID; + + beforeEach(() => { + vi.clearAllMocks(); + // Watch is a no-op here (RUNNING) so it can't add stray generate calls. + mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState); + mocks.startWarehouse.mockResolvedValue(undefined); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + process.env.NODE_ENV = "development"; + process.env.DATABRICKS_WAREHOUSE_ID = "wh-test"; + }); + + afterEach(() => { + if (savedNodeEnv === undefined) delete process.env.NODE_ENV; + else process.env.NODE_ENV = savedNodeEnv; + + if (savedWarehouseId === undefined) + delete process.env.DATABRICKS_WAREHOUSE_ID; + else process.env.DATABRICKS_WAREHOUSE_ID = savedWarehouseId; + }); + + test("coalesces overlapping triggers into one in-flight + one trailing run", async () => { + // First generate hangs on a deferred; while it's in flight we fire two more + // triggers. They must NOT start concurrently — they collapse into a single + // trailing run after the first settles. + const first = deferred(); + const second = deferred(); + mocks.generateFromEntryPoint + .mockReturnValueOnce(first.promise) + .mockReturnValueOnce(second.promise); + + const plugin = makeConfiguredPlugin(); + const { server, watcher } = makeFakeServer(); + const configureServer = getHook( + plugin, + "configureServer", + ); + const buildStart = getHook(plugin, "buildStart"); + + configureServer(server); + + // Trigger 1: the initial build. Starts generate #1 (now in flight). + await buildStart(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + + const sqlFile = path.join(process.cwd(), "config", "queries", "q.sql"); + // Triggers 2 and 3 arrive while #1 is still in flight: no new run starts. + watcher.emit("change", sqlFile); + watcher.emit("change", sqlFile); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + + // Settle #1 → exactly ONE trailing run fires for the coalesced triggers. + first.resolve(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + + // Settle the trailing run; no further runs queued. + second.resolve(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + }); + + test("a trigger after the previous run settled starts a fresh run", async () => { + mocks.generateFromEntryPoint.mockResolvedValue(undefined); + + const plugin = makeConfiguredPlugin(); + const { server, watcher } = makeFakeServer(); + getHook(plugin, "configureServer")(server); + const buildStart = getHook(plugin, "buildStart"); + + await buildStart(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + + // Nothing in flight now: a later .sql change runs generate again. + watcher.emit( + "change", + path.join(process.cwd(), "config", "queries", "q.sql"), + ); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + }); +}); + +describe("appKitTypesPlugin — background warehouse watch", () => { + const savedNodeEnv = process.env.NODE_ENV; + const savedWarehouseId = process.env.DATABRICKS_WAREHOUSE_ID; + + beforeEach(() => { + vi.clearAllMocks(); + mocks.generateFromEntryPoint.mockResolvedValue(undefined); + mocks.startWarehouse.mockResolvedValue(undefined); + process.env.DATABRICKS_WAREHOUSE_ID = "wh-test"; + }); + + afterEach(() => { + if (savedNodeEnv === undefined) delete process.env.NODE_ENV; + else process.env.NODE_ENV = savedNodeEnv; + + if (savedWarehouseId === undefined) + delete process.env.DATABRICKS_WAREHOUSE_ID; + else process.env.DATABRICKS_WAREHOUSE_ID = savedWarehouseId; + }); + + test("STOPPED → starts the warehouse, then RUNNING regenerates in dev", async () => { + process.env.NODE_ENV = "development"; + // Warehouse is stopped; the watch must kick off a start, then the poller + // sees it reach RUNNING. + mocks.getWarehouseState.mockResolvedValue("STOPPED" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + + await runPlugin(); + await flush(); + + // A stopped warehouse is nudged to start before we wait on it. + expect(mocks.startWarehouse).toHaveBeenCalledTimes(1); + expect(mocks.waitUntilRunning).toHaveBeenCalledTimes(1); + // Because WE issued the start, the wait must poll through a stale post-start + // STOPPED/STOPPING reading instead of bailing — assert the flag is set. + expect(mocks.waitUntilRunning).toHaveBeenCalledWith( + expect.anything(), + "wh-test", + expect.objectContaining({ treatStoppedAsTransient: true }), + ); + // Call 1: initial buildStart generate. Call 2: the watch's regenerate once + // the warehouse reached RUNNING. + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + }); + + test("STARTING → waits without starting, then RUNNING regenerates in dev", async () => { + process.env.NODE_ENV = "development"; + // Warehouse is cold-starting, then warms up to RUNNING. + mocks.getWarehouseState.mockResolvedValue("STARTING" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + + await runPlugin(); + await flush(); + + // Already coming up: no redundant start, just wait + regenerate. + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + // We didn't start it, so the wait keeps the default terminal states (a + // STOPPED reading here would be a real stop, not a stale pre-start blip). + expect(mocks.waitUntilRunning).toHaveBeenCalledTimes(1); + expect(mocks.waitUntilRunning).toHaveBeenCalledWith( + expect.anything(), + "wh-test", + expect.objectContaining({ treatStoppedAsTransient: false }), + ); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + }); + + test("RUNNING state does not start, wait, or regenerate", async () => { + process.env.NODE_ENV = "development"; + mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState); + + await runPlugin(); + await flush(); + + // Already RUNNING: the watch returns before starting or waiting, leaving + // only the single initial generate. + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.waitUntilRunning).not.toHaveBeenCalled(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + }); + + test("no watch in production (armWarehouseWatch no-ops)", async () => { + process.env.NODE_ENV = "production"; + // Even if the warehouse were STOPPED, production must not arm the watch. + mocks.getWarehouseState.mockResolvedValue("STOPPED" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + + await runPlugin(); + await flush(); + + expect(mocks.getWarehouseState).not.toHaveBeenCalled(); + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.waitUntilRunning).not.toHaveBeenCalled(); + // Only the blocking initial build runs. + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + }); + + test("aborts the armed watch when the dev server closes", async () => { + process.env.NODE_ENV = "development"; + mocks.getWarehouseState.mockResolvedValue("STARTING" as WarehouseState); + + // Capture the signal handed to waitUntilRunning so we can assert the close + // hook aborts it. Keep the wait pending until then. + let capturedSignal: AbortSignal | undefined; + const wait = deferred(); + mocks.waitUntilRunning.mockImplementation( + (_client, _id, opts: { signal?: AbortSignal }) => { + capturedSignal = opts.signal; + return wait.promise.then(() => "RUNNING" as WarehouseState); + }, + ); + + const plugin = makeConfiguredPlugin(); + const { server, httpServer } = makeFakeServer(); + getHook(plugin, "configureServer")(server); + const buildStart = getHook(plugin, "buildStart"); + + await buildStart(); + await flush(); + + expect(capturedSignal).toBeDefined(); + expect(capturedSignal?.aborted).toBe(false); + + // Dev server shutdown must abort the pending warehouse wait. + httpServer.emit("close"); + expect(capturedSignal?.aborted).toBe(true); + + // Let the (now-aborted) wait settle; the IIFE swallows it and skips the + // regenerate because the signal is aborted. + wait.resolve(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/appkit/src/type-generator/tests/warehouse-status.test.ts b/packages/appkit/src/type-generator/tests/warehouse-status.test.ts new file mode 100644 index 000000000..700df2599 --- /dev/null +++ b/packages/appkit/src/type-generator/tests/warehouse-status.test.ts @@ -0,0 +1,194 @@ +import type { WorkspaceClient } from "@databricks/sdk-experimental"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { + getWarehouseState, + type WarehouseState, + waitUntilRunning, +} from "../warehouse-status"; + +/** + * Build a minimal WorkspaceClient stub exposing only `warehouses.get`, the one + * method these helpers touch. Cast through `unknown` to the SDK type so callers + * type-check without us constructing a real client. + */ +function makeClient(get: ReturnType): WorkspaceClient { + return { warehouses: { get } } as unknown as WorkspaceClient; +} + +/** A warehouses.get resolution carrying a given lifecycle state. */ +const stateResponse = (state: WarehouseState) => ({ state }); + +describe("getWarehouseState", () => { + test("returns the .state from warehouses.get", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("RUNNING")); + const client = makeClient(get); + + await expect(getWarehouseState(client, "wh-1")).resolves.toBe("RUNNING"); + expect(get).toHaveBeenCalledWith({ id: "wh-1" }); + }); + + test("propagates errors from warehouses.get (does not catch)", async () => { + const get = vi.fn().mockRejectedValue(new Error("boom")); + const client = makeClient(get); + + await expect(getWarehouseState(client, "wh-1")).rejects.toThrow("boom"); + }); +}); + +describe("waitUntilRunning", () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + test("resolves RUNNING after polling through STARTING states", async () => { + const get = vi + .fn() + .mockResolvedValueOnce(stateResponse("STARTING")) + .mockResolvedValueOnce(stateResponse("STARTING")) + .mockResolvedValueOnce(stateResponse("RUNNING")); + const client = makeClient(get); + + const promise = waitUntilRunning(client, "wh-1", { maxMs: 60000 }); + + // Drive the fake clock past each backoff delay (1000ms, then 2000ms) so the + // subsequent polls fire. advanceTimersByTimeAsync also flushes the awaited + // getWarehouseState microtasks between polls. + await vi.advanceTimersByTimeAsync(1000); + await vi.advanceTimersByTimeAsync(2000); + + await expect(promise).resolves.toBe("RUNNING"); + expect(get).toHaveBeenCalledTimes(3); + }); + + test("resolves with a not-coming-up state without waiting (STOPPED)", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STOPPED")); + const client = makeClient(get); + + // First poll already returns STOPPED, so no timer advance is needed. + await expect( + waitUntilRunning(client, "wh-1", { maxMs: 60000 }), + ).resolves.toBe("STOPPED"); + expect(get).toHaveBeenCalledTimes(1); + }); + + test("STOPPED stays terminal when treatStoppedAsTransient is false", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STOPPED")); + const client = makeClient(get); + + // Explicit false mirrors the default: STOPPED is terminal, resolves at once. + await expect( + waitUntilRunning(client, "wh-1", { + maxMs: 60000, + treatStoppedAsTransient: false, + }), + ).resolves.toBe("STOPPED"); + expect(get).toHaveBeenCalledTimes(1); + }); + + test("treatStoppedAsTransient polls through STOPPED until RUNNING", async () => { + // A start was just issued, so the first poll still reports the stale STOPPED + // before the start propagates. With the flag on we must NOT bail on it — + // keep polling (STOPPED → STARTING → RUNNING) and resolve RUNNING. + const get = vi + .fn() + .mockResolvedValueOnce(stateResponse("STOPPED")) + .mockResolvedValueOnce(stateResponse("STARTING")) + .mockResolvedValueOnce(stateResponse("RUNNING")); + const client = makeClient(get); + + const promise = waitUntilRunning(client, "wh-1", { + maxMs: 60000, + treatStoppedAsTransient: true, + }); + + // Drive past each backoff (1000ms, then 2000ms) so the later polls fire. + await vi.advanceTimersByTimeAsync(1000); + await vi.advanceTimersByTimeAsync(2000); + + await expect(promise).resolves.toBe("RUNNING"); + expect(get).toHaveBeenCalledTimes(3); + }); + + test("treatStoppedAsTransient still treats DELETED as terminal", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("DELETED")); + const client = makeClient(get); + + // A deleted warehouse genuinely can't reach RUNNING, so even with the flag + // on it resolves immediately with the observed state. + await expect( + waitUntilRunning(client, "wh-1", { + maxMs: 60000, + treatStoppedAsTransient: true, + }), + ).resolves.toBe("DELETED"); + expect(get).toHaveBeenCalledTimes(1); + }); + + test("rejects when maxMs elapses while still STARTING", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STARTING")); + const client = makeClient(get); + + const promise = waitUntilRunning(client, "wh-1", { maxMs: 3000 }); + // Attach a rejection handler immediately so the eventual throw isn't an + // unhandled rejection while we advance the clock. + const settled = expect(promise).rejects.toThrow( + /wh-1 did not reach RUNNING within 3000ms/, + ); + + // Push well past the 3000ms budget; exponential backoff (1000 + 2000 = 3000) + // means the deadline check trips on the next iteration. + await vi.advanceTimersByTimeAsync(10000); + + await settled; + }); + + test("stops immediately when the signal is already aborted", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STARTING")); + const client = makeClient(get); + + const controller = new AbortController(); + controller.abort(); + + // The pre-loop abort check throws before the first poll, so warehouses.get + // is never even called. + await expect( + waitUntilRunning(client, "wh-1", { + maxMs: 60000, + signal: controller.signal, + }), + ).rejects.toMatchObject({ name: "AbortError" }); + expect(get).not.toHaveBeenCalled(); + }); + + test("stops promptly when aborted mid-wait", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STARTING")); + const client = makeClient(get); + const controller = new AbortController(); + + const promise = waitUntilRunning(client, "wh-1", { + maxMs: 60000, + signal: controller.signal, + }); + const settled = expect(promise).rejects.toMatchObject({ + name: "AbortError", + }); + + // Flush only the first poll's microtask (advance 0, not the full 1000ms + // backoff): the wait is now parked on its first backoff sleep, one poll in. + await vi.advanceTimersByTimeAsync(0); + expect(get).toHaveBeenCalledTimes(1); + + // Abort while parked between polls: the backoff sleep resolves immediately + // via its abort listener, then the post-sleep abort check throws — so we + // never issue a second poll. + controller.abort(); + await vi.advanceTimersByTimeAsync(0); + + await settled; + expect(get).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/appkit/src/type-generator/vite-plugin.ts b/packages/appkit/src/type-generator/vite-plugin.ts index 1a9117aed..4d217cc7b 100644 --- a/packages/appkit/src/type-generator/vite-plugin.ts +++ b/packages/appkit/src/type-generator/vite-plugin.ts @@ -1,15 +1,31 @@ import { existsSync } from "node:fs"; import path from "node:path"; +import { WorkspaceClient } from "@databricks/sdk-experimental"; import type { Plugin } from "vite"; import { createLogger } from "../logging/logger"; import { ANALYTICS_TYPES_FILE, generateFromEntryPoint, TYPES_DIR, + TypegenFatalError, + TypegenSyntaxError, } from "./index"; +import { + getWarehouseState, + startWarehouse, + waitUntilRunning, +} from "./warehouse-status"; const logger = createLogger("type-generator:vite-plugin"); +/** + * How long the DEV background watcher waits for a STARTING warehouse to reach + * RUNNING before giving up. Short relative to the CLI's preflight budget: this + * is a best-effort "regenerate once the warehouse warms up" convenience, not a + * gate, so we'd rather stop polling than hold a detached task open for minutes. + */ +const DEV_WAREHOUSE_WATCH_MAX_MS = 60_000; + /** * Options for the AppKit types plugin. */ @@ -30,7 +46,27 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { let outFile: string; let watchFolders: string[]; - async function generate() { + // Single-flight state for runGenerate(). `inFlight` is the promise of the + // currently-running drain (null when idle); `queued` records that a trigger + // arrived while a run was active so exactly ONE trailing run fires afterwards + // (latest-wins — coalesces any number of overlapping triggers into a single + // rerun). `queued` is read/cleared synchronously inside the drain loop so a + // trigger landing in any window is caught before the drain exits. + let inFlight: Promise | null = null; + let queued = false; + + // The currently-armed DEV background warehouse watch, if any. Aborting it + // stops a pending waitUntilRunning (server shutdown, or a newer arm replacing + // an older one). + let watchController: AbortController | null = null; + + /** + * Generate types once. Never throws in dev (logs instead); in production it + * rethrows so the build fails. This is the un-guarded core — callers should + * go through {@link runGenerate} so concurrent triggers can't race-write the + * .d.ts. + */ + async function generateOnce() { try { const warehouseId = process.env.DATABRICKS_WAREHOUSE_ID || ""; @@ -44,16 +80,156 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { queryFolder: watchFolders[0], warehouseId, noCache: false, + // Production build blocks for warehouse readiness and fails fast on a + // stopped/deleted warehouse; dev rolls forward (degrades) and never + // blocks startup. + mode: process.env.NODE_ENV === "production" ? "blocking" : "dev", }); } catch (error) { + // TypegenSyntaxError / TypegenFatalError carry a complete, actionable + // report in their message. Their stack frames and attached query arrays + // point into appkit internals and only add noise, so surface just the + // message — both when failing the prod build and when logging in dev. + const isTypegenError = + error instanceof TypegenSyntaxError || + error instanceof TypegenFatalError; + // throw in production to fail the build if (process.env.NODE_ENV === "production") { + if (isTypegenError) error.stack = error.message; throw error; } - logger.error("Error generating types: %O", error); + + if (isTypegenError) { + logger.error("%s", error.message); + } else { + logger.error("Error generating types: %O", error); + } } } + /** + * Single-flight wrapper around {@link generateOnce}. The initial build, the + * .sql watcher, and the DEV warehouse watch all route through here so they can + * never run typegen concurrently (which would race-write the .d.ts). + * + * If a run is already in flight, this does NOT start a second one — it sets a + * trailing flag so exactly one more run fires after the current finishes, + * coalescing any number of overlapping triggers (latest-wins). + * + * @returns A promise that resolves when this trigger's work (including any + * trailing run it scheduled) has completed. + */ + function runGenerate(): Promise { + if (inFlight) { + // A run is active: remember that another trigger arrived and ride out the + // current run. One trailing run then covers all coalesced triggers. + queued = true; + return inFlight; + } + + // Drain in a loop rather than recursing after a single queued-check: a + // trigger can land in the window between generateOnce() resolving and the + // check, so we re-test `queued` until it's clear. Critically, `inFlight` is + // cleared synchronously in the SAME tick as the final `queued === false` + // observation — never deferred to a .finally microtask — so there's no + // window where a trigger sees `inFlight` set but the drain has already + // decided to exit. The guard stays held for the whole drain, so concurrent + // triggers only ever set the flag; they never start a parallel generate. + const drain = async (): Promise => { + while (true) { + queued = false; + await generateOnce(); + // Synchronous check + clear, atomic w.r.t. other (synchronous) callers. + if (!queued) { + inFlight = null; + return; + } + } + }; + + inFlight = drain(); + return inFlight; + } + + /** + * DEV-only: warm a stopped/cold-starting warehouse in the background and + * regenerate once it reaches RUNNING, so fresh (non-degraded) types land in + * the editor — without blocking dev startup. + * + * Post-probe behaviour by state: + * - RUNNING → return (the initial build already produced fresh types). + * - DELETED / DELETING → return (a deleted warehouse can't be started). + * - STOPPED / STOPPING → kick off a start, then wait for RUNNING. + * - STARTING → it's already coming up; just wait for RUNNING. + * + * No-op in production or without a warehouse id. Replaces any previously-armed + * watch (aborting it first). Fully self-contained: it never throws into the + * caller and never re-arms itself. + */ + function armWarehouseWatch(): void { + if (process.env.NODE_ENV === "production") return; + + const warehouseId = process.env.DATABRICKS_WAREHOUSE_ID || ""; + if (!warehouseId) return; + + // Supersede any in-flight watch so we never run two concurrently. + watchController?.abort(); + const controller = new AbortController(); + watchController = controller; + const { signal } = controller; + + void (async () => { + try { + const client = new WorkspaceClient({}); + const state = await getWarehouseState(client, warehouseId); + + // RUNNING already produced fresh types on the initial build; a deleted + // warehouse can't be started. Neither is worth watching. + if ( + state === "RUNNING" || + state === "DELETED" || + state === "DELETING" + ) { + return; + } + + // Stopped/stopping won't reach RUNNING on its own — nudge it. STARTING + // is already coming up, so don't issue a redundant start. A failed start + // is non-fatal: give up silently rather than throw out of the detached + // task (the developer still has degraded/cached types). + let startedByUs = false; + if (state === "STOPPED" || state === "STOPPING") { + try { + logger.debug("Warehouse is %s; starting it.", state); + await startWarehouse(client, warehouseId); + startedByUs = true; + } catch { + return; + } + } + + const final = await waitUntilRunning(client, warehouseId, { + maxMs: DEV_WAREHOUSE_WATCH_MAX_MS, + signal, + // We just issued the start, so the first poll(s) often still report + // STOPPED/STOPPING before the start propagates. Poll through those + // instead of bailing, or the regenerate would never fire. When we + // didn't start it (STARTING branch), keep the default terminal states. + treatStoppedAsTransient: startedByUs, + }); + + if (final === "RUNNING" && !signal.aborted) { + logger.debug("Warehouse reached RUNNING; regenerating types."); + await runGenerate(); + } + } catch { + // Detached background task: any failure (timeout, abort, connectivity, + // auth) is non-fatal — the developer still has degraded/cached types. + } + })(); + } + return { name: "appkit-types", @@ -84,7 +260,17 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { }, buildStart() { - return generate(); + // Production: block the build on this generate (and surface failures). + // The watch is a dev-only no-op, so just run typegen. + if (process.env.NODE_ENV === "production") { + return runGenerate(); + } + + // Dev: don't block startup waiting on typegen. Kick off the initial + // generate, then arm the warehouse watch so a cold-starting warehouse gets + // a one-shot regenerate once it's RUNNING. + void runGenerate(); + armWarehouseWatch(); }, configureServer(server) { @@ -96,9 +282,20 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { ); if (isWatchedFile && changedFile.endsWith(".sql")) { - generate(); + // Route through the single-flight runner (was fire-and-forget + // generate(), which could race the initial build / watch). Re-arm the + // warehouse watch too: editing a query against a still-starting + // warehouse should also pick up fresh types once it warms up. + void runGenerate(); + armWarehouseWatch(); } }); + + // Tear down any pending warehouse watch when the dev server closes so a + // long backoff can't keep the process alive after shutdown. + server.httpServer?.once("close", () => { + watchController?.abort(); + }); }, }; } diff --git a/packages/appkit/src/type-generator/warehouse-status.ts b/packages/appkit/src/type-generator/warehouse-status.ts new file mode 100644 index 000000000..aa50089ef --- /dev/null +++ b/packages/appkit/src/type-generator/warehouse-status.ts @@ -0,0 +1,176 @@ +import type { WorkspaceClient } from "@databricks/sdk-experimental"; + +/** + * Lifecycle states a SQL warehouse can report. Mirrors the Databricks SDK + * `State` union; redeclared here so callers of this module don't need to reach + * into the SDK's deep type paths. + */ +export type WarehouseState = + | "RUNNING" + | "STARTING" + | "STOPPED" + | "STOPPING" + | "DELETING" + | "DELETED"; + +/** Backoff bounds for {@link waitUntilRunning}. */ +const INITIAL_POLL_MS = 1000; +const MAX_POLL_MS = 15000; + +/** States from which the warehouse will not transition to RUNNING on its own. */ +const NOT_COMING_UP: ReadonlySet = new Set([ + "STOPPED", + "STOPPING", + "DELETED", + "DELETING", +]); + +/** + * Terminal states even when {@link waitUntilRunning} is told to treat + * STOPPED/STOPPING as transient: a deleted (or deleting) warehouse genuinely + * can't reach RUNNING, so we still resolve with the observed state. + */ +const NEVER_COMING_UP: ReadonlySet = new Set([ + "DELETED", + "DELETING", +]); + +/** + * Sleep for `ms`, resolving early if `signal` aborts. The pending timer is + * always cleared (on resolve and on abort) so a long backoff can't keep the + * event loop alive after the caller has bailed. + */ +function delay(ms: number, signal?: AbortSignal): Promise { + return new Promise((resolve) => { + if (signal?.aborted) { + resolve(); + return; + } + + const timer = setTimeout(() => { + signal?.removeEventListener("abort", onAbort); + resolve(); + }, ms); + + function onAbort() { + clearTimeout(timer); + resolve(); + } + + signal?.addEventListener("abort", onAbort, { once: true }); + }); +} + +/** + * Fetch the current lifecycle state of a SQL warehouse. + * + * Errors from the SDK (auth, bad warehouse id, connectivity) are intentionally + * NOT caught — the caller decides how to classify and react to them. + */ +export async function getWarehouseState( + client: WorkspaceClient, + warehouseId: string, +): Promise { + const response = await client.warehouses.get({ id: warehouseId }); + return response.state as WarehouseState; +} + +/** + * Initiate a start of a stopped/stopping SQL warehouse. + * + * Only KICKS OFF the start: the SDK's `start()` returns a Waiter, but we + * deliberately do not `.wait()` on it. Blocking on the full cold-start isn't our + * job here — {@link waitUntilRunning} is the poller that watches the warehouse + * the rest of the way to RUNNING. We just nudge it out of the stopped state. + * + * Errors from the SDK (auth, bad warehouse id, connectivity) are intentionally + * NOT caught — the caller decides how to classify and react to them. + */ +export async function startWarehouse( + client: WorkspaceClient, + warehouseId: string, +): Promise { + await client.warehouses.start({ id: warehouseId }); +} + +/** + * Poll a warehouse until it reaches RUNNING, settles into a state it won't + * leave on its own, or a deadline elapses. + * + * Polling uses exponential backoff: the first wait is ~{@link INITIAL_POLL_MS}, + * doubling on each subsequent poll up to a ~{@link MAX_POLL_MS} cap. + * + * Resolution: + * - Resolves `"RUNNING"` as soon as the warehouse is running. + * - Resolves with the observed state if it reaches a not-coming-up state + * (`STOPPED`/`STOPPING`/`DELETED`/`DELETING`) — the caller decides what to do. + * + * Set `opts.treatStoppedAsTransient` when the caller has just issued a start and + * a still-`STOPPED`/`STOPPING` reading is expected to be a stale pre-start blip + * rather than a settled state. With it on, those two states are polled through + * (like `STARTING`) until RUNNING, a genuinely terminal `DELETED`/`DELETING`, or + * the deadline — so an immediate post-start STOPPED reading no longer bails the + * wait. Off (default), STOPPED/STOPPING remain terminal and resolve as before. + * + * Pass `opts.signal` to abort an in-progress wait (e.g. a dev server shutting + * down): the next deadline/abort check throws an `AbortError`, and a pending + * backoff sleep resolves immediately rather than holding the process open. + * + * @throws Error if `maxMs` elapses before the warehouse reaches RUNNING. + * @throws Error (`name === "AbortError"`) if `opts.signal` is or becomes aborted. + */ +export async function waitUntilRunning( + client: WorkspaceClient, + warehouseId: string, + opts: { + maxMs: number; + pollMs?: number; + signal?: AbortSignal; + treatStoppedAsTransient?: boolean; + }, +): Promise { + const { maxMs, signal, treatStoppedAsTransient } = opts; + const start = Date.now(); + let pollMs = opts.pollMs ?? INITIAL_POLL_MS; + + // Which states end the wait early. When we've just issued a start, STOPPED and + // STOPPING are expected stale readings, so only DELETED/DELETING stay terminal. + const terminalStates = treatStoppedAsTransient + ? NEVER_COMING_UP + : NOT_COMING_UP; + + while (true) { + throwIfAborted(signal); + + const state = await getWarehouseState(client, warehouseId); + if (state === "RUNNING") return "RUNNING"; + if (terminalStates.has(state)) return state; + + if (Date.now() - start >= maxMs) { + throw new Error( + `Warehouse ${warehouseId} did not reach RUNNING within ${maxMs}ms (last state: ${state})`, + ); + } + + await delay(pollMs, signal); + throwIfAborted(signal); + + // Re-check the deadline after sleeping so we don't issue another poll past + // the budget purely because we napped through it. + if (Date.now() - start >= maxMs) { + throw new Error( + `Warehouse ${warehouseId} did not reach RUNNING within ${maxMs}ms (last state: ${state})`, + ); + } + + pollMs = Math.min(pollMs * 2, MAX_POLL_MS); + } +} + +/** Throw a DOMException-style AbortError if the signal has been aborted. */ +function throwIfAborted(signal?: AbortSignal): void { + if (!signal?.aborted) return; + const error = new Error("The warehouse wait was aborted."); + error.name = "AbortError"; + throw error; +} diff --git a/packages/shared/src/cli/commands/generate-types.test.ts b/packages/shared/src/cli/commands/generate-types.test.ts new file mode 100644 index 000000000..ad3c2117d --- /dev/null +++ b/packages/shared/src/cli/commands/generate-types.test.ts @@ -0,0 +1,21 @@ +import { describe, expect, test } from "vitest"; +import { resolveTypegenMode } from "./generate-types"; + +describe("resolveTypegenMode (generate-types --no-block)", () => { + test("defaults to blocking when no options/flag are given", () => { + expect(resolveTypegenMode()).toBe("blocking"); + expect(resolveTypegenMode({})).toBe("blocking"); + }); + + test("stays blocking when block is true (flag absent)", () => { + expect(resolveTypegenMode({ block: true })).toBe("blocking"); + }); + + test("switches to degrade when --no-block sets block to false", () => { + // commander maps `--no-block` to `{ block: false }`. The template's + // postinstall/predev use this so a one-shot CLI never describes — it emits + // cached/`unknown` types and exits 0 instead of blocking on (or failing + // because of) a warehouse, even a RUNNING one. + expect(resolveTypegenMode({ block: false })).toBe("degrade"); + }); +}); diff --git a/packages/shared/src/cli/commands/generate-types.ts b/packages/shared/src/cli/commands/generate-types.ts index 1be7c7e2e..a5e2fd7ca 100644 --- a/packages/shared/src/cli/commands/generate-types.ts +++ b/packages/shared/src/cli/commands/generate-types.ts @@ -2,6 +2,22 @@ import fs from "node:fs"; import path from "node:path"; import { Command } from "commander"; +/** + * Resolve the typegen pre-flight mode for the CLI. Defaults to "blocking" — a + * deliberate/CI invocation should wait for a starting warehouse and fail fast on + * a stopped one. `--no-block` (commander sets `block: false`) switches to + * "degrade": a one-shot CLI can't describe in the background, so this mode never + * describes at all — it skips the warehouse probe AND every DESCRIBE, emits + * best-available types (cache where the SQL hash matches, else `result: unknown`) + * and returns immediately. The scaffolded template's postinstall/predev use it so + * they never block on — or get slowed by — a warehouse, even a RUNNING one. + */ +export function resolveTypegenMode(options?: { + block?: boolean; +}): "dev" | "blocking" | "degrade" { + return options?.block === false ? "degrade" : "blocking"; +} + /** * Generate types command implementation */ @@ -9,11 +25,12 @@ async function runGenerateTypes( rootDir?: string, outFile?: string, warehouseId?: string, - options?: { noCache?: boolean }, + options?: { noCache?: boolean; block?: boolean }, ) { try { const resolvedRootDir = rootDir || process.cwd(); const noCache = options?.noCache || false; + const mode = resolveTypegenMode(options); const typeGen = await import("@databricks/appkit/type-generator"); @@ -33,6 +50,7 @@ async function runGenerateTypes( outFile: resolvedOutFile, warehouseId: resolvedWarehouseId, noCache, + mode, }); console.log(`Generated query types: ${resolvedOutFile}`); } @@ -63,6 +81,18 @@ async function runGenerateTypes( console.error("Please install @databricks/appkit to use this command."); process.exit(1); } + // TypegenSyntaxError / TypegenFatalError carry a complete, actionable + // message (which queries failed and how to debug them). The stack trace + // points into appkit internals and is noise for app developers, so print + // only the message and exit non-zero instead of letting it bubble up. + if ( + error instanceof Error && + (error.name === "TypegenSyntaxError" || + error.name === "TypegenFatalError") + ) { + console.error(error.message); + process.exit(1); + } throw error; } } @@ -77,6 +107,10 @@ export const generateTypesCommand = new Command("generate-types") ) .argument("[warehouseId]", "Databricks warehouse ID") .option("--no-cache", "Disable caching for type generation") + .option( + "--no-block", + "Degrade instead of blocking on warehouse readiness (use for postinstall)", + ) .addHelpText( "after", ` @@ -84,6 +118,7 @@ Examples: $ appkit generate-types $ appkit generate-types . shared/appkit-types/analytics.d.ts $ appkit generate-types . shared/appkit-types/analytics.d.ts my-warehouse-id - $ appkit generate-types --no-cache`, + $ appkit generate-types --no-cache + $ appkit generate-types --no-block # postinstall: never block/fail on a cold warehouse`, ) .action(runGenerateTypes); diff --git a/packages/shared/src/cli/commands/type-generator.d.ts b/packages/shared/src/cli/commands/type-generator.d.ts index 5b373b855..1cf65cdb1 100644 --- a/packages/shared/src/cli/commands/type-generator.d.ts +++ b/packages/shared/src/cli/commands/type-generator.d.ts @@ -5,6 +5,11 @@ declare module "@databricks/appkit/type-generator" { outFile: string; warehouseId: string; noCache?: boolean; + // Warehouse preflight policy. "dev" never blocks the developer; "blocking" + // waits for a startable warehouse and treats a stopped one as fatal; + // "degrade" never probes the warehouse and never describes (emits + // cached/`unknown` types and returns immediately). + mode?: "dev" | "blocking" | "degrade"; }): Promise; export class TypegenSyntaxError extends Error { diff --git a/template/package.json b/template/package.json index d29661427..f25dfe770 100644 --- a/template/package.json +++ b/template/package.json @@ -20,11 +20,12 @@ "test:e2e:ui": "playwright test --ui", "test:smoke": "playwright install chromium && playwright test tests/smoke.spec.ts", "clean": "rm -rf client/dist dist build node_modules .smoke-test test-results playwright-report", - "postinstall": "npm run typegen", + "postinstall": "npm run typegen:no-block", "prebuild": "npm run sync && npm run typegen", - "predev": "npm run sync && npm run typegen", + "predev": "npm run sync && npm run typegen:no-block", "sync": "appkit plugin sync --write --silent", "typegen": "appkit generate-types", + "typegen:no-block": "appkit generate-types --no-block", "setup": "appkit setup --write" }, "keywords": [], From befe0d987266b0582e3decb4c635c0f03bd8493f Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Fri, 5 Jun 2026 18:44:06 +0200 Subject: [PATCH 06/13] refactor(appkit): collapse typegen pre-flight modes to non-blocking|blocking Replace the dev/blocking/degrade modes with two: non-blocking (default) and blocking. non-blocking always degrades (skip probe + DESCRIBE, write cache-or- unknown instantly); blocking keeps the current probe+wait flow. The CLI default flips to non-blocking and --no-block becomes a positive --block flag. The dev plugin runs the foreground in non-blocking (instant degrade) while its background warehouse-watch regenerates in blocking so real types still land. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/type-generator/index.ts | 8 +- .../appkit/src/type-generator/preflight.ts | 26 ++-- .../src/type-generator/query-registry.ts | 22 ++-- .../tests/generate-queries.test.ts | 120 +++++++++++------- .../type-generator/tests/preflight.test.ts | 23 ++-- .../type-generator/tests/vite-plugin.test.ts | 27 +++- .../appkit/src/type-generator/vite-plugin.ts | 80 ++++++++---- .../src/cli/commands/generate-types.test.ts | 26 ++-- .../shared/src/cli/commands/generate-types.ts | 24 ++-- .../src/cli/commands/type-generator.d.ts | 10 +- template/package.json | 4 +- 11 files changed, 226 insertions(+), 144 deletions(-) diff --git a/packages/appkit/src/type-generator/index.ts b/packages/appkit/src/type-generator/index.ts index 0516c42d0..42b8c3244 100644 --- a/packages/appkit/src/type-generator/index.ts +++ b/packages/appkit/src/type-generator/index.ts @@ -208,7 +208,13 @@ export async function generateFromEntryPoint(options: { noCache?: boolean; mode?: PreflightMode; }) { - const { outFile, queryFolder, warehouseId, noCache, mode = "dev" } = options; + const { + outFile, + queryFolder, + warehouseId, + noCache, + mode = "non-blocking", + } = options; const projectRoot = resolveProjectRoot(outFile); logger.debug("Starting type generation..."); diff --git a/packages/appkit/src/type-generator/preflight.ts b/packages/appkit/src/type-generator/preflight.ts index 3bb67c5fd..44d33de8b 100644 --- a/packages/appkit/src/type-generator/preflight.ts +++ b/packages/appkit/src/type-generator/preflight.ts @@ -2,15 +2,14 @@ import type { WarehouseState } from "./warehouse-status"; /** * How aggressively typegen should react to a not-ready warehouse. - * - `dev`: never block the developer; degrade to `unknown`/cached types, but a - * RUNNING warehouse is still described (fast path / background fire-and-forget). + * - `non-blocking`: never describe and never probe the warehouse — emit + * best-available types (cache where the SQL hash matches, else `unknown`) and + * return at once. The default for interactive/foreground runs that can't + * afford to block on (or fail because of) a warehouse, even a RUNNING one. * - `blocking`: a startable warehouse is worth waiting for, and a stopped one * is a hard failure. - * - `degrade`: never describe and never probe the warehouse — emit best-available - * types (cache where the SQL hash matches, else `unknown`) and return at once. - * For a one-shot CLI (`--no-block`) that can't describe in the background. */ -export type PreflightMode = "dev" | "blocking" | "degrade"; +export type PreflightMode = "non-blocking" | "blocking"; /** * What the caller should do given a warehouse state and mode. @@ -36,19 +35,22 @@ export function decidePreflight( state: WarehouseState, mode: PreflightMode, ): PreflightDecision { - // `degrade` never describes regardless of state: emit cached/`unknown` types - // and return. The caller short-circuits before probing, so this is only a - // belt-and-suspenders mapping, but it keeps the policy total and self-contained. - if (mode === "degrade") return "degradeAll"; + // `non-blocking` never describes regardless of state: emit cached/`unknown` + // types and return. The caller short-circuits before probing, so this is only + // a belt-and-suspenders mapping, but it keeps the policy total and + // self-contained. + if (mode === "non-blocking") return "degradeAll"; + // `blocking`: a startable warehouse is worth waiting for, a stopped one is + // fatal, and a deleted one is always fatal. switch (state) { case "RUNNING": return "proceed"; case "STARTING": - return mode === "blocking" ? "waitThenProceed" : "degradeAll"; + return "waitThenProceed"; case "STOPPED": case "STOPPING": - return mode === "blocking" ? "fatal" : "degradeAll"; + return "fatal"; case "DELETED": case "DELETING": return "fatal"; diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts index da484ca86..284847493 100644 --- a/packages/appkit/src/type-generator/query-registry.ts +++ b/packages/appkit/src/type-generator/query-registry.ts @@ -420,11 +420,10 @@ export function inferParameterTypes( * @param warehouseId - the warehouse id to use for schema analysis * @param options - options for the query generation * @param options.noCache - if true, skip the cache and regenerate all types - * @param options.mode - preflight policy: "dev" never blocks the developer - * (degrades a not-ready warehouse, but still describes a RUNNING one), - * "blocking" waits for a startable warehouse and treats a stopped one as fatal, - * "degrade" never probes the warehouse and never describes (emits cached/`unknown` - * types and returns immediately). Defaults to "dev". + * @param options.mode - preflight policy: "non-blocking" never probes the + * warehouse and never describes (emits cached/`unknown` types and returns + * immediately), "blocking" waits for a startable warehouse and treats a + * stopped one as fatal. Defaults to "non-blocking". * @returns an array of query schemas */ export async function generateQueriesFromDescribe( @@ -439,7 +438,7 @@ export async function generateQueriesFromDescribe( const { noCache = false, concurrency: rawConcurrency = 10, - mode = "dev", + mode = "non-blocking", } = options; const concurrency = typeof rawConcurrency === "number" && Number.isFinite(rawConcurrency) @@ -583,11 +582,12 @@ export async function generateQueriesFromDescribe( // not-ready warehouse degrades exactly like a per-query outage. let decision: ReturnType = "proceed"; let fatalMessage = ""; - if (mode === "degrade") { - // `degrade` never describes and must make ZERO warehouse round-trips: skip - // the probe entirely (no getWarehouseState) and go straight to degradeAll. - // A one-shot CLI (`--no-block`) can't describe in the background, so it - // emits best-available types (reused cache or `unknown`) and returns now. + if (mode === "non-blocking") { + // `non-blocking` never describes and must make ZERO warehouse round-trips: + // skip the probe entirely (no getWarehouseState) and go straight to + // degradeAll. A foreground/one-shot run can't describe in the background, + // so it emits best-available types (reused cache or `unknown`) and returns + // now. decision = "degradeAll"; } else { try { diff --git a/packages/appkit/src/type-generator/tests/generate-queries.test.ts b/packages/appkit/src/type-generator/tests/generate-queries.test.ts index ea46f03de..e6b5a9af7 100644 --- a/packages/appkit/src/type-generator/tests/generate-queries.test.ts +++ b/packages/appkit/src/type-generator/tests/generate-queries.test.ts @@ -45,6 +45,21 @@ vi.mock("../cache", async (importOriginal) => { const { generateQueriesFromDescribe } = await import("../query-registry"); const { CACHE_VERSION, hashSQL } = await import("../cache"); +// The default mode is "non-blocking", which never probes the warehouse and never +// describes. The bulk of these tests exercise the DESCRIBE / classify path, so +// they run in "blocking" mode (probe → proceed → describe) by default. Tests +// that specifically assert the non-blocking short-circuit pass an explicit mode. +function describeQueries( + queryFolder: string, + warehouseId: string, + options: Parameters[2] = {}, +) { + return generateQueriesFromDescribe(queryFolder, warehouseId, { + mode: "blocking", + ...options, + }); +} + // Sentinel for a previously-generated good type. The code passes cached types // through verbatim, so equality proves reuse rather than regeneration. const CACHED_GOOD_TYPE = "RESULT_REUSED_FROM_CACHE"; @@ -86,8 +101,10 @@ describe("generateQueriesFromDescribe", () => { ]), ); - const { schemas, syntaxErrors, fatalErrors } = - await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("users"); @@ -112,7 +129,7 @@ describe("generateQueriesFromDescribe", () => { }, }); - const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await describeQueries("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("bad_table"); @@ -129,7 +146,7 @@ describe("generateQueriesFromDescribe", () => { status: { state: "FAILED" }, }); - const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await describeQueries("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("query"); @@ -154,7 +171,7 @@ describe("generateQueriesFromDescribe", () => { }, }); - const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await describeQueries("/queries", "wh-123"); expect(schemas).toHaveLength(2); @@ -183,8 +200,10 @@ describe("generateQueriesFromDescribe", () => { status: { state: "FAILED", error: { message: "Table not found" } }, }); - const { schemas, syntaxErrors, fatalErrors } = - await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); expect(schemas).toHaveLength(2); expect(schemas[0].name).toBe("a"); @@ -196,6 +215,8 @@ describe("generateQueriesFromDescribe", () => { expect(mocks.saveCache).toHaveBeenCalledTimes(1); // a = connectivity (rejected) → NOT a syntax error; b = FAILED → syntax error expect(syntaxErrors).toEqual([{ name: "b", message: "Table not found" }]); + // neither a connectivity failure nor a SQL error is classified as fatal + expect(fatalErrors).toEqual([]); // neither failure is persisted to the cache expect(lastSavedQueries()).not.toHaveProperty("a"); expect(lastSavedQueries()).not.toHaveProperty("b"); @@ -214,13 +235,9 @@ describe("generateQueriesFromDescribe", () => { .mockResolvedValueOnce(succeededResult([["id", "INT", null]])) .mockResolvedValueOnce(succeededResult([["id", "INT", null]])); - const { schemas } = await generateQueriesFromDescribe( - "/queries", - "wh-123", - { - concurrency: 2, - }, - ); + const { schemas } = await describeQueries("/queries", "wh-123", { + concurrency: 2, + }); expect(schemas).toHaveLength(3); expect(schemas[0].name).toBe("q1"); @@ -240,7 +257,7 @@ describe("generateQueriesFromDescribe", () => { Object.assign(new Error("connect ETIMEDOUT"), { code: "ETIMEDOUT" }), ); - const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await describeQueries("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].type).toContain("status: SQLStringMarker"); @@ -267,8 +284,10 @@ describe("generateQueriesFromDescribe", () => { }), ); - const { schemas, syntaxErrors, fatalErrors } = - await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); expect(schemas[0].type).not.toBe(CACHED_GOOD_TYPE); expect(schemas[0].type).toContain("result: unknown"); @@ -290,7 +309,7 @@ describe("generateQueriesFromDescribe", () => { new Error("PERMISSION_DENIED: missing warehouse permission"), ); - const { schemas, fatalErrors } = await generateQueriesFromDescribe( + const { schemas, fatalErrors } = await describeQueries( "/queries", "wh-123", ); @@ -313,7 +332,7 @@ describe("generateQueriesFromDescribe", () => { Object.assign(new Error("Service unavailable"), { statusCode: 503 }), ); - const { schemas, fatalErrors } = await generateQueriesFromDescribe( + const { schemas, fatalErrors } = await describeQueries( "/queries", "wh-123", ); @@ -343,7 +362,7 @@ describe("generateQueriesFromDescribe", () => { mocks.readFile.mockResolvedValue("SELECT id FROM users"); mocks.executeStatement.mockRejectedValueOnce(error); - const { schemas, fatalErrors } = await generateQueriesFromDescribe( + const { schemas, fatalErrors } = await describeQueries( "/queries", "wh-123", ); @@ -367,8 +386,10 @@ describe("generateQueriesFromDescribe", () => { }) .mockRejectedValueOnce(new Error("PERMISSION_DENIED")); - const { schemas, syntaxErrors, fatalErrors } = - await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); expect(schemas).toHaveLength(2); expect(syntaxErrors).toEqual([ @@ -388,7 +409,7 @@ describe("generateQueriesFromDescribe", () => { }), ); - const { schemas, fatalErrors } = await generateQueriesFromDescribe( + const { schemas, fatalErrors } = await describeQueries( "/queries", "wh-123", ); @@ -409,7 +430,7 @@ describe("generateQueriesFromDescribe", () => { ), ); - const { schemas, fatalErrors } = await generateQueriesFromDescribe( + const { schemas, fatalErrors } = await describeQueries( "/queries", "wh-123", ); @@ -425,10 +446,7 @@ describe("generateQueriesFromDescribe", () => { new Error("unable to verify the first certificate"), ); - const { fatalErrors } = await generateQueriesFromDescribe( - "/queries", - "wh-123", - ); + const { fatalErrors } = await describeQueries("/queries", "wh-123"); expect(fatalErrors).toEqual([]); }); @@ -448,7 +466,7 @@ describe("generateQueriesFromDescribe", () => { }), ); - const { schemas, fatalErrors } = await generateQueriesFromDescribe( + const { schemas, fatalErrors } = await describeQueries( "/queries", "wh-123", ); @@ -475,7 +493,7 @@ describe("generateQueriesFromDescribe", () => { .mockResolvedValueOnce(succeededResult([["id", "INT", null]])) .mockRejectedValueOnce(new Error("PERMISSION_DENIED")); - const { schemas, fatalErrors } = await generateQueriesFromDescribe( + const { schemas, fatalErrors } = await describeQueries( "/queries", "wh-123", ); @@ -494,7 +512,7 @@ describe("generateQueriesFromDescribe", () => { mocks.readFile.mockResolvedValue("SELECT 1"); mocks.executeStatement.mockResolvedValue(succeededResult([])); - const { schemas, syntaxErrors } = await generateQueriesFromDescribe( + const { schemas, syntaxErrors } = await describeQueries( "/queries", "wh-123", ); @@ -515,8 +533,10 @@ describe("generateQueriesFromDescribe", () => { status: { state: "PENDING" }, }); - const { schemas, syntaxErrors, fatalErrors } = - await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("users"); @@ -543,10 +563,13 @@ describe("generateQueriesFromDescribe", () => { status: { state: "RUNNING" }, }); - const { schemas, syntaxErrors, fatalErrors } = - await generateQueriesFromDescribe("/queries", "wh-123", { + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + { concurrency: 1, - }); + }, + ); expect(schemas).toHaveLength(2); // both entries resolve to the good type — the PENDING one reuses the cache @@ -570,7 +593,7 @@ describe("generateQueriesFromDescribe", () => { }, }); - const { schemas, syntaxErrors } = await generateQueriesFromDescribe( + const { schemas, syntaxErrors } = await describeQueries( "/queries", "wh-123", ); @@ -593,7 +616,7 @@ describe("generateQueriesFromDescribe", () => { }, }); - const { schemas, syntaxErrors } = await generateQueriesFromDescribe( + const { schemas, syntaxErrors } = await describeQueries( "/queries", "wh-123", ); @@ -618,7 +641,7 @@ describe("generateQueriesFromDescribe", () => { succeededResult([["id", "INT", null]]), ); - const { schemas } = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await describeQueries("/queries", "wh-123"); expect(mocks.executeStatement).toHaveBeenCalledTimes(1); expect(schemas[0].type).toContain("id: number"); @@ -650,16 +673,19 @@ describe("generateQueriesFromDescribe", () => { expect(schemas[1].type).toContain("result: unknown"); }); - test("STOPPED + dev mode — degrades silently, never describes", async () => { + test("non-blocking mode — degrades silently without probing, even when STOPPED", async () => { mocks.readdir.mockResolvedValue(["a.sql"]); mocks.readFile.mockResolvedValue("SELECT id FROM a"); + // Even a STOPPED warehouse is irrelevant: non-blocking never probes. mocks.getWarehouse.mockReturnValue({ state: "STOPPED" }); const { schemas, syntaxErrors, fatalErrors } = await generateQueriesFromDescribe("/queries", "wh-123", { - mode: "dev", + mode: "non-blocking", }); + // ZERO warehouse round-trips: no probe (getWarehouse) and no DESCRIBE. + expect(mocks.getWarehouse).not.toHaveBeenCalled(); expect(mocks.executeStatement).not.toHaveBeenCalled(); expect(fatalErrors).toEqual([]); expect(syntaxErrors).toEqual([]); @@ -742,18 +768,18 @@ describe("generateQueriesFromDescribe", () => { expect(fatalErrors).toEqual([]); }); - test("degrade mode — skips probe + describes even when warehouse is RUNNING", async () => { + test("non-blocking mode — skips probe + describe even when warehouse is RUNNING", async () => { mocks.readdir.mockResolvedValue(["a.sql", "b.sql"]); mocks.readFile .mockResolvedValueOnce("SELECT id FROM a") .mockResolvedValueOnce("SELECT id FROM b"); // A RUNNING warehouse would normally take the proceed path and describe - // every query. In `degrade` mode the warehouse is never even probed. + // every query. In `non-blocking` mode the warehouse is never even probed. mocks.getWarehouse.mockReturnValue({ state: "RUNNING" }); const { schemas, syntaxErrors, fatalErrors } = await generateQueriesFromDescribe("/queries", "wh-123", { - mode: "degrade", + mode: "non-blocking", }); // ZERO warehouse round-trips: no probe (getWarehouse) and no DESCRIBE. @@ -770,11 +796,11 @@ describe("generateQueriesFromDescribe", () => { expect(fatalErrors).toEqual([]); }); - test("degrade mode — reuses the cached type when the SQL hash matches", async () => { + test("non-blocking mode — reuses the cached type when the SQL hash matches", async () => { const sql = "SELECT id FROM users"; mocks.readdir.mockResolvedValue(["users.sql"]); mocks.readFile.mockResolvedValue(sql); - // Seed a last-good cached type under the current SQL hash. degrade mode + // Seed a last-good cached type under the current SQL hash. non-blocking // serves it via the normal cache HIT path — still no probe, no DESCRIBE. mocks.loadCache.mockReturnValueOnce({ version: CACHE_VERSION, @@ -786,7 +812,7 @@ describe("generateQueriesFromDescribe", () => { const { schemas, syntaxErrors, fatalErrors } = await generateQueriesFromDescribe("/queries", "wh-123", { - mode: "degrade", + mode: "non-blocking", }); expect(mocks.getWarehouse).not.toHaveBeenCalled(); diff --git a/packages/appkit/src/type-generator/tests/preflight.test.ts b/packages/appkit/src/type-generator/tests/preflight.test.ts index f68eaa513..1be0da4fb 100644 --- a/packages/appkit/src/type-generator/tests/preflight.test.ts +++ b/packages/appkit/src/type-generator/tests/preflight.test.ts @@ -13,34 +13,27 @@ const cases: Array<{ mode: PreflightMode; expected: PreflightDecision; }> = [ - { state: "RUNNING", mode: "dev", expected: "proceed" }, { state: "RUNNING", mode: "blocking", expected: "proceed" }, - { state: "STARTING", mode: "dev", expected: "degradeAll" }, { state: "STARTING", mode: "blocking", expected: "waitThenProceed" }, - { state: "STOPPED", mode: "dev", expected: "degradeAll" }, { state: "STOPPED", mode: "blocking", expected: "fatal" }, - { state: "STOPPING", mode: "dev", expected: "degradeAll" }, { state: "STOPPING", mode: "blocking", expected: "fatal" }, - { state: "DELETED", mode: "dev", expected: "fatal" }, { state: "DELETED", mode: "blocking", expected: "fatal" }, - { state: "DELETING", mode: "dev", expected: "fatal" }, { state: "DELETING", mode: "blocking", expected: "fatal" }, // Unknown state: backstop is the describe loop, so don't block. - { state: "WEIRD_FUTURE_STATE", mode: "dev", expected: "proceed" }, { state: "WEIRD_FUTURE_STATE", mode: "blocking", expected: "proceed" }, - // `degrade` never describes: every state (even RUNNING) maps to degradeAll. - { state: "RUNNING", mode: "degrade", expected: "degradeAll" }, - { state: "STARTING", mode: "degrade", expected: "degradeAll" }, - { state: "STOPPED", mode: "degrade", expected: "degradeAll" }, - { state: "STOPPING", mode: "degrade", expected: "degradeAll" }, - { state: "DELETED", mode: "degrade", expected: "degradeAll" }, - { state: "DELETING", mode: "degrade", expected: "degradeAll" }, - { state: "WEIRD_FUTURE_STATE", mode: "degrade", expected: "degradeAll" }, + // `non-blocking` never describes: every state (even RUNNING) maps to degradeAll. + { state: "RUNNING", mode: "non-blocking", expected: "degradeAll" }, + { state: "STARTING", mode: "non-blocking", expected: "degradeAll" }, + { state: "STOPPED", mode: "non-blocking", expected: "degradeAll" }, + { state: "STOPPING", mode: "non-blocking", expected: "degradeAll" }, + { state: "DELETED", mode: "non-blocking", expected: "degradeAll" }, + { state: "DELETING", mode: "non-blocking", expected: "degradeAll" }, + { state: "WEIRD_FUTURE_STATE", mode: "non-blocking", expected: "degradeAll" }, ]; describe("decidePreflight", () => { diff --git a/packages/appkit/src/type-generator/tests/vite-plugin.test.ts b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts index 86e0a8bc8..629b07889 100644 --- a/packages/appkit/src/type-generator/tests/vite-plugin.test.ts +++ b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts @@ -134,14 +134,17 @@ describe("appKitTypesPlugin — generation mode", () => { else process.env.DATABRICKS_WAREHOUSE_ID = savedWarehouseId; }); - test('passes mode: "dev" when NODE_ENV is not production', async () => { + test('foreground passes mode: "non-blocking" when NODE_ENV is not production', async () => { process.env.NODE_ENV = "development"; await runPlugin(); + // Dev foreground degrades instantly: it never blocks and never describes. + // (The warehouse watch is a RUNNING no-op here, so there's no background + // regenerate — exactly one foreground call.) expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); expect(mocks.generateFromEntryPoint).toHaveBeenCalledWith( - expect.objectContaining({ mode: "dev" }), + expect.objectContaining({ mode: "non-blocking" }), ); }); @@ -296,6 +299,16 @@ describe("appKitTypesPlugin — background warehouse watch", () => { // Call 1: initial buildStart generate. Call 2: the watch's regenerate once // the warehouse reached RUNNING. expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + // Foreground (dev) degrades instantly; the background watch regenerate must + // DESCRIBE the now-RUNNING warehouse, so it runs blocking. + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ mode: "non-blocking" }), + ); + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ mode: "blocking" }), + ); }); test("STARTING → waits without starting, then RUNNING regenerates in dev", async () => { @@ -318,6 +331,16 @@ describe("appKitTypesPlugin — background warehouse watch", () => { expect.objectContaining({ treatStoppedAsTransient: false }), ); expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + // Foreground (dev) degrades instantly; the background watch regenerate runs + // blocking so it describes the now-RUNNING warehouse. + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ mode: "non-blocking" }), + ); + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ mode: "blocking" }), + ); }); test("RUNNING state does not start, wait, or regenerate", async () => { diff --git a/packages/appkit/src/type-generator/vite-plugin.ts b/packages/appkit/src/type-generator/vite-plugin.ts index 4d217cc7b..3cc3f4c66 100644 --- a/packages/appkit/src/type-generator/vite-plugin.ts +++ b/packages/appkit/src/type-generator/vite-plugin.ts @@ -10,6 +10,7 @@ import { TypegenFatalError, TypegenSyntaxError, } from "./index"; +import type { PreflightMode } from "./preflight"; import { getWarehouseState, startWarehouse, @@ -52,8 +53,15 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { // (latest-wins — coalesces any number of overlapping triggers into a single // rerun). `queued` is read/cleared synchronously inside the drain loop so a // trigger landing in any window is caught before the drain exits. + // + // `pendingMode` is the mode the next generate should run in (latest-wins, like + // `queued`): the foreground build runs non-blocking in dev (instant degrade) + // while the background warehouse watch runs blocking (real DESCRIBEs). A + // blocking watch trigger that lands while a non-blocking foreground run is in + // flight therefore still describes when its trailing run fires. let inFlight: Promise | null = null; let queued = false; + let pendingMode: PreflightMode = "non-blocking"; // The currently-armed DEV background warehouse watch, if any. Aborting it // stops a pending waitUntilRunning (server shutdown, or a newer arm replacing @@ -61,12 +69,17 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { let watchController: AbortController | null = null; /** - * Generate types once. Never throws in dev (logs instead); in production it - * rethrows so the build fails. This is the un-guarded core — callers should - * go through {@link runGenerate} so concurrent triggers can't race-write the - * .d.ts. + * Generate types once in the given preflight {@link PreflightMode}. Never + * throws in dev (logs instead); in production it rethrows so the build fails. + * This is the un-guarded core — callers should go through {@link runGenerate} + * so concurrent triggers can't race-write the .d.ts. + * + * @param mode - preflight policy for this run. The foreground build passes a + * NODE_ENV-derived mode (blocking in production, non-blocking in dev so it + * degrades instantly); the background warehouse watch passes "blocking" so + * its regenerate actually DESCRIBEs and lands real (non-degraded) types. */ - async function generateOnce() { + async function generateOnce(mode: PreflightMode) { try { const warehouseId = process.env.DATABRICKS_WAREHOUSE_ID || ""; @@ -80,10 +93,7 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { queryFolder: watchFolders[0], warehouseId, noCache: false, - // Production build blocks for warehouse readiness and fails fast on a - // stopped/deleted warehouse; dev rolls forward (degrades) and never - // blocks startup. - mode: process.env.NODE_ENV === "production" ? "blocking" : "dev", + mode, }); } catch (error) { // TypegenSyntaxError / TypegenFatalError carry a complete, actionable @@ -113,17 +123,24 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { * .sql watcher, and the DEV warehouse watch all route through here so they can * never run typegen concurrently (which would race-write the .d.ts). * - * If a run is already in flight, this does NOT start a second one — it sets a - * trailing flag so exactly one more run fires after the current finishes, - * coalescing any number of overlapping triggers (latest-wins). + * If a run is already in flight, this does NOT start a second one — it records + * the requested mode and sets a trailing flag so exactly one more run fires + * after the current finishes, coalescing any number of overlapping triggers + * (latest-wins, including the mode: a blocking watch trigger that arrives mid + * non-blocking foreground run still describes when its trailing run fires). * + * @param mode - preflight policy for this run. Recorded into `pendingMode`, + * which the drain reads for each generate (latest trigger wins). * @returns A promise that resolves when this trigger's work (including any * trailing run it scheduled) has completed. */ - function runGenerate(): Promise { + function runGenerate(mode: PreflightMode): Promise { + pendingMode = mode; + if (inFlight) { // A run is active: remember that another trigger arrived and ride out the - // current run. One trailing run then covers all coalesced triggers. + // current run. One trailing run then covers all coalesced triggers and + // runs in the latest requested mode (recorded above). queued = true; return inFlight; } @@ -139,7 +156,11 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { const drain = async (): Promise => { while (true) { queued = false; - await generateOnce(); + // Snapshot the mode synchronously alongside clearing `queued` so a + // trigger landing during this generate is observed (via `queued`) on the + // next loop with its own mode, not silently dropped. + const runMode = pendingMode; + await generateOnce(runMode); // Synchronous check + clear, atomic w.r.t. other (synchronous) callers. if (!queued) { inFlight = null; @@ -166,6 +187,10 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { * No-op in production or without a warehouse id. Replaces any previously-armed * watch (aborting it first). Fully self-contained: it never throws into the * caller and never re-arms itself. + * + * The regenerate runs in "blocking" mode (not the foreground's non-blocking) + * so it actually DESCRIBEs the now-RUNNING warehouse and lands real types — + * the whole point of warming the warehouse in the background. */ function armWarehouseWatch(): void { if (process.env.NODE_ENV === "production") return; @@ -221,7 +246,9 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { if (final === "RUNNING" && !signal.aborted) { logger.debug("Warehouse reached RUNNING; regenerating types."); - await runGenerate(); + // Blocking: the warehouse is RUNNING now, so describe it and emit real + // (non-degraded) types — unlike the foreground dev run, which degraded. + await runGenerate("blocking"); } } catch { // Detached background task: any failure (timeout, abort, connectivity, @@ -263,13 +290,15 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { // Production: block the build on this generate (and surface failures). // The watch is a dev-only no-op, so just run typegen. if (process.env.NODE_ENV === "production") { - return runGenerate(); + return runGenerate("blocking"); } - // Dev: don't block startup waiting on typegen. Kick off the initial - // generate, then arm the warehouse watch so a cold-starting warehouse gets - // a one-shot regenerate once it's RUNNING. - void runGenerate(); + // Dev: don't block startup waiting on typegen. The foreground generate runs + // non-blocking — it skips the warehouse entirely and writes degraded + // (cached/`unknown`) types instantly. Then arm the warehouse watch so a + // cold-starting warehouse gets a one-shot BLOCKING regenerate (real types) + // once it's RUNNING. + void runGenerate("non-blocking"); armWarehouseWatch(); }, @@ -283,10 +312,11 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { if (isWatchedFile && changedFile.endsWith(".sql")) { // Route through the single-flight runner (was fire-and-forget - // generate(), which could race the initial build / watch). Re-arm the - // warehouse watch too: editing a query against a still-starting - // warehouse should also pick up fresh types once it warms up. - void runGenerate(); + // generate(), which could race the initial build / watch). This is a + // dev-only hook, so degrade instantly (non-blocking), then re-arm the + // warehouse watch: editing a query against a still-starting warehouse + // should pick up fresh (blocking-described) types once it warms up. + void runGenerate("non-blocking"); armWarehouseWatch(); } }); diff --git a/packages/shared/src/cli/commands/generate-types.test.ts b/packages/shared/src/cli/commands/generate-types.test.ts index ad3c2117d..0a0873985 100644 --- a/packages/shared/src/cli/commands/generate-types.test.ts +++ b/packages/shared/src/cli/commands/generate-types.test.ts @@ -1,21 +1,23 @@ import { describe, expect, test } from "vitest"; import { resolveTypegenMode } from "./generate-types"; -describe("resolveTypegenMode (generate-types --no-block)", () => { - test("defaults to blocking when no options/flag are given", () => { - expect(resolveTypegenMode()).toBe("blocking"); - expect(resolveTypegenMode({})).toBe("blocking"); +describe("resolveTypegenMode (generate-types --block)", () => { + test("defaults to non-blocking when no options/flag are given", () => { + // A one-shot CLI never describes by default — it emits cached/`unknown` types + // and exits 0 instead of blocking on (or failing because of) a warehouse, + // even a RUNNING one. The template's postinstall/predev rely on this. + expect(resolveTypegenMode()).toBe("non-blocking"); + expect(resolveTypegenMode({})).toBe("non-blocking"); }); - test("stays blocking when block is true (flag absent)", () => { - expect(resolveTypegenMode({ block: true })).toBe("blocking"); + test("stays non-blocking when block is false (flag absent)", () => { + expect(resolveTypegenMode({ block: false })).toBe("non-blocking"); }); - test("switches to degrade when --no-block sets block to false", () => { - // commander maps `--no-block` to `{ block: false }`. The template's - // postinstall/predev use this so a one-shot CLI never describes — it emits - // cached/`unknown` types and exits 0 instead of blocking on (or failing - // because of) a warehouse, even a RUNNING one. - expect(resolveTypegenMode({ block: false })).toBe("degrade"); + test("switches to blocking when --block sets block to true", () => { + // commander maps `--block` to `{ block: true }`. A deliberate/CI invocation + // opts in to waiting for a starting warehouse and failing fast on a stopped + // one. + expect(resolveTypegenMode({ block: true })).toBe("blocking"); }); }); diff --git a/packages/shared/src/cli/commands/generate-types.ts b/packages/shared/src/cli/commands/generate-types.ts index a5e2fd7ca..c50fba7e0 100644 --- a/packages/shared/src/cli/commands/generate-types.ts +++ b/packages/shared/src/cli/commands/generate-types.ts @@ -3,19 +3,19 @@ import path from "node:path"; import { Command } from "commander"; /** - * Resolve the typegen pre-flight mode for the CLI. Defaults to "blocking" — a - * deliberate/CI invocation should wait for a starting warehouse and fail fast on - * a stopped one. `--no-block` (commander sets `block: false`) switches to - * "degrade": a one-shot CLI can't describe in the background, so this mode never - * describes at all — it skips the warehouse probe AND every DESCRIBE, emits + * Resolve the typegen pre-flight mode for the CLI. Defaults to "non-blocking" — + * a one-shot CLI can't describe in the background, so by default it never + * describes at all: it skips the warehouse probe AND every DESCRIBE, emits * best-available types (cache where the SQL hash matches, else `result: unknown`) - * and returns immediately. The scaffolded template's postinstall/predev use it so - * they never block on — or get slowed by — a warehouse, even a RUNNING one. + * and returns immediately, never blocking on — or failing because of — a + * warehouse, even a RUNNING one. Pass `--block` (commander sets `block: true`) + * for a deliberate/CI invocation that should wait for a starting warehouse and + * fail fast on a stopped one. */ export function resolveTypegenMode(options?: { block?: boolean; -}): "dev" | "blocking" | "degrade" { - return options?.block === false ? "degrade" : "blocking"; +}): "non-blocking" | "blocking" { + return options?.block ? "blocking" : "non-blocking"; } /** @@ -108,8 +108,8 @@ export const generateTypesCommand = new Command("generate-types") .argument("[warehouseId]", "Databricks warehouse ID") .option("--no-cache", "Disable caching for type generation") .option( - "--no-block", - "Degrade instead of blocking on warehouse readiness (use for postinstall)", + "--block", + "Block on warehouse readiness instead of degrading (use for CI)", ) .addHelpText( "after", @@ -119,6 +119,6 @@ Examples: $ appkit generate-types . shared/appkit-types/analytics.d.ts $ appkit generate-types . shared/appkit-types/analytics.d.ts my-warehouse-id $ appkit generate-types --no-cache - $ appkit generate-types --no-block # postinstall: never block/fail on a cold warehouse`, + $ appkit generate-types --block # CI: wait for the warehouse and fail on a cold one`, ) .action(runGenerateTypes); diff --git a/packages/shared/src/cli/commands/type-generator.d.ts b/packages/shared/src/cli/commands/type-generator.d.ts index 1cf65cdb1..d03dd547a 100644 --- a/packages/shared/src/cli/commands/type-generator.d.ts +++ b/packages/shared/src/cli/commands/type-generator.d.ts @@ -5,11 +5,11 @@ declare module "@databricks/appkit/type-generator" { outFile: string; warehouseId: string; noCache?: boolean; - // Warehouse preflight policy. "dev" never blocks the developer; "blocking" - // waits for a startable warehouse and treats a stopped one as fatal; - // "degrade" never probes the warehouse and never describes (emits - // cached/`unknown` types and returns immediately). - mode?: "dev" | "blocking" | "degrade"; + // Warehouse preflight policy. "non-blocking" never probes the warehouse and + // never describes (emits cached/`unknown` types and returns immediately); + // "blocking" waits for a startable warehouse and treats a stopped one as + // fatal. + mode?: "non-blocking" | "blocking"; }): Promise; export class TypegenSyntaxError extends Error { diff --git a/template/package.json b/template/package.json index f25dfe770..aae8f721c 100644 --- a/template/package.json +++ b/template/package.json @@ -24,8 +24,8 @@ "prebuild": "npm run sync && npm run typegen", "predev": "npm run sync && npm run typegen:no-block", "sync": "appkit plugin sync --write --silent", - "typegen": "appkit generate-types", - "typegen:no-block": "appkit generate-types --no-block", + "typegen": "appkit generate-types --block", + "typegen:no-block": "appkit generate-types", "setup": "appkit setup --write" }, "keywords": [], From 84a73b9fffbac16c0b4cf253fc9fe33972dd4a01 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Fri, 5 Jun 2026 18:46:53 +0200 Subject: [PATCH 07/13] feat(appkit): blocking typegen auto-starts a stopped warehouse and waits In blocking mode a STOPPED/STOPPING warehouse is now started and waited on (startWaitProceed: startWarehouse -> waitUntilRunning -> describe) instead of failing fast. Only DELETED/DELETING is fatal. STARTING still waits; RUNNING describes. The write-the-.d.ts-then-throw invariant is preserved for the fatal and wait-timeout paths. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- .../appkit/src/type-generator/preflight.ts | 11 +- .../src/type-generator/query-registry.ts | 28 ++++- .../tests/generate-queries.test.ts | 118 +++++++++++++++--- .../type-generator/tests/preflight.test.ts | 6 +- 4 files changed, 135 insertions(+), 28 deletions(-) diff --git a/packages/appkit/src/type-generator/preflight.ts b/packages/appkit/src/type-generator/preflight.ts index 44d33de8b..e35c92592 100644 --- a/packages/appkit/src/type-generator/preflight.ts +++ b/packages/appkit/src/type-generator/preflight.ts @@ -7,7 +7,7 @@ import type { WarehouseState } from "./warehouse-status"; * return at once. The default for interactive/foreground runs that can't * afford to block on (or fail because of) a warehouse, even a RUNNING one. * - `blocking`: a startable warehouse is worth waiting for, and a stopped one - * is a hard failure. + * is worth starting — only a deleted/deleting warehouse is a hard failure. */ export type PreflightMode = "non-blocking" | "blocking"; @@ -16,12 +16,15 @@ export type PreflightMode = "non-blocking" | "blocking"; * - `proceed`: run DESCRIBE now. * - `degradeAll`: skip DESCRIBE; emit degraded (cached/`unknown`) types. * - `waitThenProceed`: wait for the warehouse to start, then run DESCRIBE. + * - `startWaitProceed`: start the stopped warehouse, wait for RUNNING, then + * run DESCRIBE. * - `fatal`: stop — the warehouse can't serve this run. */ export type PreflightDecision = | "proceed" | "degradeAll" | "waitThenProceed" + | "startWaitProceed" | "fatal"; /** @@ -41,8 +44,8 @@ export function decidePreflight( // self-contained. if (mode === "non-blocking") return "degradeAll"; - // `blocking`: a startable warehouse is worth waiting for, a stopped one is - // fatal, and a deleted one is always fatal. + // `blocking`: a starting warehouse is worth waiting for, a stopped one is + // worth starting (then waiting), and only a deleted/deleting one is fatal. switch (state) { case "RUNNING": return "proceed"; @@ -50,7 +53,7 @@ export function decidePreflight( return "waitThenProceed"; case "STOPPED": case "STOPPING": - return "fatal"; + return "startWaitProceed"; case "DELETED": case "DELETING": return "fatal"; diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts index 284847493..ce4b82396 100644 --- a/packages/appkit/src/type-generator/query-registry.ts +++ b/packages/appkit/src/type-generator/query-registry.ts @@ -15,7 +15,11 @@ import { sqlTypeToHelper, sqlTypeToMarker, } from "./types"; -import { getWarehouseState, waitUntilRunning } from "./warehouse-status"; +import { + getWarehouseState, + startWarehouse, + waitUntilRunning, +} from "./warehouse-status"; const logger = createLogger("type-generator:query-registry"); @@ -422,8 +426,9 @@ export function inferParameterTypes( * @param options.noCache - if true, skip the cache and regenerate all types * @param options.mode - preflight policy: "non-blocking" never probes the * warehouse and never describes (emits cached/`unknown` types and returns - * immediately), "blocking" waits for a startable warehouse and treats a - * stopped one as fatal. Defaults to "non-blocking". + * immediately), "blocking" waits for a starting warehouse and starts (then + * waits for) a stopped one, treating only a deleted/deleting warehouse as + * fatal. Defaults to "non-blocking". * @returns an array of query schemas */ export async function generateQueriesFromDescribe( @@ -596,6 +601,23 @@ export async function generateQueriesFromDescribe( if (decision === "fatal") { fatalMessage = `warehouse ${warehouseId} is ${state}`; } + if (decision === "startWaitProceed") { + // Stopped/stopping warehouse: nudge it out of the stopped state, then + // poll to RUNNING. treatStoppedAsTransient rides out the stale + // pre-start STOPPED/STOPPING reading the start hasn't propagated past + // yet — only DELETED/DELETING (or the deadline) ends the wait early. + await startWarehouse(client, warehouseId); + const final = await waitUntilRunning(client, warehouseId, { + maxMs: PREFLIGHT_WAIT_MAX_MS, + treatStoppedAsTransient: true, + }); + if (final === "RUNNING") { + decision = "proceed"; + } else { + decision = "fatal"; + fatalMessage = `warehouse ${warehouseId} did not reach RUNNING (now ${final})`; + } + } if (decision === "waitThenProceed") { const final = await waitUntilRunning(client, warehouseId, { maxMs: PREFLIGHT_WAIT_MAX_MS, diff --git a/packages/appkit/src/type-generator/tests/generate-queries.test.ts b/packages/appkit/src/type-generator/tests/generate-queries.test.ts index e6b5a9af7..4ebcaac2b 100644 --- a/packages/appkit/src/type-generator/tests/generate-queries.test.ts +++ b/packages/appkit/src/type-generator/tests/generate-queries.test.ts @@ -8,6 +8,8 @@ const mocks = vi.hoisted(() => ({ // test takes the "proceed" path unchanged; override per-test to exercise // stopped/starting/unreachable preflight branches. getWarehouse: vi.fn(() => ({ state: "RUNNING" })), + // warehouses.start — only the blocking startWaitProceed path calls this. + startWarehouse: vi.fn(), spinnerStop: vi.fn(), spinnerPrintDetail: vi.fn(), loadCache: vi.fn(() => ({ version: "2", queries: {} })), @@ -24,7 +26,7 @@ vi.mock("node:fs/promises", () => ({ vi.mock("@databricks/sdk-experimental", () => ({ WorkspaceClient: vi.fn(() => ({ statementExecution: { executeStatement: mocks.executeStatement }, - warehouses: { get: mocks.getWarehouse }, + warehouses: { get: mocks.getWarehouse, start: mocks.startWarehouse }, })), })); @@ -649,28 +651,106 @@ describe("generateQueriesFromDescribe", () => { }); describe("warehouse preflight", () => { - test("STOPPED + blocking mode — fatal per query, never describes", async () => { - mocks.readdir.mockResolvedValue(["a.sql", "b.sql"]); - mocks.readFile - .mockResolvedValueOnce("SELECT id FROM a") - .mockResolvedValueOnce("SELECT id FROM b"); - mocks.getWarehouse.mockReturnValue({ state: "STOPPED" }); + test("STOPPED + blocking mode — starts the warehouse, waits for RUNNING, then describes", async () => { + vi.useFakeTimers(); + try { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + // Preflight sees STOPPED (→ startWaitProceed): warehouses.start fires, + // then waitUntilRunning polls the stale STOPPED once more before RUNNING. + // After RUNNING, DESCRIBE runs normally. + mocks.getWarehouse + .mockReturnValueOnce({ state: "STOPPED" }) + .mockReturnValueOnce({ state: "STOPPED" }) + .mockReturnValue({ state: "RUNNING" }); + mocks.executeStatement.mockResolvedValue( + succeededResult([["id", "INT", null]]), + ); - const { schemas, syntaxErrors, fatalErrors } = - await generateQueriesFromDescribe("/queries", "wh-123", { + const promise = generateQueriesFromDescribe("/queries", "wh-123", { mode: "blocking", }); + // Drive the wait loop's backoff sleep(s) so it can re-poll and observe + // RUNNING. Run pending timers until the work settles. + await vi.runAllTimersAsync(); + const { schemas, syntaxErrors, fatalErrors } = await promise; - // One fatal entry per uncached query; the whole describe batch is skipped. - expect(mocks.executeStatement).not.toHaveBeenCalled(); - expect(fatalErrors).toEqual([ - { name: "a", message: "warehouse wh-123 is STOPPED" }, - { name: "b", message: "warehouse wh-123 is STOPPED" }, - ]); - expect(syntaxErrors).toEqual([]); - expect(schemas).toHaveLength(2); - expect(schemas[0].type).toContain("result: unknown"); - expect(schemas[1].type).toContain("result: unknown"); + // The stopped warehouse was started, then described once it came up. + expect(mocks.startWarehouse).toHaveBeenCalledTimes(1); + expect(mocks.startWarehouse).toHaveBeenCalledWith({ id: "wh-123" }); + expect(mocks.executeStatement).toHaveBeenCalledTimes(1); + expect(schemas).toHaveLength(1); + expect(schemas[0].name).toBe("a"); + expect(schemas[0].type).toContain("id: number"); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + } finally { + vi.useRealTimers(); + } + }); + + test.each(["DELETED", "DELETING"] as const)( + "%s + blocking mode — fatal per query after schemas are written, never describes", + async (state) => { + mocks.readdir.mockResolvedValue(["a.sql", "b.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT id FROM a") + .mockResolvedValueOnce("SELECT id FROM b"); + mocks.getWarehouse.mockReturnValue({ state }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + + // A deleted/deleting warehouse is the only fatal case: never started, + // never described; one fatal entry per uncached query. + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(fatalErrors).toEqual([ + { name: "a", message: `warehouse wh-123 is ${state}` }, + { name: "b", message: `warehouse wh-123 is ${state}` }, + ]); + expect(syntaxErrors).toEqual([]); + // Schemas are still produced (degraded) so the .d.ts is written before + // generateFromEntryPoint throws on the recorded fatalErrors. + expect(schemas).toHaveLength(2); + expect(schemas[0].type).toContain("result: unknown"); + expect(schemas[1].type).toContain("result: unknown"); + }, + ); + + test("STOPPED + blocking — start succeeds but warehouse never reaches RUNNING is fatal", async () => { + vi.useFakeTimers(); + try { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + // Preflight sees STOPPED → start fires, but the warehouse then reports + // DELETED (a genuinely terminal state even with treatStoppedAsTransient). + // The wait resolves non-RUNNING → fatal; schemas still written. + mocks.getWarehouse + .mockReturnValueOnce({ state: "STOPPED" }) + .mockReturnValue({ state: "DELETED" }); + + const promise = generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + await vi.runAllTimersAsync(); + const { schemas, syntaxErrors, fatalErrors } = await promise; + + expect(mocks.startWarehouse).toHaveBeenCalledTimes(1); + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([ + { + name: "a", + message: "warehouse wh-123 did not reach RUNNING (now DELETED)", + }, + ]); + expect(schemas[0].type).toContain("result: unknown"); + } finally { + vi.useRealTimers(); + } }); test("non-blocking mode — degrades silently without probing, even when STOPPED", async () => { diff --git a/packages/appkit/src/type-generator/tests/preflight.test.ts b/packages/appkit/src/type-generator/tests/preflight.test.ts index 1be0da4fb..11d5b6f9e 100644 --- a/packages/appkit/src/type-generator/tests/preflight.test.ts +++ b/packages/appkit/src/type-generator/tests/preflight.test.ts @@ -17,9 +17,11 @@ const cases: Array<{ { state: "STARTING", mode: "blocking", expected: "waitThenProceed" }, - { state: "STOPPED", mode: "blocking", expected: "fatal" }, - { state: "STOPPING", mode: "blocking", expected: "fatal" }, + // Stopped/stopping in blocking mode is worth starting + waiting, not fatal. + { state: "STOPPED", mode: "blocking", expected: "startWaitProceed" }, + { state: "STOPPING", mode: "blocking", expected: "startWaitProceed" }, + // Only a deleted/deleting warehouse is a hard failure. { state: "DELETED", mode: "blocking", expected: "fatal" }, { state: "DELETING", mode: "blocking", expected: "fatal" }, From 3c9cf2ab7a4ed13e26cbc7ad1b6b74897e564368 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Fri, 5 Jun 2026 18:51:31 +0200 Subject: [PATCH 08/13] fix(appkit): dev typegen background-describes a running warehouse The dev background lifecycle returned early for RUNNING, a leftover from when the foreground described it synchronously. Now that the foreground always degrades instantly (non-blocking), RUNNING must also background-describe or a running warehouse never gets real types in dev. Only DELETED/DELETING leaves degraded; single-flight coalescing and abort-on-shutdown are preserved. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- .../type-generator/tests/vite-plugin.test.ts | 92 +++++++++++++++++-- .../appkit/src/type-generator/vite-plugin.ts | 69 ++++++++------ 2 files changed, 126 insertions(+), 35 deletions(-) diff --git a/packages/appkit/src/type-generator/tests/vite-plugin.test.ts b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts index 629b07889..16153257f 100644 --- a/packages/appkit/src/type-generator/tests/vite-plugin.test.ts +++ b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts @@ -115,9 +115,12 @@ describe("appKitTypesPlugin — generation mode", () => { beforeEach(() => { vi.clearAllMocks(); mocks.generateFromEntryPoint.mockResolvedValue(undefined); - // Default the warehouse watch to a no-op (RUNNING => no wait) so tests that - // don't exercise it aren't perturbed by a background regenerate. - mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState); + // Default the warehouse watch to a no-op so tests that don't exercise it + // aren't perturbed by a background regenerate. DELETED is the only state the + // watch leaves alone (it can't be started and blocking would be fatal), so + // it never starts/waits/regenerates — unlike RUNNING, which now describes in + // the background. + mocks.getWarehouseState.mockResolvedValue("DELETED" as WarehouseState); mocks.startWarehouse.mockResolvedValue(undefined); mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); // A non-empty warehouse ID is required or generate() short-circuits before @@ -140,7 +143,7 @@ describe("appKitTypesPlugin — generation mode", () => { await runPlugin(); // Dev foreground degrades instantly: it never blocks and never describes. - // (The warehouse watch is a RUNNING no-op here, so there's no background + // (The warehouse watch is a DELETED no-op here, so there's no background // regenerate — exactly one foreground call.) expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); expect(mocks.generateFromEntryPoint).toHaveBeenCalledWith( @@ -174,8 +177,9 @@ describe("appKitTypesPlugin — single-flight generate", () => { beforeEach(() => { vi.clearAllMocks(); - // Watch is a no-op here (RUNNING) so it can't add stray generate calls. - mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState); + // Watch is a no-op here (DELETED leaves the degraded types alone) so it can't + // add stray generate calls — RUNNING would now describe in the background. + mocks.getWarehouseState.mockResolvedValue("DELETED" as WarehouseState); mocks.startWarehouse.mockResolvedValue(undefined); mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); process.env.NODE_ENV = "development"; @@ -343,20 +347,90 @@ describe("appKitTypesPlugin — background warehouse watch", () => { ); }); - test("RUNNING state does not start, wait, or regenerate", async () => { + test("RUNNING → describes in the background after the dev foreground degrade", async () => { + // Phase 3 regression fix: in dev the foreground only degrades, so a RUNNING + // warehouse must still get REAL types from a background describe — it must + // NOT be skipped just because it's already warm. process.env.NODE_ENV = "development"; mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + + await runPlugin(); + await flush(); + + // Already RUNNING: no start. The wait is still issued (it returns on the + // first poll for a running warehouse), and we didn't start it, so the + // default terminal states apply. + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.waitUntilRunning).toHaveBeenCalledTimes(1); + expect(mocks.waitUntilRunning).toHaveBeenCalledWith( + expect.anything(), + "wh-test", + expect.objectContaining({ treatStoppedAsTransient: false }), + ); + // Call 1: initial buildStart foreground (degraded). Call 2: the background + // regenerate that DESCRIBEs the running warehouse and lands real types. + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ mode: "non-blocking" }), + ); + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ mode: "blocking" }), + ); + }); + + test("DELETED → leaves degraded types, no start/wait/regenerate, no crash", async () => { + process.env.NODE_ENV = "development"; + // A deleted warehouse can't be started and blocking typegen would treat it + // as fatal, so the watch must leave the foreground's degraded types in place. + mocks.getWarehouseState.mockResolvedValue("DELETED" as WarehouseState); + + await expect(runPlugin()).resolves.toBeUndefined(); + await flush(); + + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.waitUntilRunning).not.toHaveBeenCalled(); + // Only the initial (degraded) foreground generate ran; nothing threw. + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + }); + + test("DELETING → leaves degraded types, no start/wait/regenerate", async () => { + process.env.NODE_ENV = "development"; + mocks.getWarehouseState.mockResolvedValue("DELETING" as WarehouseState); await runPlugin(); await flush(); - // Already RUNNING: the watch returns before starting or waiting, leaving - // only the single initial generate. expect(mocks.startWarehouse).not.toHaveBeenCalled(); expect(mocks.waitUntilRunning).not.toHaveBeenCalled(); expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); }); + test("background regenerate errors are swallowed (no crash), degraded remains", async () => { + // Even when the warehouse is RUNNING and the blocking regenerate THROWS + // (e.g. DESCRIBE surfaced a syntax/fatal error), nothing escapes into dev + // startup: in dev generateOnce catches+logs the throw (and the detached + // IIFE's catch is a further backstop), so the process never crashes and the + // degraded types written by the foreground remain. + process.env.NODE_ENV = "development"; + mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + // First call = foreground (degraded) succeeds; second = background blocking + // describe rejects. + mocks.generateFromEntryPoint + .mockResolvedValueOnce(undefined) + .mockRejectedValueOnce(new Error("warehouse exploded")); + + await expect(runPlugin()).resolves.toBeUndefined(); + await flush(); + + // The background regenerate was attempted (2 calls) but its rejection never + // escaped into the caller. + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + }); + test("no watch in production (armWarehouseWatch no-ops)", async () => { process.env.NODE_ENV = "production"; // Even if the warehouse were STOPPED, production must not arm the watch. diff --git a/packages/appkit/src/type-generator/vite-plugin.ts b/packages/appkit/src/type-generator/vite-plugin.ts index 3cc3f4c66..c869acdf6 100644 --- a/packages/appkit/src/type-generator/vite-plugin.ts +++ b/packages/appkit/src/type-generator/vite-plugin.ts @@ -174,19 +174,29 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { } /** - * DEV-only: warm a stopped/cold-starting warehouse in the background and - * regenerate once it reaches RUNNING, so fresh (non-degraded) types land in - * the editor — without blocking dev startup. + * DEV-only: get the warehouse to RUNNING in the background and regenerate with + * real (non-degraded) types once it is — without blocking dev startup. The + * foreground build only ever degrades in dev (instant `unknown`/cached types), + * so this is what lands actual DESCRIBE results in the editor for EVERY + * reachable warehouse state, not just one that happens to already be warm. * * Post-probe behaviour by state: - * - RUNNING → return (the initial build already produced fresh types). - * - DELETED / DELETING → return (a deleted warehouse can't be started). - * - STOPPED / STOPPING → kick off a start, then wait for RUNNING. - * - STARTING → it's already coming up; just wait for RUNNING. + * - RUNNING → describe right away (the dev foreground degraded, so a running + * warehouse would otherwise never get real types — this is the case Phase 3 + * restores). `waitUntilRunning` returns immediately for an already-running + * warehouse, then the blocking regenerate fires. + * - STARTING → it's already coming up; just wait for RUNNING, then describe. + * - STOPPED / STOPPING → kick off a start, wait for RUNNING, then describe. + * - DELETED / DELETING → return (a deleted warehouse can't be started, and + * blocking typegen would treat it as fatal); leave the degraded types. * * No-op in production or without a warehouse id. Replaces any previously-armed * watch (aborting it first). Fully self-contained: it never throws into the - * caller and never re-arms itself. + * caller and never re-arms itself. The whole lifecycle is abortable via the + * shared {@link watchController} — its signal is threaded into + * `waitUntilRunning`, so a dev-server shutdown cancels a pending wait — and the + * regenerate routes through {@link runGenerate} so it can't race-write the + * .d.ts with the foreground degrade or a `.sql` re-trigger. * * The regenerate runs in "blocking" mode (not the foreground's non-blocking) * so it actually DESCRIBEs the now-RUNNING warehouse and lands real types — @@ -209,20 +219,19 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { const client = new WorkspaceClient({}); const state = await getWarehouseState(client, warehouseId); - // RUNNING already produced fresh types on the initial build; a deleted - // warehouse can't be started. Neither is worth watching. - if ( - state === "RUNNING" || - state === "DELETED" || - state === "DELETING" - ) { + // A deleted/deleting warehouse can't be started and blocking typegen + // would treat it as fatal — leave the degraded types and stop. Every + // other state (including RUNNING) proceeds to wait-then-describe so the + // dev editor gets real types, not just the foreground's degraded ones. + if (state === "DELETED" || state === "DELETING") { return; } - // Stopped/stopping won't reach RUNNING on its own — nudge it. STARTING - // is already coming up, so don't issue a redundant start. A failed start - // is non-fatal: give up silently rather than throw out of the detached - // task (the developer still has degraded/cached types). + // Stopped/stopping won't reach RUNNING on its own — nudge it. RUNNING and + // STARTING need no start (RUNNING is already up; STARTING is coming up), + // so don't issue a redundant one. A failed start is non-fatal: give up + // silently rather than throw out of the detached task (the developer + // still has degraded/cached types). let startedByUs = false; if (state === "STOPPED" || state === "STOPPING") { try { @@ -234,20 +243,26 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { } } + // Wait for RUNNING. For an already-RUNNING warehouse this returns on the + // first poll; for STARTING/STOPPED it polls (abortably) until the + // warehouse warms up, a terminal state, or the deadline. const final = await waitUntilRunning(client, warehouseId, { maxMs: DEV_WAREHOUSE_WATCH_MAX_MS, signal, // We just issued the start, so the first poll(s) often still report // STOPPED/STOPPING before the start propagates. Poll through those // instead of bailing, or the regenerate would never fire. When we - // didn't start it (STARTING branch), keep the default terminal states. + // didn't start it (RUNNING/STARTING branch), keep the default terminal + // states. treatStoppedAsTransient: startedByUs, }); if (final === "RUNNING" && !signal.aborted) { - logger.debug("Warehouse reached RUNNING; regenerating types."); + logger.debug("Warehouse is RUNNING; regenerating types."); // Blocking: the warehouse is RUNNING now, so describe it and emit real // (non-degraded) types — unlike the foreground dev run, which degraded. + // Routed through the single-flight guard so it coalesces with the + // foreground degrade / any `.sql` re-trigger instead of racing them. await runGenerate("blocking"); } } catch { @@ -295,9 +310,10 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { // Dev: don't block startup waiting on typegen. The foreground generate runs // non-blocking — it skips the warehouse entirely and writes degraded - // (cached/`unknown`) types instantly. Then arm the warehouse watch so a - // cold-starting warehouse gets a one-shot BLOCKING regenerate (real types) - // once it's RUNNING. + // (cached/`unknown`) types instantly. Then arm the warehouse watch so the + // warehouse gets a one-shot BLOCKING regenerate (real types) in the + // background for EVERY reachable state: RUNNING describes right away, while + // STARTING/STOPPED are waited (and started) until they reach RUNNING. void runGenerate("non-blocking"); armWarehouseWatch(); }, @@ -314,8 +330,9 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { // Route through the single-flight runner (was fire-and-forget // generate(), which could race the initial build / watch). This is a // dev-only hook, so degrade instantly (non-blocking), then re-arm the - // warehouse watch: editing a query against a still-starting warehouse - // should pick up fresh (blocking-described) types once it warms up. + // warehouse watch so the edited query is re-described in the background + // against the running warehouse (or once a still-starting one warms + // up), landing fresh blocking-described types. void runGenerate("non-blocking"); armWarehouseWatch(); } From 976550c5c2e0693f834bf612eef1edbeb5fe5e7b Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Fri, 5 Jun 2026 18:56:48 +0200 Subject: [PATCH 09/13] feat(appkit): non-blocking typegen CLI spawns a detached blocking worker The default (non-blocking) generate-types now writes degraded types, then spawns a detached `generate-types --block` worker behind a single-flight lock and exits 0 -- so postinstall/predev never block on warehouse state. The worker does the full blocking lifecycle in the background, refreshes real types, and releases the lock on exit (process-exit guard covers a hard fail). A stale lock from a crashed worker is stolen after 6 min; spawn failure is non-fatal to the foreground. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- .../src/cli/commands/generate-types.test.ts | 207 +++++++++++++++++- .../shared/src/cli/commands/generate-types.ts | 155 ++++++++++++- .../src/cli/commands/spawn-lock.test.ts | 93 ++++++++ .../shared/src/cli/commands/spawn-lock.ts | 148 +++++++++++++ 4 files changed, 597 insertions(+), 6 deletions(-) create mode 100644 packages/shared/src/cli/commands/spawn-lock.test.ts create mode 100644 packages/shared/src/cli/commands/spawn-lock.ts diff --git a/packages/shared/src/cli/commands/generate-types.test.ts b/packages/shared/src/cli/commands/generate-types.test.ts index 0a0873985..bdece9983 100644 --- a/packages/shared/src/cli/commands/generate-types.test.ts +++ b/packages/shared/src/cli/commands/generate-types.test.ts @@ -1,5 +1,81 @@ -import { describe, expect, test } from "vitest"; -import { resolveTypegenMode } from "./generate-types"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { + afterEach, + beforeEach, + describe, + expect, + type Mock, + test, + vi, +} from "vitest"; + +// --- Module mocks ----------------------------------------------------------- +// vi.mock factories are hoisted above the file, so the spies they return must be +// created in a hoisted block too (plain top-level consts would be in the TDZ when +// the hoisted factory runs). +const { + generateFromEntryPoint, + generateServingTypes, + unref, + spawn, + acquireSpawnLock, + releaseSpawnLock, + getSpawnLockPath, +} = vi.hoisted(() => { + // `path` import isn't available yet inside a hoisted block; require it here. + const nodePath = require("node:path") as typeof import("node:path"); + const unref = vi.fn(); + const lockPathOf = (root: string) => + nodePath.join(root, "node_modules", ".databricks", "appkit", "worker.lock"); + return { + generateFromEntryPoint: vi.fn(async () => {}), + generateServingTypes: vi.fn(async () => {}), + unref, + spawn: vi.fn( + (_bin: string, _args: string[], _opts: Record) => ({ + unref, + }), + ), + acquireSpawnLock: vi.fn(() => true), + releaseSpawnLock: vi.fn(), + getSpawnLockPath: vi.fn(lockPathOf), + }; +}); + +// The library type-generator is an optional/ambient module; mock it so the +// command's `await import("@databricks/appkit/type-generator")` resolves to spies +// and never touches a warehouse. +vi.mock("@databricks/appkit/type-generator", () => ({ + generateFromEntryPoint, + generateServingTypes, +})); + +// Mock the detached spawn so we can assert how the worker is launched without +// actually forking a process. +vi.mock("node:child_process", () => ({ spawn })); + +// Mock the single-flight lock so each test controls acquire/steal outcomes and +// we can assert release. Steal/fresh semantics of the real implementation are +// covered separately in spawn-lock.test.ts. +vi.mock("./spawn-lock.js", () => ({ + acquireSpawnLock, + releaseSpawnLock, + getSpawnLockPath, + SPAWN_LOCK_STALE_MS: 360_000, +})); + +import { generateTypesCommand, resolveTypegenMode } from "./generate-types"; + +/** + * Drive the real commander command the way the bin does, so argv parsing + * (`--block`, `--worker-lock ` → camelCase, positionals) is exercised + * end-to-end. `from: "user"` means args are the user-supplied tokens only. + */ +async function runCli(args: string[]): Promise { + await generateTypesCommand.parseAsync(args, { from: "user" }); +} describe("resolveTypegenMode (generate-types --block)", () => { test("defaults to non-blocking when no options/flag are given", () => { @@ -21,3 +97,130 @@ describe("resolveTypegenMode (generate-types --block)", () => { expect(resolveTypegenMode({ block: true })).toBe("blocking"); }); }); + +describe("generate-types foreground spawn orchestration", () => { + let tmpRoot: string; + let consoleLog: Mock; + let consoleError: Mock; + const prevWarehouse = process.env.DATABRICKS_WAREHOUSE_ID; + + beforeEach(() => { + vi.clearAllMocks(); + acquireSpawnLock.mockReturnValue(true); + + // A real temp project root with a config/queries folder so the analytics + // generate path runs. + tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "gentypes-")); + fs.mkdirSync(path.join(tmpRoot, "config", "queries"), { recursive: true }); + process.env.DATABRICKS_WAREHOUSE_ID = "wh-123"; + + consoleLog = vi.spyOn(console, "log").mockImplementation(() => {}) as Mock; + consoleError = vi + .spyOn(console, "error") + .mockImplementation(() => {}) as Mock; + }); + + afterEach(() => { + vi.restoreAllMocks(); + fs.rmSync(tmpRoot, { recursive: true, force: true }); + if (prevWarehouse === undefined) { + delete process.env.DATABRICKS_WAREHOUSE_ID; + } else { + process.env.DATABRICKS_WAREHOUSE_ID = prevWarehouse; + } + }); + + test("non-blocking: generates degraded types and spawns exactly one detached worker", async () => { + const outFile = path.join(tmpRoot, "shared/appkit-types/analytics.d.ts"); + + await runCli([tmpRoot, outFile, "wh-123"]); + + // Library generate ran in non-blocking mode (writes degraded types). + expect(generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "non-blocking" }), + ); + + // Exactly one detached worker, re-invoking this CLI with --block and the + // worker lock, forwarding the same positional targets. + expect(spawn).toHaveBeenCalledTimes(1); + const [bin, argv, opts] = spawn.mock.calls[0]; + expect(bin).toBe(process.execPath); + expect(argv[0]).toBe(process.argv[1]); // CLI entry + expect(argv.slice(1)).toEqual([ + "generate-types", + "--block", + "--worker-lock", + getSpawnLockPath(tmpRoot), + tmpRoot, + outFile, + "wh-123", + ]); + expect(opts).toMatchObject({ detached: true, stdio: "ignore" }); + expect(unref).toHaveBeenCalledTimes(1); + }); + + test("lock already held (fresh): does NOT spawn, foreground still resolves", async () => { + acquireSpawnLock.mockReturnValue(false); + + await expect(runCli([tmpRoot])).resolves.toBeUndefined(); + + expect(generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(spawn).not.toHaveBeenCalled(); + // One-line single-flight note. + expect(consoleLog).toHaveBeenCalledWith( + "Type refresh already in progress, skipping.", + ); + }); + + test("stale lock: steals (acquire returns true) and spawns", async () => { + // acquireSpawnLock returning true models a stolen stale lock (the real steal + // path is unit-tested in spawn-lock.test.ts). + acquireSpawnLock.mockReturnValue(true); + + await runCli([tmpRoot]); + + expect(acquireSpawnLock).toHaveBeenCalledTimes(1); + expect(spawn).toHaveBeenCalledTimes(1); + }); + + test("spawn throwing is non-fatal: foreground does not reject", async () => { + spawn.mockImplementationOnce(() => { + throw new Error("EAGAIN"); + }); + + await expect(runCli([tmpRoot])).resolves.toBeUndefined(); + + // Generate still ran; failure was swallowed and logged. + expect(generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(consoleError).toHaveBeenCalledWith( + expect.stringContaining("Could not start background type refresh"), + ); + }); + + test("--block (deliberate/CI) generates blocking and never spawns", async () => { + await runCli([tmpRoot, "--block"]); + + expect(generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "blocking" }), + ); + expect(acquireSpawnLock).not.toHaveBeenCalled(); + expect(spawn).not.toHaveBeenCalled(); + }); + + test("worker invocation (--worker-lock): runs blocking generate, releases lock, does NOT spawn", async () => { + const lockPath = getSpawnLockPath(tmpRoot); + + await runCli([tmpRoot, "--worker-lock", lockPath]); + + // A worker is always blocking — it does the real DESCRIBE lifecycle. + expect(generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "blocking" }), + ); + // It releases the SAME lock it was handed. + expect(releaseSpawnLock).toHaveBeenCalledWith(lockPath); + // It must never spawn another worker (recursion would never terminate). + expect(spawn).not.toHaveBeenCalled(); + expect(acquireSpawnLock).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/shared/src/cli/commands/generate-types.ts b/packages/shared/src/cli/commands/generate-types.ts index c50fba7e0..939391d8a 100644 --- a/packages/shared/src/cli/commands/generate-types.ts +++ b/packages/shared/src/cli/commands/generate-types.ts @@ -1,6 +1,12 @@ +import { spawn } from "node:child_process"; import fs from "node:fs"; import path from "node:path"; -import { Command } from "commander"; +import { Command, Option } from "commander"; +import { + acquireSpawnLock, + getSpawnLockPath, + releaseSpawnLock, +} from "./spawn-lock.js"; /** * Resolve the typegen pre-flight mode for the CLI. Defaults to "non-blocking" — @@ -18,14 +24,30 @@ export function resolveTypegenMode(options?: { return options?.block ? "blocking" : "non-blocking"; } +/** Options parsed by commander for the generate-types command. */ +interface GenerateTypesOptions { + noCache?: boolean; + block?: boolean; + /** + * Internal: present only on the detached worker invocation. Carries the path + * of the single-flight lock this worker must release when it finishes. Its + * presence is what marks an invocation as "the worker" — workers always run + * with `--block`, so they never spawn another worker (only non-blocking runs + * spawn), which terminates the recursion. + */ + workerLock?: string; +} + /** - * Generate types command implementation + * Generate types command implementation. Runs the library generate (which, in + * non-blocking mode, writes degraded types and returns immediately). This is the + * SAME work the worker performs in blocking mode in the background. */ async function runGenerateTypes( rootDir?: string, outFile?: string, warehouseId?: string, - options?: { noCache?: boolean; block?: boolean }, + options?: GenerateTypesOptions, ) { try { const resolvedRootDir = rootDir || process.cwd(); @@ -97,6 +119,123 @@ async function runGenerateTypes( } } +/** + * Spawn the detached blocking worker that refreshes real types in the background + * after the foreground non-blocking generate has already written degraded types. + * + * Re-invokes THIS CLI (`process.execPath` + `process.argv[1]` — the bin entry + * that launched us) with `generate-types --block --worker-lock ` plus + * the same positional target options the foreground used, so the worker writes + * to the same out file / reads the same query folder. The worker is: + * - `detached: true` + `.unref()` so it outlives this process (install/dev-setup + * can finish and exit while the worker keeps warming the warehouse). + * - `stdio: "ignore"` so it never holds the parent's pipes open or interleaves + * output into the install/dev log. + * + * Spawning is wrapped so any failure is non-fatal: the caller still has degraded + * types and exits 0. + * + * @param lockPath - the acquired single-flight lock; passed to the worker so it + * releases the SAME lock when it finishes. + * @param targets - the foreground's positional args, forwarded verbatim. + * @returns true if the worker was spawned, false if spawning threw. + */ +export function spawnTypegenWorker( + lockPath: string, + targets: { rootDir?: string; outFile?: string; warehouseId?: string }, +): boolean { + // The script the runtime launched us with (the `appkit` bin shim). Re-running + // it under the same node binary reproduces this exact CLI in the worker. + const cliEntry = process.argv[1]; + + // Forward the positionals in declaration order (rootDir, outFile, + // warehouseId). Stop at the first undefined so we never pass a literal + // "undefined" — commander would treat it as a positional value. (rootDir is + // effectively always set by commander's default, but guard anyway.) + const positionals: string[] = []; + for (const value of [targets.rootDir, targets.outFile, targets.warehouseId]) { + if (value === undefined) break; + positionals.push(value); + } + + const args = [ + cliEntry, + "generate-types", + "--block", + "--worker-lock", + lockPath, + ...positionals, + ]; + + try { + const child = spawn(process.execPath, args, { + detached: true, + stdio: "ignore", + }); + child.unref(); + return true; + } catch (error) { + // Non-fatal: the foreground already wrote degraded types. Log and move on. + console.error( + `Could not start background type refresh: ${ + error instanceof Error ? error.message : String(error) + }`, + ); + return false; + } +} + +/** + * The command action. Orchestrates the non-blocking foreground contract: + * 1. Run the library generate (writes degraded types immediately in non-blocking + * mode; does the full blocking lifecycle when this is the worker). + * 2. If this is a non-blocking, non-worker invocation, try to spawn the detached + * blocking worker behind the single-flight lock. If the lock is already held + * by a live worker, skip (single-flight) with a one-line note. Either way the + * foreground returns normally (exit 0). + * 3. If this IS the worker (`--worker-lock` present), it ran blocking above and + * releases the lock here (and via a process-exit guard, so a hard failure / + * process.exit still frees it). + */ +async function generateTypesAction( + rootDir: string | undefined, + outFile: string | undefined, + warehouseId: string | undefined, + options: GenerateTypesOptions, +) { + const isWorker = typeof options.workerLock === "string"; + + // A worker must always free its lock, even if the blocking generate throws or + // calls process.exit (TypegenFatalError → exit 1). The exit handler covers the + // process.exit / uncaught paths; the finally covers the normal return. + if (isWorker && options.workerLock) { + const lockPath = options.workerLock; + process.once("exit", () => releaseSpawnLock(lockPath)); + } + + try { + await runGenerateTypes(rootDir, outFile, warehouseId, options); + } finally { + if (isWorker && options.workerLock) { + releaseSpawnLock(options.workerLock); + } + } + + // Only a non-blocking, non-worker invocation spawns. A worker is always + // --block (so resolveTypegenMode → "blocking"), which both prevents recursion + // and means we never get here for a worker. + if (!isWorker && resolveTypegenMode(options) === "non-blocking") { + const resolvedRootDir = rootDir || process.cwd(); + const lockPath = getSpawnLockPath(resolvedRootDir); + + if (acquireSpawnLock(lockPath)) { + spawnTypegenWorker(lockPath, { rootDir, outFile, warehouseId }); + } else { + console.log("Type refresh already in progress, skipping."); + } + } +} + export const generateTypesCommand = new Command("generate-types") .description("Generate TypeScript types from SQL queries") .argument("[rootDir]", "Root directory of the project", process.cwd()) @@ -111,6 +250,14 @@ export const generateTypesCommand = new Command("generate-types") "--block", "Block on warehouse readiness instead of degrading (use for CI)", ) + // Internal: marks the detached background worker and carries the lock it must + // release. Hidden from --help; users should never pass it. + .addOption( + new Option( + "--worker-lock ", + "Internal: detached worker lock path", + ).hideHelp(), + ) .addHelpText( "after", ` @@ -121,4 +268,4 @@ Examples: $ appkit generate-types --no-cache $ appkit generate-types --block # CI: wait for the warehouse and fail on a cold one`, ) - .action(runGenerateTypes); + .action(generateTypesAction); diff --git a/packages/shared/src/cli/commands/spawn-lock.test.ts b/packages/shared/src/cli/commands/spawn-lock.test.ts new file mode 100644 index 000000000..5566d4be4 --- /dev/null +++ b/packages/shared/src/cli/commands/spawn-lock.test.ts @@ -0,0 +1,93 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "vitest"; +import { + acquireSpawnLock, + getSpawnLockPath, + releaseSpawnLock, + SPAWN_LOCK_STALE_MS, +} from "./spawn-lock"; + +describe("spawn-lock", () => { + let tmpRoot: string; + let lockPath: string; + + beforeEach(() => { + tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "spawnlock-")); + lockPath = path.join(tmpRoot, "worker.lock"); + }); + + afterEach(() => { + fs.rmSync(tmpRoot, { recursive: true, force: true }); + }); + + test("getSpawnLockPath nests under node_modules/.databricks/appkit of the root", () => { + const root = path.join(os.tmpdir(), "some-project"); + expect(getSpawnLockPath(root)).toBe( + path.join( + root, + "node_modules", + ".databricks", + "appkit", + ".appkit-typegen-worker.lock", + ), + ); + }); + + test("acquire creates the lock (and its parent dirs) and returns true", () => { + const nested = path.join(tmpRoot, "node_modules", ".databricks", "lock"); + expect(acquireSpawnLock(nested)).toBe(true); + expect(fs.existsSync(nested)).toBe(true); + // Body records pid for debugging. + expect(fs.readFileSync(nested, "utf8")).toContain(String(process.pid)); + }); + + test("acquire returns false when a FRESH lock is already held (single-flight)", () => { + expect(acquireSpawnLock(lockPath)).toBe(true); + // Second caller sees a fresh lock and backs off. + expect(acquireSpawnLock(lockPath)).toBe(false); + // The original lock file is untouched. + expect(fs.existsSync(lockPath)).toBe(true); + }); + + test("acquire STEALS a stale lock (older than staleMs) and recreates it", () => { + fs.writeFileSync(lockPath, "99999 0\n"); + // Backdate mtime well beyond the stale window. + const old = new Date(Date.now() - (SPAWN_LOCK_STALE_MS + 60_000)); + fs.utimesSync(lockPath, old, old); + + expect(acquireSpawnLock(lockPath)).toBe(true); + // Re-created with our pid — proves it was stolen, not just observed. + expect(fs.readFileSync(lockPath, "utf8")).toContain(String(process.pid)); + }); + + test("a custom staleMs governs the fresh/stale boundary", () => { + fs.writeFileSync(lockPath, "1 0\n"); + const fiveSecAgo = new Date(Date.now() - 5_000); + fs.utimesSync(lockPath, fiveSecAgo, fiveSecAgo); + + // 10s window: 5s-old lock is still fresh → not acquired. + expect(acquireSpawnLock(lockPath, 10_000)).toBe(false); + // 1s window: 5s-old lock is stale → stolen. + expect(acquireSpawnLock(lockPath, 1_000)).toBe(true); + }); + + test("release unlinks the lock", () => { + acquireSpawnLock(lockPath); + expect(fs.existsSync(lockPath)).toBe(true); + releaseSpawnLock(lockPath); + expect(fs.existsSync(lockPath)).toBe(false); + }); + + test("release is a no-op (no throw) when the lock is already gone", () => { + expect(() => releaseSpawnLock(lockPath)).not.toThrow(); + }); + + test("release then re-acquire works (full single-flight cycle)", () => { + expect(acquireSpawnLock(lockPath)).toBe(true); + expect(acquireSpawnLock(lockPath)).toBe(false); // held + releaseSpawnLock(lockPath); + expect(acquireSpawnLock(lockPath)).toBe(true); // freed → re-acquirable + }); +}); diff --git a/packages/shared/src/cli/commands/spawn-lock.ts b/packages/shared/src/cli/commands/spawn-lock.ts new file mode 100644 index 000000000..d4f86c4a5 --- /dev/null +++ b/packages/shared/src/cli/commands/spawn-lock.ts @@ -0,0 +1,148 @@ +import fs from "node:fs"; +import path from "node:path"; + +/** + * How long a spawn lock is considered fresh. A held lock newer than this means a + * background worker is genuinely in flight, so the foreground skips spawning; + * older than this the lock is presumed orphaned (the worker crashed/was killed + * before it could release) and is stolen. + * + * Must comfortably exceed the worker's worst-case runtime: the blocking preflight + * wait cap (PREFLIGHT_WAIT_MAX_MS = 5 min in the type-generator) plus a DESCRIBE + * budget. Six minutes leaves ~1 min of headroom over a worker that waits the full + * preflight window and then describes. + */ +export const SPAWN_LOCK_STALE_MS = 6 * 60 * 1000; + +/** + * Resolve the on-disk path of the single-flight spawn lock for a project. + * + * Lives alongside the type-generator cache (`node_modules/.databricks/appkit/`) + * so it shares the same already-creatable, gitignored, per-project location and + * doesn't introduce a new directory. The lock is keyed only by project root, so + * concurrent `generate-types` invocations for the same project (postinstall + + * predev, say) contend for one lock and only one wins the spawn. + * + * @param rootDir - project root (the resolved first CLI argument / cwd). + * @returns absolute path to the lock file. + */ +export function getSpawnLockPath(rootDir: string): string { + return path.join( + rootDir, + "node_modules", + ".databricks", + "appkit", + ".appkit-typegen-worker.lock", + ); +} + +/** + * Try to acquire the single-flight spawn lock. + * + * Atomic create via `fs.writeFileSync(lockPath, ..., { flag: "wx" })` — `wx` + * fails (EEXIST) if the file already exists, so the create itself is the + * mutual-exclusion primitive (no check-then-create race between two foreground + * processes). The lock body records pid + timestamp purely for debugging. + * + * On EEXIST we stat the existing lock: + * - fresh (mtime within {@link staleMs}) → a worker is in flight, return false. + * - stale (mtime older than staleMs) → presumed orphaned; unlink and recreate. + * The recreate also uses `wx`, so if a competing process steals it first we + * lose the race cleanly and return false. + * + * Any unexpected error (permission, ENOENT on a missing parent dir we couldn't + * create, …) is swallowed and reported as "not acquired": failing to take the + * lock must never break the foreground — at worst we skip the background refresh. + * + * @param lockPath - path returned by {@link getSpawnLockPath}. + * @param staleMs - age beyond which a held lock is stolen. Defaults to + * {@link SPAWN_LOCK_STALE_MS}. + * @returns true if this caller now owns the lock (and must release it), false if + * another live worker holds it or the lock couldn't be taken. + */ +export function acquireSpawnLock( + lockPath: string, + staleMs: number = SPAWN_LOCK_STALE_MS, +): boolean { + const body = `${process.pid} ${Date.now()}\n`; + + try { + fs.mkdirSync(path.dirname(lockPath), { recursive: true }); + } catch { + // Parent dir creation is best-effort; the create below will surface any real + // problem and we'll treat it as "not acquired". + } + + try { + fs.writeFileSync(lockPath, body, { flag: "wx" }); + return true; + } catch (error) { + if (!isErrnoException(error) || error.code !== "EEXIST") { + // Unexpected failure — don't let lock IO break the foreground. + return false; + } + } + + // Lock exists — decide fresh vs stale. + let mtimeMs: number; + try { + mtimeMs = fs.statSync(lockPath).mtimeMs; + } catch { + // It vanished between the failed create and the stat (released by the + // worker). Try once more to take it. + return tryCreate(lockPath, body); + } + + if (Date.now() - mtimeMs < staleMs) { + // A worker is genuinely in flight. + return false; + } + + // Stale: steal it. Unlink (ignore ENOENT — someone else may have cleaned up) + // then re-create with `wx` so we still lose cleanly to a racing stealer. + try { + fs.unlinkSync(lockPath); + } catch (error) { + if (isErrnoException(error) && error.code !== "ENOENT") { + return false; + } + } + return tryCreate(lockPath, body); +} + +/** + * Release the spawn lock. Unlink, ignoring ENOENT (already gone — e.g. it was + * stolen as stale by another process, or never existed). Any other error is + * swallowed: a failed release at worst leaves a stale lock that the next caller + * will steal after {@link SPAWN_LOCK_STALE_MS}. + * + * @param lockPath - path returned by {@link getSpawnLockPath}. + */ +export function releaseSpawnLock(lockPath: string): void { + try { + fs.unlinkSync(lockPath); + } catch { + // ENOENT or any other error — releasing is best-effort. + } +} + +/** + * Attempt an atomic `wx` create, returning whether it succeeded. EEXIST (a + * racing creator beat us) and any other error map to false. + */ +function tryCreate(lockPath: string, body: string): boolean { + try { + fs.writeFileSync(lockPath, body, { flag: "wx" }); + return true; + } catch { + return false; + } +} + +/** + * Narrow an unknown caught value to a Node errno exception so `.code` is safe to + * read. + */ +function isErrnoException(error: unknown): error is NodeJS.ErrnoException { + return error instanceof Error && "code" in error; +} From f71c999882798598f4579bf5d49616b900ddcf5b Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Fri, 5 Jun 2026 18:58:22 +0200 Subject: [PATCH 10/13] chore(appkit): wire app template to non-blocking typegen, document --block Template postinstall/predev now run the non-blocking default `appkit generate-types` (instant degrade + background refresh) instead of a dedicated no-block script; prebuild keeps `--block` for accurate CI types. Removed the now redundant `typegen:no-block` script. Documented the non-blocking default and the `--block` flag in the type-generation guide. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- docs/docs/development/type-generation.md | 12 ++++++++++++ template/package.json | 5 ++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/docs/docs/development/type-generation.md b/docs/docs/development/type-generation.md index c433bb5a9..073a36bc3 100644 --- a/docs/docs/development/type-generation.md +++ b/docs/docs/development/type-generation.md @@ -72,6 +72,18 @@ npx @databricks/appkit generate-types [rootDir] [outFile] [warehouseId] npx @databricks/appkit generate-types --no-cache ``` +### Warehouse readiness and the `--block` flag + +By default, `generate-types` is **non-blocking**: it never waits on — or fails because of — your SQL warehouse. It writes the best types it can immediately (reusing cached types where the query is unchanged, otherwise `result: unknown`) and then spawns a detached background worker that refreshes the real types once the warehouse is ready. This keeps `npm install` (postinstall) and `npm run dev` (predev) fast and resilient to a cold or briefly-unreachable warehouse. The dev Vite plugin behaves the same way: types appear instantly and refresh in place once the warehouse is live. + +Pass `--block` for CI and production builds, where accurate types must be present before the build proceeds: + +```bash +npx @databricks/appkit generate-types --block +``` + +In blocking mode the generator starts a stopped warehouse, waits (bounded) for it to reach `RUNNING`, and then describes your queries. It fails only when the configured warehouse no longer exists (deleted/deleting), so a transient outage or a cold warehouse degrades gracefully rather than breaking the build. The app template wires this up for you: `postinstall` and `predev` run the non-blocking default, while `prebuild` runs `--block`. + ## How it works The type generator: diff --git a/template/package.json b/template/package.json index aae8f721c..106f6f0b4 100644 --- a/template/package.json +++ b/template/package.json @@ -20,12 +20,11 @@ "test:e2e:ui": "playwright test --ui", "test:smoke": "playwright install chromium && playwright test tests/smoke.spec.ts", "clean": "rm -rf client/dist dist build node_modules .smoke-test test-results playwright-report", - "postinstall": "npm run typegen:no-block", + "postinstall": "appkit generate-types", "prebuild": "npm run sync && npm run typegen", - "predev": "npm run sync && npm run typegen:no-block", + "predev": "npm run sync && appkit generate-types", "sync": "appkit plugin sync --write --silent", "typegen": "appkit generate-types --block", - "typegen:no-block": "appkit generate-types", "setup": "appkit setup --write" }, "keywords": [], From b698c438001a23f8b57bda55608f28b5491cf7ee Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Fri, 5 Jun 2026 19:16:24 +0200 Subject: [PATCH 11/13] fix(appkit): don't print typegen SQL errors twice A failed DESCRIBE logged the raw SQL error message per query during the describe loop, and the aggregated TypegenSyntaxError (printed by the Vite plugin / CLI) carries the same message in its formatted block -- so every SQL syntax error showed up twice in dev. Drop the per-query warn; the formatted block and the summary table already surface it. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/type-generator/query-registry.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts index ce4b82396..ce0cdd0b7 100644 --- a/packages/appkit/src/type-generator/query-registry.ts +++ b/packages/appkit/src/type-generator/query-registry.ts @@ -703,7 +703,9 @@ export async function generateQueriesFromDescribe( // failed — a genuine SQL error (bad table, syntax, incompatible type). const sqlError = result.status.error?.message || "Query execution failed"; - logger.warn("DESCRIBE failed for %s: %s", queryName, sqlError); + // The failure is surfaced once, formatted, by the aggregated + // TypegenSyntaxError (and the summary table) — don't also log the raw + // message here or every SQL error prints twice in dev. const type = generateUnknownResultQuery(sql, queryName); return { status: "syntax", From 2947a69623b436ef32affc467247e008bddb5d77 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Fri, 5 Jun 2026 19:26:19 +0200 Subject: [PATCH 12/13] fix(appkit): forward execArgv so the detached typegen worker runs under tsx The non-blocking CLI spawned its background worker as `node ` without the parent's node/loader flags. Run from source via tsx, argv[1] is a .ts file that plain node can't parse, so the worker died silently (detached + stdio ignore) and the degraded types never refreshed -- the queries appeared to never run. Forward process.execArgv, which carries tsx's --require/--import loader flags (and is empty for the built bin, so production is unaffected), so the worker runs under the same runtime as the parent. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- .../shared/src/cli/commands/generate-types.test.ts | 11 +++++++++-- packages/shared/src/cli/commands/generate-types.ts | 7 +++++++ template/package.json | 8 ++++---- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/packages/shared/src/cli/commands/generate-types.test.ts b/packages/shared/src/cli/commands/generate-types.test.ts index bdece9983..73a378fdc 100644 --- a/packages/shared/src/cli/commands/generate-types.test.ts +++ b/packages/shared/src/cli/commands/generate-types.test.ts @@ -146,8 +146,15 @@ describe("generate-types foreground spawn orchestration", () => { expect(spawn).toHaveBeenCalledTimes(1); const [bin, argv, opts] = spawn.mock.calls[0]; expect(bin).toBe(process.execPath); - expect(argv[0]).toBe(process.argv[1]); // CLI entry - expect(argv.slice(1)).toEqual([ + // The parent's node/loader flags (process.execArgv — e.g. tsx's + // --require/--import) are forwarded before the CLI entry so a worker spawned + // from a source/tsx run can still execute the .ts. Everything from the entry + // onward is the worker invocation. + const entryIdx = argv.indexOf(process.argv[1]); + expect(entryIdx).toBeGreaterThanOrEqual(0); + expect(argv.slice(0, entryIdx)).toEqual(process.execArgv); + expect(argv.slice(entryIdx)).toEqual([ + process.argv[1], // CLI entry "generate-types", "--block", "--worker-lock", diff --git a/packages/shared/src/cli/commands/generate-types.ts b/packages/shared/src/cli/commands/generate-types.ts index 939391d8a..eac018483 100644 --- a/packages/shared/src/cli/commands/generate-types.ts +++ b/packages/shared/src/cli/commands/generate-types.ts @@ -159,6 +159,13 @@ export function spawnTypegenWorker( } const args = [ + // Forward the parent's node/loader flags so the worker runs under the same + // runtime. Critically this carries tsx's `--require`/`--import` when the CLI + // is run from source (`tsx index.ts …`); without them the worker would be + // `node index.ts …`, which can't parse TypeScript and dies silently — the + // degraded types would then never refresh. Empty for the built bin (plain + // `node bin/appkit.js`), so production behaviour is unchanged. + ...process.execArgv, cliEntry, "generate-types", "--block", diff --git a/template/package.json b/template/package.json index 106f6f0b4..124bd7cad 100644 --- a/template/package.json +++ b/template/package.json @@ -20,11 +20,11 @@ "test:e2e:ui": "playwright test --ui", "test:smoke": "playwright install chromium && playwright test tests/smoke.spec.ts", "clean": "rm -rf client/dist dist build node_modules .smoke-test test-results playwright-report", - "postinstall": "appkit generate-types", - "prebuild": "npm run sync && npm run typegen", - "predev": "npm run sync && appkit generate-types", + "postinstall": "npm run typegen", + "prebuild": "npm run sync && npm run typegen -- --block", + "predev": "npm run sync && npm run typegen", "sync": "appkit plugin sync --write --silent", - "typegen": "appkit generate-types --block", + "typegen": "appkit generate-types", "setup": "appkit setup --write" }, "keywords": [], From 03ee7929ca301631d8e5c0d2b6e4292f48caa43a Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Fri, 5 Jun 2026 19:44:50 +0200 Subject: [PATCH 13/13] refactor(appkit): rename typegen --block flag to --wait Rename the user-facing CLI flag --block to --wait (commander option property block -> wait), including the detached worker's self-spawn arg, the --help example, the template prebuild script, and the type-generation guide. The internal "blocking"/"non-blocking" PreflightMode names are unchanged -- they describe runtime behaviour and aren't user-facing. The flag only ever existed on this branch (unreleased), so no deprecation alias is needed. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- docs/docs/development/type-generation.md | 8 +++---- .../src/cli/commands/generate-types.test.ts | 22 +++++++++---------- .../shared/src/cli/commands/generate-types.ts | 22 +++++++++---------- template/package.json | 2 +- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/docs/docs/development/type-generation.md b/docs/docs/development/type-generation.md index 073a36bc3..c715791a9 100644 --- a/docs/docs/development/type-generation.md +++ b/docs/docs/development/type-generation.md @@ -72,17 +72,17 @@ npx @databricks/appkit generate-types [rootDir] [outFile] [warehouseId] npx @databricks/appkit generate-types --no-cache ``` -### Warehouse readiness and the `--block` flag +### Warehouse readiness and the `--wait` flag By default, `generate-types` is **non-blocking**: it never waits on — or fails because of — your SQL warehouse. It writes the best types it can immediately (reusing cached types where the query is unchanged, otherwise `result: unknown`) and then spawns a detached background worker that refreshes the real types once the warehouse is ready. This keeps `npm install` (postinstall) and `npm run dev` (predev) fast and resilient to a cold or briefly-unreachable warehouse. The dev Vite plugin behaves the same way: types appear instantly and refresh in place once the warehouse is live. -Pass `--block` for CI and production builds, where accurate types must be present before the build proceeds: +Pass `--wait` for CI and production builds, where accurate types must be present before the build proceeds: ```bash -npx @databricks/appkit generate-types --block +npx @databricks/appkit generate-types --wait ``` -In blocking mode the generator starts a stopped warehouse, waits (bounded) for it to reach `RUNNING`, and then describes your queries. It fails only when the configured warehouse no longer exists (deleted/deleting), so a transient outage or a cold warehouse degrades gracefully rather than breaking the build. The app template wires this up for you: `postinstall` and `predev` run the non-blocking default, while `prebuild` runs `--block`. +In blocking mode the generator starts a stopped warehouse, waits (bounded) for it to reach `RUNNING`, and then describes your queries. It fails only when the configured warehouse no longer exists (deleted/deleting), so a transient outage or a cold warehouse degrades gracefully rather than breaking the build. The app template wires this up for you: `postinstall` and `predev` run the non-blocking default, while `prebuild` runs `--wait`. ## How it works diff --git a/packages/shared/src/cli/commands/generate-types.test.ts b/packages/shared/src/cli/commands/generate-types.test.ts index 73a378fdc..df65f047c 100644 --- a/packages/shared/src/cli/commands/generate-types.test.ts +++ b/packages/shared/src/cli/commands/generate-types.test.ts @@ -70,14 +70,14 @@ import { generateTypesCommand, resolveTypegenMode } from "./generate-types"; /** * Drive the real commander command the way the bin does, so argv parsing - * (`--block`, `--worker-lock ` → camelCase, positionals) is exercised + * (`--wait`, `--worker-lock ` → camelCase, positionals) is exercised * end-to-end. `from: "user"` means args are the user-supplied tokens only. */ async function runCli(args: string[]): Promise { await generateTypesCommand.parseAsync(args, { from: "user" }); } -describe("resolveTypegenMode (generate-types --block)", () => { +describe("resolveTypegenMode (generate-types --wait)", () => { test("defaults to non-blocking when no options/flag are given", () => { // A one-shot CLI never describes by default — it emits cached/`unknown` types // and exits 0 instead of blocking on (or failing because of) a warehouse, @@ -86,15 +86,15 @@ describe("resolveTypegenMode (generate-types --block)", () => { expect(resolveTypegenMode({})).toBe("non-blocking"); }); - test("stays non-blocking when block is false (flag absent)", () => { - expect(resolveTypegenMode({ block: false })).toBe("non-blocking"); + test("stays non-blocking when wait is false (flag absent)", () => { + expect(resolveTypegenMode({ wait: false })).toBe("non-blocking"); }); - test("switches to blocking when --block sets block to true", () => { - // commander maps `--block` to `{ block: true }`. A deliberate/CI invocation + test("switches to blocking when --wait sets wait to true", () => { + // commander maps `--wait` to `{ wait: true }`. A deliberate/CI invocation // opts in to waiting for a starting warehouse and failing fast on a stopped // one. - expect(resolveTypegenMode({ block: true })).toBe("blocking"); + expect(resolveTypegenMode({ wait: true })).toBe("blocking"); }); }); @@ -141,7 +141,7 @@ describe("generate-types foreground spawn orchestration", () => { expect.objectContaining({ mode: "non-blocking" }), ); - // Exactly one detached worker, re-invoking this CLI with --block and the + // Exactly one detached worker, re-invoking this CLI with --wait and the // worker lock, forwarding the same positional targets. expect(spawn).toHaveBeenCalledTimes(1); const [bin, argv, opts] = spawn.mock.calls[0]; @@ -156,7 +156,7 @@ describe("generate-types foreground spawn orchestration", () => { expect(argv.slice(entryIdx)).toEqual([ process.argv[1], // CLI entry "generate-types", - "--block", + "--wait", "--worker-lock", getSpawnLockPath(tmpRoot), tmpRoot, @@ -205,8 +205,8 @@ describe("generate-types foreground spawn orchestration", () => { ); }); - test("--block (deliberate/CI) generates blocking and never spawns", async () => { - await runCli([tmpRoot, "--block"]); + test("--wait (deliberate/CI) generates blocking and never spawns", async () => { + await runCli([tmpRoot, "--wait"]); expect(generateFromEntryPoint).toHaveBeenCalledWith( expect.objectContaining({ mode: "blocking" }), diff --git a/packages/shared/src/cli/commands/generate-types.ts b/packages/shared/src/cli/commands/generate-types.ts index eac018483..03c1631a1 100644 --- a/packages/shared/src/cli/commands/generate-types.ts +++ b/packages/shared/src/cli/commands/generate-types.ts @@ -14,25 +14,25 @@ import { * describes at all: it skips the warehouse probe AND every DESCRIBE, emits * best-available types (cache where the SQL hash matches, else `result: unknown`) * and returns immediately, never blocking on — or failing because of — a - * warehouse, even a RUNNING one. Pass `--block` (commander sets `block: true`) + * warehouse, even a RUNNING one. Pass `--wait` (commander sets `wait: true`) * for a deliberate/CI invocation that should wait for a starting warehouse and * fail fast on a stopped one. */ export function resolveTypegenMode(options?: { - block?: boolean; + wait?: boolean; }): "non-blocking" | "blocking" { - return options?.block ? "blocking" : "non-blocking"; + return options?.wait ? "blocking" : "non-blocking"; } /** Options parsed by commander for the generate-types command. */ interface GenerateTypesOptions { noCache?: boolean; - block?: boolean; + wait?: boolean; /** * Internal: present only on the detached worker invocation. Carries the path * of the single-flight lock this worker must release when it finishes. Its * presence is what marks an invocation as "the worker" — workers always run - * with `--block`, so they never spawn another worker (only non-blocking runs + * with `--wait`, so they never spawn another worker (only non-blocking runs * spawn), which terminates the recursion. */ workerLock?: string; @@ -124,7 +124,7 @@ async function runGenerateTypes( * after the foreground non-blocking generate has already written degraded types. * * Re-invokes THIS CLI (`process.execPath` + `process.argv[1]` — the bin entry - * that launched us) with `generate-types --block --worker-lock ` plus + * that launched us) with `generate-types --wait --worker-lock ` plus * the same positional target options the foreground used, so the worker writes * to the same out file / reads the same query folder. The worker is: * - `detached: true` + `.unref()` so it outlives this process (install/dev-setup @@ -168,7 +168,7 @@ export function spawnTypegenWorker( ...process.execArgv, cliEntry, "generate-types", - "--block", + "--wait", "--worker-lock", lockPath, ...positionals, @@ -229,7 +229,7 @@ async function generateTypesAction( } // Only a non-blocking, non-worker invocation spawns. A worker is always - // --block (so resolveTypegenMode → "blocking"), which both prevents recursion + // --wait (so resolveTypegenMode → "blocking"), which both prevents recursion // and means we never get here for a worker. if (!isWorker && resolveTypegenMode(options) === "non-blocking") { const resolvedRootDir = rootDir || process.cwd(); @@ -254,8 +254,8 @@ export const generateTypesCommand = new Command("generate-types") .argument("[warehouseId]", "Databricks warehouse ID") .option("--no-cache", "Disable caching for type generation") .option( - "--block", - "Block on warehouse readiness instead of degrading (use for CI)", + "--wait", + "Wait for warehouse readiness instead of degrading (use for CI)", ) // Internal: marks the detached background worker and carries the lock it must // release. Hidden from --help; users should never pass it. @@ -273,6 +273,6 @@ Examples: $ appkit generate-types . shared/appkit-types/analytics.d.ts $ appkit generate-types . shared/appkit-types/analytics.d.ts my-warehouse-id $ appkit generate-types --no-cache - $ appkit generate-types --block # CI: wait for the warehouse and fail on a cold one`, + $ appkit generate-types --wait # CI: wait for the warehouse and fail on a cold one`, ) .action(generateTypesAction); diff --git a/template/package.json b/template/package.json index 124bd7cad..94d8c8677 100644 --- a/template/package.json +++ b/template/package.json @@ -21,7 +21,7 @@ "test:smoke": "playwright install chromium && playwright test tests/smoke.spec.ts", "clean": "rm -rf client/dist dist build node_modules .smoke-test test-results playwright-report", "postinstall": "npm run typegen", - "prebuild": "npm run sync && npm run typegen -- --block", + "prebuild": "npm run sync && npm run typegen -- --wait", "predev": "npm run sync && npm run typegen", "sync": "appkit plugin sync --write --silent", "typegen": "appkit generate-types",