diff --git a/docs/docs/development/type-generation.md b/docs/docs/development/type-generation.md index c433bb5a9..c715791a9 100644 --- a/docs/docs/development/type-generation.md +++ b/docs/docs/development/type-generation.md @@ -72,6 +72,18 @@ npx @databricks/appkit generate-types [rootDir] [outFile] [warehouseId] npx @databricks/appkit generate-types --no-cache ``` +### Warehouse readiness and the `--wait` flag + +By default, `generate-types` is **non-blocking**: it never waits on — or fails because of — your SQL warehouse. It writes the best types it can immediately (reusing cached types where the query is unchanged, otherwise `result: unknown`) and then spawns a detached background worker that refreshes the real types once the warehouse is ready. This keeps `npm install` (postinstall) and `npm run dev` (predev) fast and resilient to a cold or briefly-unreachable warehouse. The dev Vite plugin behaves the same way: types appear instantly and refresh in place once the warehouse is live. + +Pass `--wait` for CI and production builds, where accurate types must be present before the build proceeds: + +```bash +npx @databricks/appkit generate-types --wait +``` + +In blocking mode the generator starts a stopped warehouse, waits (bounded) for it to reach `RUNNING`, and then describes your queries. It fails when the configured warehouse no longer exists (deleted/deleting), does not reach `RUNNING` within the bounded wait, or cannot be accessed because of a non-connectivity error (for example an authentication failure or an invalid warehouse ID); a transient connectivity outage degrades gracefully rather than breaking the build. The app template wires this up for you: `postinstall` and `predev` run the non-blocking default, while `prebuild` runs `--wait`. 
+ ## How it works The type generator: diff --git a/packages/appkit/src/type-generator/index.ts b/packages/appkit/src/type-generator/index.ts index c9a528fe7..42b8c3244 100644 --- a/packages/appkit/src/type-generator/index.ts +++ b/packages/appkit/src/type-generator/index.ts @@ -1,20 +1,169 @@ import fs from "node:fs/promises"; import path from "node:path"; import dotenv from "dotenv"; +import pc from "picocolors"; import { createLogger } from "../logging/logger"; import { migrateProjectConfig, removeOldGeneratedTypes, resolveProjectRoot, } from "./migration"; +import type { PreflightMode } from "./preflight"; import { generateQueriesFromDescribe } from "./query-registry"; import { generateServingTypes as generateServingTypesImpl } from "./serving/generator"; -import type { QuerySchema } from "./types"; +import type { QueryFatalError, QuerySchema, QuerySyntaxError } from "./types"; dotenv.config(); const logger = createLogger("type-generator"); +type TypegenFailure = QuerySyntaxError | QueryFatalError; + +function plural(count: number, singular: string, pluralForm = `${singular}s`) { + return count === 1 ? singular : pluralForm; +} + +function formatFailureRows( + label: string, + queries: TypegenFailure[], + color: (value: string) => string, +) { + if (queries.length === 0) return []; + + // Group by message so a shared failure — e.g. a warehouse-level fatal that + // hits every query identically — prints once instead of repeating per row. + const byMessage = new Map(); + for (const { name, message } of queries) { + const names = byMessage.get(message); + if (names) names.push(name); + else byMessage.set(message, [name]); + } + + const maxNameLen = Math.max(...queries.map((query) => query.name.length)); + const tag = color(label.padEnd(7)); + const rows: string[] = []; + for (const [message, names] of byMessage) { + // Unique message → keep the compact one-line `tag name message` form. 
+ if (names.length === 1) { + rows.push( + ` ${tag} ${pc.bold(names[0].padEnd(maxNameLen))} ${pc.dim(message)}`, + ); + continue; + } + // Shared message → print it once, then list the affected query names. + rows.push( + ` ${tag} ${pc.dim(message)} ${pc.dim(`(${names.length} ${plural(names.length, "query", "queries")})`)}`, + ); + rows.push( + ` ${names.map((name) => pc.bold(name)).join(pc.dim(", "))}`, + ); + } + return rows; +} + +function formatTypegenFailureMessage(options: { + syntaxErrors: QuerySyntaxError[]; + fatalErrors?: QueryFatalError[]; + warehouseId?: string; + title: string; + causes: string[]; + nextStep: string; +}) { + const { syntaxErrors, fatalErrors = [], warehouseId, title } = options; + const total = syntaxErrors.length + fatalErrors.length; + const separator = pc.dim("─".repeat(60)); + const warehouse = warehouseId + ? ` against ${pc.dim(`warehouse ${warehouseId}`)}` + : ""; + + return [ + ` ${pc.bold(pc.red("Type generation failed"))}`, + ` ${separator}`, + ` ${title}: ${total} ${plural(total, "query", "queries")} could not be described${warehouse}.`, + ` AppKit wrote generated types with ${pc.bold("result: unknown")} for the failed ${plural(total, "query", "queries")}.`, + "", + ...formatFailureRows("SQL ERR", syntaxErrors, pc.red), + ...(syntaxErrors.length > 0 && fatalErrors.length > 0 ? [""] : []), + ...formatFailureRows("FATAL", fatalErrors, pc.red), + "", + ` ${pc.bold("Common causes")}`, + ...options.causes.map((cause) => ` - ${cause}`), + "", + ` ${pc.bold("Next step")}`, + ` ${options.nextStep}`, + ].join("\n"); +} + +/** + * Thrown when one or more queries fail `DESCRIBE QUERY` against a *reachable* + * warehouse — i.e. genuine SQL errors (bad table, syntax, incompatible type), + * as opposed to a connectivity failure (warehouse unreachable), which degrades + * silently. Whether this is fatal is the caller's decision: the Vite plugin and + * CLI fail the build in production and warn-only in development. 
+ */ +export class TypegenSyntaxError extends Error { + readonly queries: QuerySyntaxError[]; + readonly fatalQueries: QueryFatalError[]; + + constructor( + queries: QuerySyntaxError[], + warehouseId?: string, + fatalQueries: QueryFatalError[] = [], + ) { + super( + formatTypegenFailureMessage({ + syntaxErrors: queries, + fatalErrors: fatalQueries, + warehouseId, + title: "DESCRIBE QUERY failed", + causes: [ + "SQL syntax errors", + "missing tables or views", + "warehouse format incompatibilities", + ], + nextStep: warehouseId + ? `Run each SQL ERR query directly in a Databricks SQL editor against warehouse ${pc.bold(warehouseId)}.` + : "Run each SQL ERR query directly in a Databricks SQL editor.", + }), + ); + this.name = "TypegenSyntaxError"; + this.queries = queries; + this.fatalQueries = fatalQueries; + } +} + +/** + * Thrown when DESCRIBE QUERY could not be requested because of a non-SQL fatal + * setup/request problem, such as missing permissions, invalid warehouse IDs, or + * malformed SDK configuration. Like TypegenSyntaxError, this is thrown only + * after the declaration file has been written with `result: unknown` entries. + */ +export class TypegenFatalError extends Error { + readonly queries: QueryFatalError[]; + + constructor(queries: QueryFatalError[], warehouseId?: string) { + super( + formatTypegenFailureMessage({ + syntaxErrors: [], + fatalErrors: queries, + warehouseId, + title: "DESCRIBE QUERY could not be requested", + causes: [ + "missing warehouse permissions", + "invalid warehouse ID", + "authentication failure", + "SDK configuration errors", + ], + nextStep: warehouseId + ? 
`Verify access to warehouse ${pc.bold(warehouseId)} and rerun type generation.` + : "Verify warehouse access and rerun type generation.", + }), + ); + this.name = "TypegenFatalError"; + this.queries = queries; + } +} + /** * Generate type declarations for QueryRegistry * Create the d.ts file from the plugin routes and query schemas @@ -57,35 +206,30 @@ export async function generateFromEntryPoint(options: { queryFolder?: string; warehouseId: string; noCache?: boolean; + mode?: PreflightMode; }) { - const { outFile, queryFolder, warehouseId, noCache } = options; + const { + outFile, + queryFolder, + warehouseId, + noCache, + mode = "non-blocking", + } = options; const projectRoot = resolveProjectRoot(outFile); logger.debug("Starting type generation..."); let queryRegistry: QuerySchema[] = []; - if (queryFolder) - queryRegistry = await generateQueriesFromDescribe( - queryFolder, - warehouseId, - { - noCache, - }, - ); - - const failedQueries = queryRegistry.filter((q) => - q.type.includes("result: unknown"), - ); - if (failedQueries.length > 0) { - const names = failedQueries.map((q) => q.name).join(", "); - throw new Error( - [ - `Type generation failed: ${failedQueries.length} ${failedQueries.length === 1 ? "query" : "queries"} could not be described: ${names}.`, - `DESCRIBE QUERY failed for these queries — see the error codes above for details.`, - `Common causes: SQL syntax errors, missing tables/views, or warehouse format incompatibilities.`, - `To debug: run the failing query directly in a SQL editor against warehouse ${warehouseId}.`, - ].join("\n"), - ); + let syntaxErrors: QuerySyntaxError[] = []; + let fatalErrors: QueryFatalError[] = []; + if (queryFolder) { + const result = await generateQueriesFromDescribe(queryFolder, warehouseId, { + noCache, + mode, + }); + queryRegistry = result.schemas; + syntaxErrors = result.syntaxErrors ?? []; + fatalErrors = result.fatalErrors ?? 
[]; } const typeDeclarations = generateTypeDeclarations(queryRegistry); @@ -97,6 +241,17 @@ export async function generateFromEntryPoint(options: { await removeOldGeneratedTypes(projectRoot, "appKitTypes.d.ts"); await migrateProjectConfig(projectRoot); + // Types are always written above — including `result: unknown` for any query + // that could not be described. Connectivity failures pass silently so a + // transient warehouse outage never blocks a build; genuine SQL errors and + // non-connectivity fatal request failures surface after the file write. + if (syntaxErrors.length > 0) { + throw new TypegenSyntaxError(syntaxErrors, warehouseId, fatalErrors); + } + if (fatalErrors.length > 0) { + throw new TypegenFatalError(fatalErrors, warehouseId); + } + logger.debug("Type generation complete!"); } diff --git a/packages/appkit/src/type-generator/preflight.ts b/packages/appkit/src/type-generator/preflight.ts new file mode 100644 index 000000000..e35c92592 --- /dev/null +++ b/packages/appkit/src/type-generator/preflight.ts @@ -0,0 +1,63 @@ +import type { WarehouseState } from "./warehouse-status"; + +/** + * How aggressively typegen should react to a not-ready warehouse. + * - `non-blocking`: never describe and never probe the warehouse — emit + * best-available types (cache where the SQL hash matches, else `unknown`) and + * return at once. The default for interactive/foreground runs that can't + * afford to block on (or fail because of) a warehouse, even a RUNNING one. + * - `blocking`: a startable warehouse is worth waiting for, and a stopped one + * is worth starting — only a deleted/deleting warehouse is a hard failure. + */ +export type PreflightMode = "non-blocking" | "blocking"; + +/** + * What the caller should do given a warehouse state and mode. + * - `proceed`: run DESCRIBE now. + * - `degradeAll`: skip DESCRIBE; emit degraded (cached/`unknown`) types. + * - `waitThenProceed`: wait for the warehouse to start, then run DESCRIBE. 
+ * - `startWaitProceed`: start the stopped warehouse, wait for RUNNING, then + * run DESCRIBE. + * - `fatal`: stop — the warehouse can't serve this run. + */ +export type PreflightDecision = + | "proceed" + | "degradeAll" + | "waitThenProceed" + | "startWaitProceed" + | "fatal"; + +/** + * Pure policy mapping a warehouse state + mode to a preflight decision. + * + * Unknown/unexpected states fall through to `proceed`: the describe loop and + * its per-query backstop already degrade gracefully, so we don't want a new + * SDK state value to turn into a spurious `fatal`. + */ +export function decidePreflight( + state: WarehouseState, + mode: PreflightMode, +): PreflightDecision { + // `non-blocking` never describes regardless of state: emit cached/`unknown` + // types and return. The caller short-circuits before probing, so this is only + // a belt-and-suspenders mapping, but it keeps the policy total and + // self-contained. + if (mode === "non-blocking") return "degradeAll"; + + // `blocking`: a starting warehouse is worth waiting for, a stopped one is + // worth starting (then waiting), and only a deleted/deleting one is fatal. 
+ switch (state) { + case "RUNNING": + return "proceed"; + case "STARTING": + return "waitThenProceed"; + case "STOPPED": + case "STOPPING": + return "startWaitProceed"; + case "DELETED": + case "DELETING": + return "fatal"; + default: + return "proceed"; + } +} diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts index 06ee64bac..ce0cdd0b7 100644 --- a/packages/appkit/src/type-generator/query-registry.ts +++ b/packages/appkit/src/type-generator/query-registry.ts @@ -4,16 +4,32 @@ import { WorkspaceClient } from "@databricks/sdk-experimental"; import pc from "picocolors"; import { createLogger } from "../logging/logger"; import { CACHE_VERSION, hashSQL, loadCache, saveCache } from "./cache"; +import { decidePreflight, type PreflightMode } from "./preflight"; import { Spinner } from "./spinner"; import { type DatabricksStatementExecutionResponse, + type QueryFatalError, + type QueryGenerationResult, type QuerySchema, + type QuerySyntaxError, sqlTypeToHelper, sqlTypeToMarker, } from "./types"; +import { + getWarehouseState, + startWarehouse, + waitUntilRunning, +} from "./warehouse-status"; const logger = createLogger("type-generator:query-registry"); +/** + * Upper bound on how long a `blocking`-mode preflight will wait for a starting + * warehouse to reach RUNNING before giving up (~5 min). Generous enough to ride + * out a cold start without hanging an interactive CLI invocation indefinitely. 
+ */ +const PREFLIGHT_WAIT_MAX_MS = 300_000; + /** * Regex breakdown: * '(?:[^']|'')*' — matches a SQL string literal, including escaped '' pairs @@ -82,6 +98,129 @@ function parseError(raw: string): { code?: string; message: string } { return { message: raw }; } +function isObject(value: unknown): value is Record { + return typeof value === "object" && value !== null; +} + +function getErrorMessage(error: unknown): string { + if (error instanceof Error) return error.message; + if (isObject(error) && typeof error.message === "string") { + return error.message; + } + return String(error); +} + +function getErrorDiagnostic(error: unknown): string { + const seen = new Set(); + const messages: string[] = []; + const stack = [error]; + + while (stack.length > 0) { + const current = stack.pop(); + if (current === undefined || seen.has(current)) continue; + seen.add(current); + + const message = getErrorMessage(current); + if ( + message && + message !== "[object Object]" && + !messages.includes(message) + ) { + messages.push(message); + } + + const code = getErrorCode(current); + if (code && !messages.includes(code)) messages.push(code); + + stack.push(...getErrorChildren(current)); + } + + return messages.length > 0 ? messages.join(": ") : getErrorMessage(error); +} + +function getErrorCode(error: unknown): string | undefined { + if (!isObject(error)) return undefined; + const code = error.code ?? error.errno; + return typeof code === "string" ? code : undefined; +} + +function getErrorStatus(error: unknown): number | undefined { + if (!isObject(error)) return undefined; + const direct = error.status ?? 
error.statusCode; + if (typeof direct === "number") return direct; + if (isObject(error.response) && typeof error.response.status === "number") { + return error.response.status; + } + return undefined; +} + +function getErrorChildren(error: unknown): unknown[] { + if (!isObject(error)) return []; + const children: unknown[] = []; + if ("cause" in error) children.push(error.cause); + if (error instanceof AggregateError) children.push(...error.errors); + return children; +} + +const CONNECTIVITY_ERROR_CODES = new Set([ + "ECONNREFUSED", + "ECONNRESET", + "ENOTFOUND", + "ETIMEDOUT", + "EAI_AGAIN", + "EAI_NODATA", + "EAI_NONAME", + "EHOSTUNREACH", + "ENETUNREACH", + "CERT_HAS_EXPIRED", + "DEPTH_ZERO_SELF_SIGNED_CERT", + "ERR_TLS_CERT_ALTNAME_INVALID", + "SELF_SIGNED_CERT_IN_CHAIN", + "UNABLE_TO_VERIFY_LEAF_SIGNATURE", +]); + +function isConnectivityMessage(message: string): boolean { + return ( + /\bconnection (?:refused|reset|timed out)\b/i.test(message) || + /\bsocket hang up\b/i.test(message) || + /\bnetwork error\b/i.test(message) || + /\bcan'?t connect to\b/i.test(message) || + /\bcertificate has expired\b/i.test(message) || + /\bunable to verify the first certificate\b/i.test(message) || + /\bupstream connect error or disconnect\/reset before headers\b/i.test( + message, + ) + ); +} + +function isConnectivityError(error: unknown): boolean { + const seen = new Set(); + const stack = [error]; + + while (stack.length > 0) { + const current = stack.pop(); + if (current === undefined || seen.has(current)) continue; + seen.add(current); + + const code = getErrorCode(current); + if ( + code && + (CONNECTIVITY_ERROR_CODES.has(code) || code.startsWith("UND_ERR_")) + ) { + return true; + } + + const status = getErrorStatus(current); + if (status === 502 || status === 503 || status === 504) return true; + + if (isConnectivityMessage(getErrorMessage(current))) return true; + + stack.push(...getErrorChildren(current)); + } + + return false; +} + /** * Extract parameters from 
a SQL query * @param sql - the SQL query to extract parameters from @@ -191,6 +330,25 @@ function generateUnknownResultQuery(sql: string, queryName: string): string { }`; } +/** + * Degrade gracefully when DESCRIBE can't produce a fresh schema (transient + * connectivity outage, or a warehouse that's reachable but not ready). Reuse + * the last-good cached type when the SQL hash is unchanged, otherwise emit + * `unknown` from SQL alone. Never persists `result: unknown`. + */ +function degradedType( + cache: Awaited>, + queryName: string, + sql: string, + sqlHash: string, +): string { + const prior = cache.queries[queryName]; + const canReusePrior = prior?.hash === sqlHash && !prior.retry; + return canReusePrior + ? prior.type + : generateUnknownResultQuery(sql, queryName); +} + export function extractParameterTypes(sql: string): Record { const paramTypes: Record = {}; // Alternation order matters: TIMESTAMP_NTZ must precede TIMESTAMP so the @@ -266,14 +424,27 @@ export function inferParameterTypes( * @param warehouseId - the warehouse id to use for schema analysis * @param options - options for the query generation * @param options.noCache - if true, skip the cache and regenerate all types + * @param options.mode - preflight policy: "non-blocking" never probes the + * warehouse and never describes (emits cached/`unknown` types and returns + * immediately), "blocking" waits for a starting warehouse and starts (then + * waits for) a stopped one, treating only a deleted/deleting warehouse as + * fatal. Defaults to "non-blocking". 
* @returns an array of query schemas */ export async function generateQueriesFromDescribe( queryFolder: string, warehouseId: string, - options: { noCache?: boolean; concurrency?: number } = {}, -): Promise { - const { noCache = false, concurrency: rawConcurrency = 10 } = options; + options: { + noCache?: boolean; + concurrency?: number; + mode?: PreflightMode; + } = {}, +): Promise { + const { + noCache = false, + concurrency: rawConcurrency = 10, + mode = "non-blocking", + } = options; const concurrency = typeof rawConcurrency === "number" && Number.isFinite(rawConcurrency) ? Math.max(1, Math.floor(rawConcurrency)) @@ -314,7 +485,10 @@ export async function generateQueriesFromDescribe( const logEntries: Array<{ queryName: string; status: "HIT" | "MISS"; - failed?: boolean; + // Absent for clean hits/misses. "syntax" = bad SQL on a reachable warehouse; + // "connectivity" = warehouse unreachable; "empty" = described but no columns; + // "fatal" = non-SQL setup/request failure surfaced after .d.ts emission. + kind?: "syntax" | "connectivity" | "empty" | "fatal"; error?: { code?: string; message: string }; }> = []; @@ -369,130 +543,317 @@ export async function generateQueriesFromDescribe( // Phase 2: Execute all uncached DESCRIBE calls in parallel type DescribeResult = | { + // Described successfully with a result schema — the only case we cache. status: "ok"; index: number; schema: QuerySchema; cacheEntry: { hash: string; type: string; retry: boolean }; } | { - status: "fail"; + // Reachable warehouse ran DESCRIBE and rejected the statement — a + // genuine SQL error. Eligible to fail the build; never cached. + status: "syntax"; index: number; schema: QuerySchema; - cacheEntry: { hash: string; type: string; retry: boolean }; error: { code?: string; message: string }; + } + | { + // DESCRIBE succeeded but returned no columns — soft `unknown`. Not a + // failure, not cached, retried next run. 
+ status: "empty"; + index: number; + schema: QuerySchema; + } + | { + // Warehouse reachable but returned a non-terminal state (PENDING/ + // RUNNING) with no rows — stopped/cold-starting/busy. Degrade like a + // transient outage (reuse cache or `unknown`); not cached, not empty. + status: "unavailable"; + index: number; + schema: QuerySchema; }; const freshResults: Array<{ index: number; schema: QuerySchema }> = []; + // Genuine SQL errors (reachable warehouse). Connectivity failures are NOT + // recorded here — they degrade silently so a transient outage isn't fatal. + const syntaxErrors: QuerySyntaxError[] = []; + const fatalErrors: QueryFatalError[] = []; if (uncachedQueries.length > 0) { - let completed = 0; - const total = uncachedQueries.length; - spinner.start( - `Describing ${total} ${total === 1 ? "query" : "queries"} (0/${total})`, - ); + // One-time warehouse preflight (before issuing any DESCRIBE). A single + // warehouses.get classifies the warehouse so we can skip the whole describe + // batch when it can't serve this run, instead of letting every query fail + // (and re-fail next run). Reuses this file's degrade/classify helpers so a + // not-ready warehouse degrades exactly like a per-query outage. + let decision: ReturnType = "proceed"; + let fatalMessage = ""; + if (mode === "non-blocking") { + // `non-blocking` never describes and must make ZERO warehouse round-trips: + // skip the probe entirely (no getWarehouseState) and go straight to + // degradeAll. A foreground/one-shot run can't describe in the background, + // so it emits best-available types (reused cache or `unknown`) and returns + // now. 
+ decision = "degradeAll"; + } else { + try { + const state = await getWarehouseState(client, warehouseId); + decision = decidePreflight(state, mode); + if (decision === "fatal") { + fatalMessage = `warehouse ${warehouseId} is ${state}`; + } + if (decision === "startWaitProceed") { + // Stopped/stopping warehouse: nudge it out of the stopped state, then + // poll to RUNNING. treatStoppedAsTransient rides out the stale + // pre-start STOPPED/STOPPING reading the start hasn't propagated past + // yet — only DELETED/DELETING (or the deadline) ends the wait early. + await startWarehouse(client, warehouseId); + const final = await waitUntilRunning(client, warehouseId, { + maxMs: PREFLIGHT_WAIT_MAX_MS, + treatStoppedAsTransient: true, + }); + if (final === "RUNNING") { + decision = "proceed"; + } else { + decision = "fatal"; + fatalMessage = `warehouse ${warehouseId} did not reach RUNNING (now ${final})`; + } + } + if (decision === "waitThenProceed") { + const final = await waitUntilRunning(client, warehouseId, { + maxMs: PREFLIGHT_WAIT_MAX_MS, + }); + if (final === "RUNNING") { + decision = "proceed"; + } else { + decision = "fatal"; + fatalMessage = `warehouse ${warehouseId} did not reach RUNNING (now ${final})`; + } + } + } catch (err) { + if (isConnectivityError(err)) { + // Warehouse unreachable (transient outage): degrade silently like a + // per-query connectivity failure — never fail a build on a blip. + decision = "degradeAll"; + } else { + // Auth, bad warehouse id, malformed config, or a timed-out wait: fatal. 
+ decision = "fatal"; + fatalMessage = `warehouse ${warehouseId}: ${getErrorDiagnostic(err)}`; + } + } + } - const describeOne = async ({ - index, - queryName, - sql, - sqlHash, - cleanedSql, - }: (typeof uncachedQueries)[number]): Promise => { - const result = (await client.statementExecution.executeStatement({ - statement: `DESCRIBE QUERY ${cleanedSql}`, - warehouse_id: warehouseId, - })) as DatabricksStatementExecutionResponse; - - completed++; - spinner.update( - `Describing ${total} ${total === 1 ? "query" : "queries"} (${completed}/${total})`, + if (decision !== "proceed") { + // degradeAll or fatal: skip DESCRIBE entirely. Every uncached query gets a + // degraded schema (reused cache or `unknown`); fatal additionally records + // a fatalError per query so the caller fails the build after writing. + const kind = decision === "fatal" ? "fatal" : "connectivity"; + for (const { index, queryName, sql, sqlHash } of uncachedQueries) { + freshResults.push({ + index, + schema: { + name: queryName, + type: degradedType(cache, queryName, sql, sqlHash), + }, + }); + if (decision === "fatal") { + fatalErrors.push({ name: queryName, message: fatalMessage }); + logEntries.push({ + queryName, + status: "MISS", + kind, + error: { message: fatalMessage }, + }); + } else { + logEntries.push({ queryName, status: "MISS", kind }); + } + } + } else { + let completed = 0; + const total = uncachedQueries.length; + spinner.start( + `Describing ${total} ${total === 1 ? "query" : "queries"} (0/${total})`, ); - logger.debug( - "DESCRIBE result for %s: state=%s, rows=%d", + const describeOne = async ({ + index, queryName, - result.status.state, - result.result?.data_array?.length ?? 
0, - ); + sql, + sqlHash, + cleanedSql, + }: (typeof uncachedQueries)[number]): Promise => { + const result = (await client.statementExecution.executeStatement({ + statement: `DESCRIBE QUERY ${cleanedSql}`, + warehouse_id: warehouseId, + })) as DatabricksStatementExecutionResponse; + + completed++; + spinner.update( + `Describing ${total} ${total === 1 ? "query" : "queries"} (${completed}/${total})`, + ); + + logger.debug( + "DESCRIBE result for %s: state=%s, rows=%d", + queryName, + result.status.state, + result.result?.data_array?.length ?? 0, + ); - if (result.status.state === "FAILED") { - const sqlError = - result.status.error?.message || "Query execution failed"; - logger.warn("DESCRIBE failed for %s: %s", queryName, sqlError); - const type = generateUnknownResultQuery(sql, queryName); + if (result.status.state === "FAILED") { + // The warehouse was reachable and ran DESCRIBE, but the statement + // failed — a genuine SQL error (bad table, syntax, incompatible type). + const sqlError = + result.status.error?.message || "Query execution failed"; + // The failure is surfaced once, formatted, by the aggregated + // TypegenSyntaxError (and the summary table) — don't also log the raw + // message here or every SQL error prints twice in dev. + const type = generateUnknownResultQuery(sql, queryName); + return { + status: "syntax", + index, + schema: { name: queryName, type }, + error: parseError(sqlError), + }; + } + + if (result.status.state !== "SUCCEEDED") { + // Non-terminal state (PENDING/RUNNING) with no result rows: the + // warehouse is reachable but not ready (stopped, cold-starting, or + // busy). Degrade like a transient outage — reuse the last-good cached + // type when the SQL is unchanged, else emit `unknown`. Never "empty": + // treating this as empty would emit `result: unknown` AND discard the + // good cached type, silently throwing away working types. 
+ return { + status: "unavailable", + index, + schema: { + name: queryName, + type: degradedType(cache, queryName, sql, sqlHash), + }, + }; + } + + const { type, hasResults } = convertToQueryType(result, sql, queryName); + if (!hasResults) { + // Described, but no result columns. Emit `unknown` and retry next run; + // do not cache (we never persist `result: unknown`). + return { status: "empty", index, schema: { name: queryName, type } }; + } return { - status: "fail", + status: "ok", index, schema: { name: queryName, type }, - cacheEntry: { hash: sqlHash, type, retry: true }, - error: parseError(sqlError), + cacheEntry: { hash: sqlHash, type, retry: false }, }; - } - - const { type, hasResults } = convertToQueryType(result, sql, queryName); - return { - status: "ok", - index, - schema: { name: queryName, type }, - cacheEntry: { hash: sqlHash, type, retry: !hasResults }, }; - }; - // Process in chunks, saving cache after each chunk - const processBatchResults = ( - settled: PromiseSettledResult[], - batchOffset: number, - ) => { - for (let i = 0; i < settled.length; i++) { - const entry = settled[i]; - const { queryName } = uncachedQueries[batchOffset + i]; - - if (entry.status === "fulfilled") { - const res = entry.value; - freshResults.push({ index: res.index, schema: res.schema }); - cache.queries[queryName] = res.cacheEntry; - logEntries.push({ - queryName, - status: "MISS", - failed: res.status === "fail", - error: res.status === "fail" ? res.error : undefined, - }); - } else { - const { sql, sqlHash, index } = uncachedQueries[batchOffset + i]; - const reason = - entry.reason instanceof Error - ? 
entry.reason.message - : String(entry.reason); - logger.warn("DESCRIBE rejected for %s: %s", queryName, reason); - const type = generateUnknownResultQuery(sql, queryName); - freshResults.push({ index, schema: { name: queryName, type } }); - cache.queries[queryName] = { hash: sqlHash, type, retry: true }; - logEntries.push({ - queryName, - status: "MISS", - failed: true, - error: parseError(reason), - }); + // Process in chunks, saving cache after each chunk + const processBatchResults = ( + settled: PromiseSettledResult[], + batchOffset: number, + ) => { + for (let i = 0; i < settled.length; i++) { + const entry = settled[i]; + const { queryName } = uncachedQueries[batchOffset + i]; + + if (entry.status === "fulfilled") { + const res = entry.value; + freshResults.push({ index: res.index, schema: res.schema }); + + if (res.status === "ok") { + // Only a successful describe with a result schema is cached. + cache.queries[queryName] = res.cacheEntry; + logEntries.push({ queryName, status: "MISS" }); + } else if (res.status === "syntax") { + // Genuine SQL error — record it for the caller's prod/dev gate. + // Not cached: re-described next run so a fixed query recovers. + syntaxErrors.push({ + name: queryName, + message: res.error.message, + }); + logEntries.push({ + queryName, + status: "MISS", + kind: "syntax", + error: res.error, + }); + } else if (res.status === "empty") { + // status === "empty": described, no columns. Soft unknown, not cached. + logEntries.push({ queryName, status: "MISS", kind: "empty" }); + } else { + // status === "unavailable": non-terminal DESCRIBE (warehouse + // stopped/cold-starting/busy). Degrade like a transient outage: + // tag OFFLINE, count as degraded, never cache. + logEntries.push({ + queryName, + status: "MISS", + kind: "connectivity", + }); + } + } else { + // executeStatement rejected without a normal StatementExecution result. 
+ // Only structured transport/connectivity failures are treated as + // offline; auth, bad warehouse IDs, malformed requests, and SDK/config + // failures stay fatal so users fix the underlying setup issue. + completed++; + spinner.update( + `Describing ${total} ${total === 1 ? "query" : "queries"} (${completed}/${total})`, + ); + + const { sql, sqlHash, index } = uncachedQueries[batchOffset + i]; + const reason = getErrorDiagnostic(entry.reason); + const error = parseError(reason); + const priorEntry = cache.queries[queryName]; + const canReusePrior = + priorEntry?.hash === sqlHash && !priorEntry.retry; + const type = degradedType(cache, queryName, sql, sqlHash); + freshResults.push({ index, schema: { name: queryName, type } }); + + if (!isConnectivityError(entry.reason)) { + fatalErrors.push({ name: queryName, message: error.message }); + logEntries.push({ + queryName, + status: "MISS", + kind: "fatal", + error, + }); + continue; + } + + logger.warn( + "DESCRIBE unreachable for %s: %s — %s", + queryName, + reason, + canReusePrior + ? 
"reusing last cached type" + : "emitting unknown (no matching cache)", + ); + logEntries.push({ + queryName, + status: "MISS", + kind: "connectivity", + error, + }); + } } - } - }; + }; - if (uncachedQueries.length > concurrency) { - for (let b = 0; b < uncachedQueries.length; b += concurrency) { - const batch = uncachedQueries.slice(b, b + concurrency); - const batchResults = await Promise.allSettled(batch.map(describeOne)); - processBatchResults(batchResults, b); + if (uncachedQueries.length > concurrency) { + for (let b = 0; b < uncachedQueries.length; b += concurrency) { + const batch = uncachedQueries.slice(b, b + concurrency); + const batchResults = await Promise.allSettled(batch.map(describeOne)); + processBatchResults(batchResults, b); + await saveCache(cache); + } + } else { + const settled = await Promise.allSettled( + uncachedQueries.map(describeOne), + ); + processBatchResults(settled, 0); await saveCache(cache); } - } else { - const settled = await Promise.allSettled( - uncachedQueries.map(describeOne), - ); - processBatchResults(settled, 0); - await saveCache(cache); - } - spinner.stop(""); + spinner.stop(""); + } } const elapsed = ((performance.now() - startTime) / 1000).toFixed(2); @@ -507,36 +868,69 @@ export async function generateQueriesFromDescribe( ); console.log(` ${separator}`); for (const entry of logEntries) { - const tag = entry.failed - ? pc.bold(pc.red("ERROR")) - : entry.status === "HIT" - ? `cache ${pc.bold(pc.green("HIT "))}` - : `cache ${pc.bold(pc.yellow("MISS "))}`; + let tag: string; + switch (entry.kind) { + case "syntax": + tag = pc.bold(pc.red("SQL ERR")); + break; + case "connectivity": + tag = pc.bold(pc.yellow("OFFLINE")); + break; + case "empty": + tag = pc.dim("EMPTY "); + break; + case "fatal": + tag = pc.bold(pc.red("FATAL ")); + break; + default: + tag = + entry.status === "HIT" + ? 
`cache ${pc.bold(pc.green("HIT "))}` + : `cache ${pc.bold(pc.yellow("MISS "))}`; + } const rawName = entry.queryName.padEnd(maxNameLen); - const name = entry.failed ? pc.dim(pc.strikethrough(rawName)) : rawName; + // Only SQL and fatal errors are struck through. Connectivity/empty kept a + // usable type (reused or unknown), so they read as degraded, not broken. + const name = + entry.kind === "syntax" || entry.kind === "fatal" + ? pc.dim(pc.strikethrough(rawName)) + : rawName; const errorCode = entry.error?.message.match(/\[([^\]]+)\]/)?.[1]; const reason = errorCode ? ` ${pc.dim(errorCode)}` : ""; console.log(` ${tag} ${name}${reason}`); } const newCount = logEntries.filter( - (e) => e.status === "MISS" && !e.failed, + (e) => e.status === "MISS" && !e.kind, ).length; - const cacheCount = logEntries.filter( - (e) => e.status === "HIT" && !e.failed, + const cacheCount = logEntries.filter((e) => e.status === "HIT").length; + const syntaxCount = logEntries.filter((e) => e.kind === "syntax").length; + const offlineCount = logEntries.filter( + (e) => e.kind === "connectivity", ).length; - const errorCount = logEntries.filter((e) => e.failed).length; + const emptyCount = logEntries.filter((e) => e.kind === "empty").length; + const fatalCount = logEntries.filter((e) => e.kind === "fatal").length; console.log(` ${separator}`); const parts = [`${newCount} new`, `${cacheCount} from cache`]; - if (errorCount > 0) - parts.push(`${errorCount} ${errorCount === 1 ? "error" : "errors"}`); + if (syntaxCount > 0) + parts.push( + `${syntaxCount} SQL ${syntaxCount === 1 ? "error" : "errors"}`, + ); + if (offlineCount > 0) parts.push(`${offlineCount} degraded`); + if (emptyCount > 0) parts.push(`${emptyCount} empty`); + if (fatalCount > 0) + parts.push( + `${fatalCount} fatal ${fatalCount === 1 ? "error" : "errors"}`, + ); console.log(` ${parts.join(", ")}. 
${pc.dim(`${elapsed}s`)}`); console.log(""); } // Merge and sort by original file index for deterministic output - return [...cachedResults, ...freshResults] + const schemas = [...cachedResults, ...freshResults] .sort((a, b) => a.index - b.index) .map((r) => r.schema); + + return { schemas, syntaxErrors, fatalErrors }; } /** diff --git a/packages/appkit/src/type-generator/tests/generate-queries.test.ts b/packages/appkit/src/type-generator/tests/generate-queries.test.ts index ac43ef9e2..4ebcaac2b 100644 --- a/packages/appkit/src/type-generator/tests/generate-queries.test.ts +++ b/packages/appkit/src/type-generator/tests/generate-queries.test.ts @@ -4,6 +4,12 @@ const mocks = vi.hoisted(() => ({ readdir: vi.fn(), readFile: vi.fn(), executeStatement: vi.fn(), + // Warehouse preflight probe. Defaults to RUNNING so every existing describe + // test takes the "proceed" path unchanged; override per-test to exercise + // stopped/starting/unreachable preflight branches. + getWarehouse: vi.fn(() => ({ state: "RUNNING" })), + // warehouses.start — only the blocking startWaitProceed path calls this. + startWarehouse: vi.fn(), spinnerStop: vi.fn(), spinnerPrintDetail: vi.fn(), loadCache: vi.fn(() => ({ version: "2", queries: {} })), @@ -20,6 +26,7 @@ vi.mock("node:fs/promises", () => ({ vi.mock("@databricks/sdk-experimental", () => ({ WorkspaceClient: vi.fn(() => ({ statementExecution: { executeStatement: mocks.executeStatement }, + warehouses: { get: mocks.getWarehouse, start: mocks.startWarehouse }, })), })); @@ -38,6 +45,35 @@ vi.mock("../cache", async (importOriginal) => { }); const { generateQueriesFromDescribe } = await import("../query-registry"); +const { CACHE_VERSION, hashSQL } = await import("../cache"); + +// The default mode is "non-blocking", which never probes the warehouse and never +// describes. The bulk of these tests exercise the DESCRIBE / classify path, so +// they run in "blocking" mode (probe → proceed → describe) by default. 
Tests +// that specifically assert the non-blocking short-circuit pass an explicit mode. +function describeQueries( + queryFolder: string, + warehouseId: string, + options: Parameters[2] = {}, +) { + return generateQueriesFromDescribe(queryFolder, warehouseId, { + mode: "blocking", + ...options, + }); +} + +// Sentinel for a previously-generated good type. The code passes cached types +// through verbatim, so equality proves reuse rather than regeneration. +const CACHED_GOOD_TYPE = "RESULT_REUSED_FROM_CACHE"; + +// The `queries` map of the cache object last handed to saveCache — i.e. what +// actually got persisted this run. +const lastSavedQueries = () => + ( + mocks.saveCache.mock.calls.at(-1)?.[0] as + | { queries: Record } + | undefined + )?.queries; function succeededResult(columns: [string, string, string | null][]) { return { @@ -50,6 +86,9 @@ function succeededResult(columns: [string, string, string | null][]) { describe("generateQueriesFromDescribe", () => { beforeEach(() => { vi.clearAllMocks(); + // Re-establish the RUNNING default so a per-test preflight override (e.g. + // mockReturnValue/mockImplementation) never leaks into the next test. 
+ mocks.getWarehouse.mockReturnValue({ state: "RUNNING" }); }); test("success path — returns query schema", async () => { @@ -64,7 +103,10 @@ describe("generateQueriesFromDescribe", () => { ]), ); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("users"); @@ -72,6 +114,10 @@ describe("generateQueriesFromDescribe", () => { expect(schemas[0].type).toContain("name: string"); expect(mocks.spinnerStop).toHaveBeenCalledWith(""); expect(mocks.saveCache).toHaveBeenCalledTimes(1); + // clean success: cached, and not flagged as a syntax error + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + expect(lastSavedQueries()?.users.type).toContain("id: number"); }); test("FAILED status with error message — reports SQL error and produces unknown result type", async () => { @@ -85,7 +131,7 @@ describe("generateQueriesFromDescribe", () => { }, }); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await describeQueries("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("bad_table"); @@ -102,7 +148,7 @@ describe("generateQueriesFromDescribe", () => { status: { state: "FAILED" }, }); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await describeQueries("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].name).toBe("query"); @@ -127,7 +173,7 @@ describe("generateQueriesFromDescribe", () => { }, }); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await describeQueries("/queries", "wh-123"); expect(schemas).toHaveLength(2); @@ -143,7 +189,7 @@ describe("generateQueriesFromDescribe", () => { expect(mocks.saveCache).toHaveBeenCalledTimes(1); }); - test("all queries fail — caches with 
retry flag, all unknown result types", async () => { + test("all queries fail (connectivity + syntax) — all produce unknown result types", async () => { mocks.readdir.mockResolvedValue(["a.sql", "b.sql"]); mocks.readFile .mockResolvedValueOnce("SELECT * FROM table_a") @@ -156,7 +202,10 @@ describe("generateQueriesFromDescribe", () => { status: { state: "FAILED", error: { message: "Table not found" } }, }); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); expect(schemas).toHaveLength(2); expect(schemas[0].name).toBe("a"); @@ -166,6 +215,13 @@ describe("generateQueriesFromDescribe", () => { // saveCache called once after all parallel queries complete expect(mocks.saveCache).toHaveBeenCalledTimes(1); + // a = connectivity (rejected) → NOT a syntax error; b = FAILED → syntax error + expect(syntaxErrors).toEqual([{ name: "b", message: "Table not found" }]); + // neither a connectivity failure nor a SQL error is classified as fatal + expect(fatalErrors).toEqual([]); + // neither failure is persisted to the cache + expect(lastSavedQueries()).not.toHaveProperty("a"); + expect(lastSavedQueries()).not.toHaveProperty("b"); }); test("concurrency batching — saves cache after each batch", async () => { @@ -181,7 +237,7 @@ describe("generateQueriesFromDescribe", () => { .mockResolvedValueOnce(succeededResult([["id", "INT", null]])) .mockResolvedValueOnce(succeededResult([["id", "INT", null]])); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123", { + const { schemas } = await describeQueries("/queries", "wh-123", { concurrency: 2, }); @@ -199,13 +255,652 @@ describe("generateQueriesFromDescribe", () => { mocks.readFile.mockResolvedValue( "-- @param status STRING\nSELECT * FROM t WHERE status = :status AND org = :org", ); - mocks.executeStatement.mockRejectedValueOnce(new Error("timeout")); + 
mocks.executeStatement.mockRejectedValueOnce( + Object.assign(new Error("connect ETIMEDOUT"), { code: "ETIMEDOUT" }), + ); - const schemas = await generateQueriesFromDescribe("/queries", "wh-123"); + const { schemas } = await describeQueries("/queries", "wh-123"); expect(schemas).toHaveLength(1); expect(schemas[0].type).toContain("status: SQLStringMarker"); expect(schemas[0].type).toContain("org: SQLTypeMarker"); expect(schemas[0].type).toContain("result: unknown"); }); + + test("connectivity failure with stale cache emits unknown for the current SQL", async () => { + const sql = "SELECT id FROM users"; + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue(sql); + // A prior good type cached under a STALE hash: the query is a cache MISS + // (so DESCRIBE is attempted). If the warehouse is unreachable, do not + // publish the stale result columns for different SQL text. + mocks.loadCache.mockReturnValueOnce({ + version: CACHE_VERSION, + queries: { + users: { hash: "stale-hash", type: CACHED_GOOD_TYPE, retry: false }, + }, + }); + mocks.executeStatement.mockRejectedValueOnce( + Object.assign(new Error("connect ECONNREFUSED"), { + code: "ECONNREFUSED", + }), + ); + + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).not.toBe(CACHED_GOOD_TYPE); + expect(schemas[0].type).toContain("result: unknown"); + // connectivity is never recorded as a syntax error + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + // the existing good entry is left intact (not overwritten with unknown) + expect(lastSavedQueries()?.users).toEqual({ + hash: "stale-hash", + type: CACHED_GOOD_TYPE, + retry: false, + }); + }); + + test("fatal rejected DESCRIBE request is not downgraded to offline", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce( + new 
Error("PERMISSION_DENIED: missing warehouse permission"), + ); + + const { schemas, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([ + { + name: "users", + message: "PERMISSION_DENIED: missing warehouse permission", + }, + ]); + expect(mocks.saveCache).toHaveBeenCalledTimes(1); + expect(lastSavedQueries()).not.toHaveProperty("users"); + }); + + test("HTTP 503 wrapper error is classified as connectivity", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce( + Object.assign(new Error("Service unavailable"), { statusCode: 503 }), + ); + + const { schemas, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([]); + }); + + test.each([ + ["HTTP 502", Object.assign(new Error("Bad gateway"), { status: 502 })], + [ + "HTTP 504 response", + Object.assign(new Error("Gateway timeout"), { + response: { status: 504 }, + }), + ], + [ + "EAI_NODATA", + Object.assign(new Error("DNS lookup failed"), { code: "EAI_NODATA" }), + ], + [ + "Envoy upstream disconnect", + new Error("upstream connect error or disconnect/reset before headers"), + ], + ])("%s is classified as connectivity", async (_name, error) => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce(error); + + const { schemas, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([]); + }); + + test("mixed syntax and fatal failures are both returned", async () => { + mocks.readdir.mockResolvedValue(["syntax.sql", "fatal.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT * FROM 
missing") + .mockResolvedValueOnce("SELECT * FROM auth_blocked"); + mocks.executeStatement + .mockResolvedValueOnce({ + statement_id: "stmt-syntax", + status: { + state: "FAILED", + error: { message: "Table not found" }, + }, + }) + .mockRejectedValueOnce(new Error("PERMISSION_DENIED")); + + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas).toHaveLength(2); + expect(syntaxErrors).toEqual([ + { name: "syntax", message: "Table not found" }, + ]); + expect(fatalErrors).toEqual([ + { name: "fatal", message: "PERMISSION_DENIED" }, + ]); + }); + + test("undici cause code is classified as connectivity even when wrapper message is generic fetch failed", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce( + Object.assign(new TypeError("fetch failed"), { + cause: { code: "UND_ERR_CONNECT_TIMEOUT" }, + }), + ); + + const { schemas, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([]); + }); + + test("SDK DNS wrapper (Can't connect to ..., code 500) is classified as connectivity", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + mocks.executeStatement.mockRejectedValueOnce( + Object.assign( + new Error( + "Can't connect to https://x.cloud.databricks.com/api/2.0/sql/statements", + ), + { code: 500 }, + ), + ); + + const { schemas, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([]); + }); + + test("TLS certificate message is classified as connectivity", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + 
mocks.executeStatement.mockRejectedValueOnce( + new Error("unable to verify the first certificate"), + ); + + const { fatalErrors } = await describeQueries("/queries", "wh-123"); + + expect(fatalErrors).toEqual([]); + }); + + test("bare timeout and fetch failed messages are not overmatched as connectivity", async () => { + mocks.readdir.mockResolvedValue(["timeout.sql", "oauth.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT id FROM timeout") + .mockResolvedValueOnce("SELECT id FROM oauth"); + mocks.executeStatement + .mockRejectedValueOnce( + new Error("INVALID_PARAMETER_VALUE: timeout must be > 0"), + ) + .mockRejectedValueOnce( + Object.assign(new TypeError("fetch failed"), { + cause: { code: "EXPIRED_OAUTH_TOKEN", message: "token expired" }, + }), + ); + + const { schemas, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas).toHaveLength(2); + expect(fatalErrors).toEqual([ + { + name: "timeout", + message: "INVALID_PARAMETER_VALUE: timeout must be > 0", + }, + { + name: "oauth", + message: "fetch failed: token expired: EXPIRED_OAUTH_TOKEN", + }, + ]); + }); + + test("successful describes in a fatal batch are saved", async () => { + mocks.readdir.mockResolvedValue(["good.sql", "bad_auth.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT id FROM good") + .mockResolvedValueOnce("SELECT id FROM bad_auth"); + mocks.executeStatement + .mockResolvedValueOnce(succeededResult([["id", "INT", null]])) + .mockRejectedValueOnce(new Error("PERMISSION_DENIED")); + + const { schemas, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("id: number"); + expect(schemas[1].type).toContain("result: unknown"); + expect(fatalErrors).toEqual([ + { name: "bad_auth", message: "PERMISSION_DENIED" }, + ]); + expect(lastSavedQueries()?.good.type).toContain("id: number"); + expect(lastSavedQueries()).not.toHaveProperty("bad_auth"); + }); + + test("empty result (described, no columns) 
is unknown, not a syntax error, not cached", async () => { + mocks.readdir.mockResolvedValue(["empty.sql"]); + mocks.readFile.mockResolvedValue("SELECT 1"); + mocks.executeStatement.mockResolvedValue(succeededResult([])); + + const { schemas, syntaxErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(syntaxErrors).toEqual([]); + expect(lastSavedQueries()).not.toHaveProperty("empty"); + }); + + test("PENDING (non-terminal, warehouse not ready) degrades to unknown, not empty, not cached", async () => { + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM users"); + // Warehouse stopped/cold-starting: hybrid DESCRIBE returns a non-terminal + // state with no result rows. Must degrade like a transient outage, not be + // misreported as EMPTY (which would discard a good cached type). + mocks.executeStatement.mockResolvedValue({ + statement_id: "stmt-1", + status: { state: "PENDING" }, + }); + + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas).toHaveLength(1); + expect(schemas[0].name).toBe("users"); + expect(schemas[0].type).toContain("result: unknown"); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + // a non-ready warehouse must never persist `result: unknown` + expect(lastSavedQueries()).not.toHaveProperty("users"); + }); + + test("PENDING reuses a prior good cached type when the SQL hash matches", async () => { + // `users.sql` and `users.obo.sql` normalize to the same query name and hold + // identical SQL (same hash). With concurrency=1 the first DESCRIBE SUCCEEDS + // and its batch commits a good cached type; the second batch comes back + // non-terminal (warehouse not ready) and must reuse that freshly-cached good + // type rather than overwrite it with unknown. 
+ const sql = "SELECT id FROM users"; + mocks.readdir.mockResolvedValue(["users.sql", "users.obo.sql"]); + mocks.readFile.mockResolvedValue(sql); + mocks.executeStatement + .mockResolvedValueOnce(succeededResult([["id", "INT", null]])) + .mockResolvedValueOnce({ + statement_id: "stmt-pending", + status: { state: "RUNNING" }, + }); + + const { schemas, syntaxErrors, fatalErrors } = await describeQueries( + "/queries", + "wh-123", + { + concurrency: 1, + }, + ); + + expect(schemas).toHaveLength(2); + // both entries resolve to the good type — the PENDING one reuses the cache + expect(schemas[0].type).toContain("id: number"); + expect(schemas[1].type).toContain("id: number"); + expect(schemas[1].type).not.toContain("result: unknown"); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + // the good cached type persists; PENDING never overwrites it with unknown + expect(lastSavedQueries()?.users.type).toContain("id: number"); + }); + + test("syntax error (FAILED) is recorded in syntaxErrors and not cached", async () => { + mocks.readdir.mockResolvedValue(["broken.sql"]); + mocks.readFile.mockResolvedValue("SELECT * FROM missing"); + mocks.executeStatement.mockResolvedValue({ + statement_id: "stmt", + status: { + state: "FAILED", + error: { message: "Table or view not found: missing" }, + }, + }); + + const { schemas, syntaxErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(schemas[0].type).toContain("result: unknown"); + expect(syntaxErrors).toEqual([ + { name: "broken", message: "Table or view not found: missing" }, + ]); + expect(lastSavedQueries()).not.toHaveProperty("broken"); + }); + + test("cache HIT serves the stored type without calling the warehouse", async () => { + const sql = "SELECT id FROM t"; + mocks.readdir.mockResolvedValue(["t.sql"]); + mocks.readFile.mockResolvedValue(sql); + mocks.loadCache.mockReturnValueOnce({ + version: CACHE_VERSION, + queries: { + t: { hash: hashSQL(sql), type: CACHED_GOOD_TYPE, 
retry: false }, + }, + }); + + const { schemas, syntaxErrors } = await describeQueries( + "/queries", + "wh-123", + ); + + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(schemas[0].type).toBe(CACHED_GOOD_TYPE); + expect(syntaxErrors).toEqual([]); + }); + + test("stale retry-flagged cache entry is re-described, not reused", async () => { + const sql = "SELECT id FROM t"; + mocks.readdir.mockResolvedValue(["t.sql"]); + mocks.readFile.mockResolvedValue(sql); + // Matching hash but retry:true (legacy poisoned entry) → must NOT be a HIT. + mocks.loadCache.mockReturnValueOnce({ + version: CACHE_VERSION, + queries: { + t: { hash: hashSQL(sql), type: "STALE_UNKNOWN", retry: true }, + }, + }); + mocks.executeStatement.mockResolvedValue( + succeededResult([["id", "INT", null]]), + ); + + const { schemas } = await describeQueries("/queries", "wh-123"); + + expect(mocks.executeStatement).toHaveBeenCalledTimes(1); + expect(schemas[0].type).toContain("id: number"); + expect(schemas[0].type).not.toBe("STALE_UNKNOWN"); + }); + + describe("warehouse preflight", () => { + test("STOPPED + blocking mode — starts the warehouse, waits for RUNNING, then describes", async () => { + vi.useFakeTimers(); + try { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + // Preflight sees STOPPED (→ startWaitProceed): warehouses.start fires, + // then waitUntilRunning polls the stale STOPPED once more before RUNNING. + // After RUNNING, DESCRIBE runs normally. + mocks.getWarehouse + .mockReturnValueOnce({ state: "STOPPED" }) + .mockReturnValueOnce({ state: "STOPPED" }) + .mockReturnValue({ state: "RUNNING" }); + mocks.executeStatement.mockResolvedValue( + succeededResult([["id", "INT", null]]), + ); + + const promise = generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + // Drive the wait loop's backoff sleep(s) so it can re-poll and observe + // RUNNING. Run pending timers until the work settles. 
+ await vi.runAllTimersAsync(); + const { schemas, syntaxErrors, fatalErrors } = await promise; + + // The stopped warehouse was started, then described once it came up. + expect(mocks.startWarehouse).toHaveBeenCalledTimes(1); + expect(mocks.startWarehouse).toHaveBeenCalledWith({ id: "wh-123" }); + expect(mocks.executeStatement).toHaveBeenCalledTimes(1); + expect(schemas).toHaveLength(1); + expect(schemas[0].name).toBe("a"); + expect(schemas[0].type).toContain("id: number"); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + } finally { + vi.useRealTimers(); + } + }); + + test.each(["DELETED", "DELETING"] as const)( + "%s + blocking mode — fatal per query after schemas are written, never describes", + async (state) => { + mocks.readdir.mockResolvedValue(["a.sql", "b.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT id FROM a") + .mockResolvedValueOnce("SELECT id FROM b"); + mocks.getWarehouse.mockReturnValue({ state }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + + // A deleted/deleting warehouse is the only fatal case: never started, + // never described; one fatal entry per uncached query. + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(fatalErrors).toEqual([ + { name: "a", message: `warehouse wh-123 is ${state}` }, + { name: "b", message: `warehouse wh-123 is ${state}` }, + ]); + expect(syntaxErrors).toEqual([]); + // Schemas are still produced (degraded) so the .d.ts is written before + // generateFromEntryPoint throws on the recorded fatalErrors. 
+ expect(schemas).toHaveLength(2); + expect(schemas[0].type).toContain("result: unknown"); + expect(schemas[1].type).toContain("result: unknown"); + }, + ); + + test("STOPPED + blocking — start succeeds but warehouse never reaches RUNNING is fatal", async () => { + vi.useFakeTimers(); + try { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + // Preflight sees STOPPED → start fires, but the warehouse then reports + // DELETED (a genuinely terminal state even with treatStoppedAsTransient). + // The wait resolves non-RUNNING → fatal; schemas still written. + mocks.getWarehouse + .mockReturnValueOnce({ state: "STOPPED" }) + .mockReturnValue({ state: "DELETED" }); + + const promise = generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + await vi.runAllTimersAsync(); + const { schemas, syntaxErrors, fatalErrors } = await promise; + + expect(mocks.startWarehouse).toHaveBeenCalledTimes(1); + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([ + { + name: "a", + message: "warehouse wh-123 did not reach RUNNING (now DELETED)", + }, + ]); + expect(schemas[0].type).toContain("result: unknown"); + } finally { + vi.useRealTimers(); + } + }); + + test("non-blocking mode — degrades silently without probing, even when STOPPED", async () => { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + // Even a STOPPED warehouse is irrelevant: non-blocking never probes. + mocks.getWarehouse.mockReturnValue({ state: "STOPPED" }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "non-blocking", + }); + + // ZERO warehouse round-trips: no probe (getWarehouse) and no DESCRIBE. 
+ expect(mocks.getWarehouse).not.toHaveBeenCalled(); + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(fatalErrors).toEqual([]); + expect(syntaxErrors).toEqual([]); + expect(schemas[0].type).toContain("result: unknown"); + // no describe ran, so nothing was persisted via saveCache + expect(lastSavedQueries()).toBeUndefined(); + }); + + test("STARTING + blocking — waits for RUNNING, then describes normally", async () => { + vi.useFakeTimers(); + try { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + // Preflight sees STARTING (→ waitThenProceed); waitUntilRunning polls + // STARTING once more, then RUNNING. After that, DESCRIBE runs. + mocks.getWarehouse + .mockReturnValueOnce({ state: "STARTING" }) + .mockReturnValueOnce({ state: "STARTING" }) + .mockReturnValue({ state: "RUNNING" }); + mocks.executeStatement.mockResolvedValue( + succeededResult([["id", "INT", null]]), + ); + + const promise = generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + // Drive the wait loop's backoff sleep(s) so it can re-poll and observe + // RUNNING. Run pending timers until the work settles. 
+ await vi.runAllTimersAsync(); + const { schemas, syntaxErrors, fatalErrors } = await promise; + + expect(mocks.executeStatement).toHaveBeenCalledTimes(1); + expect(schemas).toHaveLength(1); + expect(schemas[0].type).toContain("id: number"); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + } finally { + vi.useRealTimers(); + } + }); + + test("preflight connectivity error — degradeAll, never describes", async () => { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + mocks.getWarehouse.mockImplementation(() => { + throw Object.assign( + new Error("Can't connect to https://x.cloud.databricks.com"), + { code: 500 }, + ); + }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "blocking", + }); + + // Unreachable warehouse degrades silently — even in blocking mode. + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(fatalErrors).toEqual([]); + expect(syntaxErrors).toEqual([]); + expect(schemas[0].type).toContain("result: unknown"); + }); + + test("RUNNING preflight — describes normally", async () => { + mocks.readdir.mockResolvedValue(["a.sql"]); + mocks.readFile.mockResolvedValue("SELECT id FROM a"); + mocks.getWarehouse.mockReturnValue({ state: "RUNNING" }); + mocks.executeStatement.mockResolvedValue( + succeededResult([["id", "INT", null]]), + ); + + const { schemas, fatalErrors } = await generateQueriesFromDescribe( + "/queries", + "wh-123", + { mode: "blocking" }, + ); + + expect(mocks.executeStatement).toHaveBeenCalledTimes(1); + expect(schemas[0].type).toContain("id: number"); + expect(fatalErrors).toEqual([]); + }); + + test("non-blocking mode — skips probe + describe even when warehouse is RUNNING", async () => { + mocks.readdir.mockResolvedValue(["a.sql", "b.sql"]); + mocks.readFile + .mockResolvedValueOnce("SELECT id FROM a") + .mockResolvedValueOnce("SELECT id FROM b"); + // A RUNNING warehouse 
would normally take the proceed path and describe + // every query. In `non-blocking` mode the warehouse is never even probed. + mocks.getWarehouse.mockReturnValue({ state: "RUNNING" }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "non-blocking", + }); + + // ZERO warehouse round-trips: no probe (getWarehouse) and no DESCRIBE. + expect(mocks.getWarehouse).not.toHaveBeenCalled(); + expect(mocks.executeStatement).not.toHaveBeenCalled(); + // Best-available types: no cache seeded → every query degrades to unknown. + expect(schemas).toHaveLength(2); + expect(schemas[0].name).toBe("a"); + expect(schemas[0].type).toContain("result: unknown"); + expect(schemas[1].name).toBe("b"); + expect(schemas[1].type).toContain("result: unknown"); + // Degraded, never a failure. + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + }); + + test("non-blocking mode — reuses the cached type when the SQL hash matches", async () => { + const sql = "SELECT id FROM users"; + mocks.readdir.mockResolvedValue(["users.sql"]); + mocks.readFile.mockResolvedValue(sql); + // Seed a last-good cached type under the current SQL hash. non-blocking + // serves it via the normal cache HIT path — still no probe, no DESCRIBE. 
+ mocks.loadCache.mockReturnValueOnce({ + version: CACHE_VERSION, + queries: { + users: { hash: hashSQL(sql), type: CACHED_GOOD_TYPE, retry: false }, + }, + }); + mocks.getWarehouse.mockReturnValue({ state: "RUNNING" }); + + const { schemas, syntaxErrors, fatalErrors } = + await generateQueriesFromDescribe("/queries", "wh-123", { + mode: "non-blocking", + }); + + expect(mocks.getWarehouse).not.toHaveBeenCalled(); + expect(mocks.executeStatement).not.toHaveBeenCalled(); + expect(schemas).toHaveLength(1); + expect(schemas[0].type).toBe(CACHED_GOOD_TYPE); + expect(syntaxErrors).toEqual([]); + expect(fatalErrors).toEqual([]); + }); + }); }); diff --git a/packages/appkit/src/type-generator/tests/index.test.ts b/packages/appkit/src/type-generator/tests/index.test.ts index bd2052273..4c37699ad 100644 --- a/packages/appkit/src/type-generator/tests/index.test.ts +++ b/packages/appkit/src/type-generator/tests/index.test.ts @@ -1,7 +1,31 @@ import fs from "node:fs"; import path from "node:path"; -import { afterAll, beforeAll, describe, expect, test } from "vitest"; -import { generateFromEntryPoint } from "../index"; +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + test, + vi, +} from "vitest"; + +const mocks = vi.hoisted(() => ({ + generateQueriesFromDescribe: vi.fn(), +})); + +// Mock only the warehouse-describe step; index.ts owns the throw decision we +// want to exercise (syntax errors fatal, connectivity failures non-fatal). 
+vi.mock("../query-registry", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + generateQueriesFromDescribe: mocks.generateQueriesFromDescribe, + }; +}); + +const { generateFromEntryPoint, TypegenFatalError, TypegenSyntaxError } = + await import("../index"); const outputDir = path.join(__dirname, "__output__"); @@ -52,3 +76,125 @@ describe("generateFromEntryPoint", () => { expect(content).toContain("interface QueryRegistry {}"); }); }); + +describe("generateFromEntryPoint — query failure handling", () => { + const failuresDir = path.join(__dirname, "__output_failures__"); + const outFile = path.join(failuresDir, "analytics.d.ts"); + + const unknownSchema = (name: string) => ({ + name, + type: `{ name: "${name}"; parameters: Record; result: unknown; }`, + }); + + beforeAll(() => { + if (!fs.existsSync(failuresDir)) { + fs.mkdirSync(failuresDir, { recursive: true }); + } + }); + + afterAll(() => { + if (fs.existsSync(failuresDir)) { + fs.rmSync(failuresDir, { recursive: true }); + } + }); + + beforeEach(() => { + vi.clearAllMocks(); + }); + + test("throws TypegenSyntaxError when a query has a genuine SQL error", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("bad")], + syntaxErrors: [{ name: "bad", message: "Table not found: bad" }], + fatalErrors: [], + }); + + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + ).rejects.toThrow(TypegenSyntaxError); + }); + + test("TypegenSyntaxError includes fatal queries from a mixed failure", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("bad_sql"), unknownSchema("bad_auth")], + syntaxErrors: [{ name: "bad_sql", message: "Table not found" }], + fatalErrors: [{ name: "bad_auth", message: "PERMISSION_DENIED" }], + }); + + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + 
).rejects.toMatchObject({ + name: "TypegenSyntaxError", + fatalQueries: [{ name: "bad_auth", message: "PERMISSION_DENIED" }], + }); + + expect(fs.existsSync(outFile)).toBe(true); + expect(fs.readFileSync(outFile, "utf-8")).toContain("bad_auth"); + }); + + test("does not throw when only connectivity failures occurred (warehouse down)", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("a"), unknownSchema("b")], + syntaxErrors: [], + fatalErrors: [], + }); + + // The reported bug: a down warehouse must NOT crash type generation. + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + ).resolves.toBeUndefined(); + }); + + test("writes the .d.ts before throwing on a syntax error", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("bad")], + syntaxErrors: [{ name: "bad", message: "Table not found: bad" }], + fatalErrors: [], + }); + + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + ).rejects.toThrow(TypegenSyntaxError); + + // Types are emitted even on failure so the build/dev still has a valid file. 
+ expect(fs.existsSync(outFile)).toBe(true); + expect(fs.readFileSync(outFile, "utf-8")).toContain( + "interface QueryRegistry", + ); + }); + + test("throws TypegenFatalError after writing the .d.ts for non-syntax fatal describe errors", async () => { + mocks.generateQueriesFromDescribe.mockResolvedValue({ + schemas: [unknownSchema("bad_auth")], + syntaxErrors: [], + fatalErrors: [{ name: "bad_auth", message: "PERMISSION_DENIED" }], + }); + + await expect( + generateFromEntryPoint({ + outFile, + queryFolder: "/queries", + warehouseId: "wh-1", + }), + ).rejects.toThrow(TypegenFatalError); + + expect(fs.existsSync(outFile)).toBe(true); + expect(fs.readFileSync(outFile, "utf-8")).toContain("bad_auth"); + }); +}); diff --git a/packages/appkit/src/type-generator/tests/preflight.test.ts b/packages/appkit/src/type-generator/tests/preflight.test.ts new file mode 100644 index 000000000..11d5b6f9e --- /dev/null +++ b/packages/appkit/src/type-generator/tests/preflight.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, test } from "vitest"; +import { + decidePreflight, + type PreflightDecision, + type PreflightMode, +} from "../preflight"; +import type { WarehouseState } from "../warehouse-status"; + +// Every (state × mode) pair the policy must implement, plus an unknown-state +// case that must fall through to "proceed". +const cases: Array<{ + state: WarehouseState | "WEIRD_FUTURE_STATE"; + mode: PreflightMode; + expected: PreflightDecision; +}> = [ + { state: "RUNNING", mode: "blocking", expected: "proceed" }, + + { state: "STARTING", mode: "blocking", expected: "waitThenProceed" }, + + // Stopped/stopping in blocking mode is worth starting + waiting, not fatal. + { state: "STOPPED", mode: "blocking", expected: "startWaitProceed" }, + { state: "STOPPING", mode: "blocking", expected: "startWaitProceed" }, + + // Only a deleted/deleting warehouse is a hard failure. 
+ { state: "DELETED", mode: "blocking", expected: "fatal" }, + { state: "DELETING", mode: "blocking", expected: "fatal" }, + + // Unknown state: backstop is the describe loop, so don't block. + { state: "WEIRD_FUTURE_STATE", mode: "blocking", expected: "proceed" }, + + // `non-blocking` never describes: every state (even RUNNING) maps to degradeAll. + { state: "RUNNING", mode: "non-blocking", expected: "degradeAll" }, + { state: "STARTING", mode: "non-blocking", expected: "degradeAll" }, + { state: "STOPPED", mode: "non-blocking", expected: "degradeAll" }, + { state: "STOPPING", mode: "non-blocking", expected: "degradeAll" }, + { state: "DELETED", mode: "non-blocking", expected: "degradeAll" }, + { state: "DELETING", mode: "non-blocking", expected: "degradeAll" }, + { state: "WEIRD_FUTURE_STATE", mode: "non-blocking", expected: "degradeAll" }, +]; + +describe("decidePreflight", () => { + test.each(cases)( + "$state + $mode -> $expected", + ({ state, mode, expected }) => { + expect(decidePreflight(state as WarehouseState, mode)).toBe(expected); + }, + ); +}); diff --git a/packages/appkit/src/type-generator/tests/vite-plugin.test.ts b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts new file mode 100644 index 000000000..16153257f --- /dev/null +++ b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts @@ -0,0 +1,486 @@ +import { EventEmitter } from "node:events"; +import path from "node:path"; +import type { Plugin, ViteDevServer } from "vite"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import type { WarehouseState } from "../warehouse-status"; + +const mocks = vi.hoisted(() => ({ + generateFromEntryPoint: vi.fn(), + getWarehouseState: vi.fn(), + startWarehouse: vi.fn(), + waitUntilRunning: vi.fn(), +})); + +// Mock the module vite-plugin.ts pulls generateFromEntryPoint from. 
The error +// classes are imported for `instanceof` checks in the catch block, so they must +// remain real constructors — only the warehouse-touching entry point is spied. +vi.mock("../index", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + generateFromEntryPoint: mocks.generateFromEntryPoint, + }; +}); + +// Mock the warehouse-status helpers so the background watch is fully driven by +// the test (no real WorkspaceClient / SDK calls). +vi.mock("../warehouse-status", () => ({ + getWarehouseState: mocks.getWarehouseState, + startWarehouse: mocks.startWarehouse, + waitUntilRunning: mocks.waitUntilRunning, +})); + +// armWarehouseWatch constructs `new WorkspaceClient({})`. Stub the SDK so that +// constructor is inert in unit tests. +vi.mock("@databricks/sdk-experimental", () => ({ + WorkspaceClient: class {}, +})); + +const { appKitTypesPlugin } = await import("../vite-plugin"); + +// The plugin hooks are loosely typed on Vite's Plugin; cast to the shapes we +// actually drive so we can call them directly without a Vite build. +type ConfigResolvedHook = (config: { root: string }) => void; +type BuildStartHook = () => unknown; +type ConfigureServerHook = (server: ViteDevServer) => void; + +function getHook( + plugin: Plugin, + name: "configResolved" | "buildStart" | "configureServer", +): T { + const hook = plugin[name]; + if (typeof hook !== "function") { + throw new Error(`expected ${name} to be a function hook`); + } + return hook as T; +} + +/** + * A deferred promise whose settlement the test controls — used to hold a + * generateFromEntryPoint call "in flight" while a second trigger arrives, so we + * can observe single-flight coalescing deterministically. + */ +function deferred() { + let resolve!: () => void; + const promise = new Promise((r) => { + resolve = r; + }); + return { promise, resolve }; +} + +/** Construct the plugin and drive configResolved so outFile/watchFolders set. 
*/ +function makeConfiguredPlugin() { + const plugin = appKitTypesPlugin(); + const configResolved = getHook(plugin, "configResolved"); + // configResolved derives outFile/watchFolders from config.root; a client + // sub-folder mirrors the real layout (projectRoot = config.root/..). + configResolved({ root: path.join(process.cwd(), "client") }); + return plugin; +} + +/** Drive configResolved + buildStart so generate() runs to the spy. */ +async function runPlugin() { + const plugin = makeConfiguredPlugin(); + const buildStart = getHook(plugin, "buildStart"); + await buildStart(); +} + +/** + * Minimal ViteDevServer stand-in: a chokidar-like `watcher` (EventEmitter with + * a no-op `add`) plus an `httpServer` EventEmitter so the close-cleanup hook can + * register. Returns the doubles so tests can emit "change"/"close". + */ +function makeFakeServer() { + const watcher = Object.assign(new EventEmitter(), { add: vi.fn() }); + const httpServer = new EventEmitter(); + const server = { watcher, httpServer } as unknown as ViteDevServer; + return { server, watcher, httpServer }; +} + +/** + * Settle the microtask queue so awaited generate/watch chains progress. The + * background watch threads several awaits (getWarehouseState → waitUntilRunning + * → runGenerate → generateOnce → generateFromEntryPoint), so drain generously. + */ +async function flush() { + for (let i = 0; i < 12; i++) { + await Promise.resolve(); + } +} + +describe("appKitTypesPlugin — generation mode", () => { + const savedNodeEnv = process.env.NODE_ENV; + const savedWarehouseId = process.env.DATABRICKS_WAREHOUSE_ID; + + beforeEach(() => { + vi.clearAllMocks(); + mocks.generateFromEntryPoint.mockResolvedValue(undefined); + // Default the warehouse watch to a no-op so tests that don't exercise it + // aren't perturbed by a background regenerate. 
DELETED is the only state the + // watch leaves alone (it can't be started and blocking would be fatal), so + // it never starts/waits/regenerates — unlike RUNNING, which now describes in + // the background. + mocks.getWarehouseState.mockResolvedValue("DELETED" as WarehouseState); + mocks.startWarehouse.mockResolvedValue(undefined); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + // A non-empty warehouse ID is required or generate() short-circuits before + // ever calling generateFromEntryPoint. + process.env.DATABRICKS_WAREHOUSE_ID = "wh-test"; + }); + + afterEach(() => { + if (savedNodeEnv === undefined) delete process.env.NODE_ENV; + else process.env.NODE_ENV = savedNodeEnv; + + if (savedWarehouseId === undefined) + delete process.env.DATABRICKS_WAREHOUSE_ID; + else process.env.DATABRICKS_WAREHOUSE_ID = savedWarehouseId; + }); + + test('foreground passes mode: "non-blocking" when NODE_ENV is not production', async () => { + process.env.NODE_ENV = "development"; + + await runPlugin(); + + // Dev foreground degrades instantly: it never blocks and never describes. + // (The warehouse watch is a DELETED no-op here, so there's no background + // regenerate — exactly one foreground call.) 
+ expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "non-blocking" }), + ); + }); + + test('passes mode: "blocking" when NODE_ENV is production', async () => { + process.env.NODE_ENV = "production"; + + await runPlugin(); + + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "blocking" }), + ); + }); + + test("skips generation when warehouse ID is absent", async () => { + delete process.env.DATABRICKS_WAREHOUSE_ID; + + await runPlugin(); + + expect(mocks.generateFromEntryPoint).not.toHaveBeenCalled(); + }); +}); + +describe("appKitTypesPlugin — single-flight generate", () => { + const savedNodeEnv = process.env.NODE_ENV; + const savedWarehouseId = process.env.DATABRICKS_WAREHOUSE_ID; + + beforeEach(() => { + vi.clearAllMocks(); + // Watch is a no-op here (DELETED leaves the degraded types alone) so it can't + // add stray generate calls — RUNNING would now describe in the background. + mocks.getWarehouseState.mockResolvedValue("DELETED" as WarehouseState); + mocks.startWarehouse.mockResolvedValue(undefined); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + process.env.NODE_ENV = "development"; + process.env.DATABRICKS_WAREHOUSE_ID = "wh-test"; + }); + + afterEach(() => { + if (savedNodeEnv === undefined) delete process.env.NODE_ENV; + else process.env.NODE_ENV = savedNodeEnv; + + if (savedWarehouseId === undefined) + delete process.env.DATABRICKS_WAREHOUSE_ID; + else process.env.DATABRICKS_WAREHOUSE_ID = savedWarehouseId; + }); + + test("coalesces overlapping triggers into one in-flight + one trailing run", async () => { + // First generate hangs on a deferred; while it's in flight we fire two more + // triggers. They must NOT start concurrently — they collapse into a single + // trailing run after the first settles. 
+ const first = deferred(); + const second = deferred(); + mocks.generateFromEntryPoint + .mockReturnValueOnce(first.promise) + .mockReturnValueOnce(second.promise); + + const plugin = makeConfiguredPlugin(); + const { server, watcher } = makeFakeServer(); + const configureServer = getHook( + plugin, + "configureServer", + ); + const buildStart = getHook(plugin, "buildStart"); + + configureServer(server); + + // Trigger 1: the initial build. Starts generate #1 (now in flight). + await buildStart(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + + const sqlFile = path.join(process.cwd(), "config", "queries", "q.sql"); + // Triggers 2 and 3 arrive while #1 is still in flight: no new run starts. + watcher.emit("change", sqlFile); + watcher.emit("change", sqlFile); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + + // Settle #1 → exactly ONE trailing run fires for the coalesced triggers. + first.resolve(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + + // Settle the trailing run; no further runs queued. + second.resolve(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + }); + + test("a trigger after the previous run settled starts a fresh run", async () => { + mocks.generateFromEntryPoint.mockResolvedValue(undefined); + + const plugin = makeConfiguredPlugin(); + const { server, watcher } = makeFakeServer(); + getHook(plugin, "configureServer")(server); + const buildStart = getHook(plugin, "buildStart"); + + await buildStart(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + + // Nothing in flight now: a later .sql change runs generate again. 
+ watcher.emit( + "change", + path.join(process.cwd(), "config", "queries", "q.sql"), + ); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + }); +}); + +describe("appKitTypesPlugin — background warehouse watch", () => { + const savedNodeEnv = process.env.NODE_ENV; + const savedWarehouseId = process.env.DATABRICKS_WAREHOUSE_ID; + + beforeEach(() => { + vi.clearAllMocks(); + mocks.generateFromEntryPoint.mockResolvedValue(undefined); + mocks.startWarehouse.mockResolvedValue(undefined); + process.env.DATABRICKS_WAREHOUSE_ID = "wh-test"; + }); + + afterEach(() => { + if (savedNodeEnv === undefined) delete process.env.NODE_ENV; + else process.env.NODE_ENV = savedNodeEnv; + + if (savedWarehouseId === undefined) + delete process.env.DATABRICKS_WAREHOUSE_ID; + else process.env.DATABRICKS_WAREHOUSE_ID = savedWarehouseId; + }); + + test("STOPPED → starts the warehouse, then RUNNING regenerates in dev", async () => { + process.env.NODE_ENV = "development"; + // Warehouse is stopped; the watch must kick off a start, then the poller + // sees it reach RUNNING. + mocks.getWarehouseState.mockResolvedValue("STOPPED" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + + await runPlugin(); + await flush(); + + // A stopped warehouse is nudged to start before we wait on it. + expect(mocks.startWarehouse).toHaveBeenCalledTimes(1); + expect(mocks.waitUntilRunning).toHaveBeenCalledTimes(1); + // Because WE issued the start, the wait must poll through a stale post-start + // STOPPED/STOPPING reading instead of bailing — assert the flag is set. + expect(mocks.waitUntilRunning).toHaveBeenCalledWith( + expect.anything(), + "wh-test", + expect.objectContaining({ treatStoppedAsTransient: true }), + ); + // Call 1: initial buildStart generate. Call 2: the watch's regenerate once + // the warehouse reached RUNNING. 
+ expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + // Foreground (dev) degrades instantly; the background watch regenerate must + // DESCRIBE the now-RUNNING warehouse, so it runs blocking. + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ mode: "non-blocking" }), + ); + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ mode: "blocking" }), + ); + }); + + test("STARTING → waits without starting, then RUNNING regenerates in dev", async () => { + process.env.NODE_ENV = "development"; + // Warehouse is cold-starting, then warms up to RUNNING. + mocks.getWarehouseState.mockResolvedValue("STARTING" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + + await runPlugin(); + await flush(); + + // Already coming up: no redundant start, just wait + regenerate. + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + // We didn't start it, so the wait keeps the default terminal states (a + // STOPPED reading here would be a real stop, not a stale pre-start blip). + expect(mocks.waitUntilRunning).toHaveBeenCalledTimes(1); + expect(mocks.waitUntilRunning).toHaveBeenCalledWith( + expect.anything(), + "wh-test", + expect.objectContaining({ treatStoppedAsTransient: false }), + ); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + // Foreground (dev) degrades instantly; the background watch regenerate runs + // blocking so it describes the now-RUNNING warehouse. 
+ expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ mode: "non-blocking" }), + ); + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ mode: "blocking" }), + ); + }); + + test("RUNNING → describes in the background after the dev foreground degrade", async () => { + // Phase 3 regression fix: in dev the foreground only degrades, so a RUNNING + // warehouse must still get REAL types from a background describe — it must + // NOT be skipped just because it's already warm. + process.env.NODE_ENV = "development"; + mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + + await runPlugin(); + await flush(); + + // Already RUNNING: no start. The wait is still issued (it returns on the + // first poll for a running warehouse), and we didn't start it, so the + // default terminal states apply. + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.waitUntilRunning).toHaveBeenCalledTimes(1); + expect(mocks.waitUntilRunning).toHaveBeenCalledWith( + expect.anything(), + "wh-test", + expect.objectContaining({ treatStoppedAsTransient: false }), + ); + // Call 1: initial buildStart foreground (degraded). Call 2: the background + // regenerate that DESCRIBEs the running warehouse and lands real types. 
+ expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ mode: "non-blocking" }), + ); + expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ mode: "blocking" }), + ); + }); + + test("DELETED → leaves degraded types, no start/wait/regenerate, no crash", async () => { + process.env.NODE_ENV = "development"; + // A deleted warehouse can't be started and blocking typegen would treat it + // as fatal, so the watch must leave the foreground's degraded types in place. + mocks.getWarehouseState.mockResolvedValue("DELETED" as WarehouseState); + + await expect(runPlugin()).resolves.toBeUndefined(); + await flush(); + + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.waitUntilRunning).not.toHaveBeenCalled(); + // Only the initial (degraded) foreground generate ran; nothing threw. + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + }); + + test("DELETING → leaves degraded types, no start/wait/regenerate", async () => { + process.env.NODE_ENV = "development"; + mocks.getWarehouseState.mockResolvedValue("DELETING" as WarehouseState); + + await runPlugin(); + await flush(); + + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.waitUntilRunning).not.toHaveBeenCalled(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + }); + + test("background regenerate errors are swallowed (no crash), degraded remains", async () => { + // Even when the warehouse is RUNNING and the blocking regenerate THROWS + // (e.g. DESCRIBE surfaced a syntax/fatal error), nothing escapes into dev + // startup: in dev generateOnce catches+logs the throw (and the detached + // IIFE's catch is a further backstop), so the process never crashes and the + // degraded types written by the foreground remain. 
+ process.env.NODE_ENV = "development"; + mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + // First call = foreground (degraded) succeeds; second = background blocking + // describe rejects. + mocks.generateFromEntryPoint + .mockResolvedValueOnce(undefined) + .mockRejectedValueOnce(new Error("warehouse exploded")); + + await expect(runPlugin()).resolves.toBeUndefined(); + await flush(); + + // The background regenerate was attempted (2 calls) but its rejection never + // escaped into the caller. + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2); + }); + + test("no watch in production (armWarehouseWatch no-ops)", async () => { + process.env.NODE_ENV = "production"; + // Even if the warehouse were STOPPED, production must not arm the watch. + mocks.getWarehouseState.mockResolvedValue("STOPPED" as WarehouseState); + mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState); + + await runPlugin(); + await flush(); + + expect(mocks.getWarehouseState).not.toHaveBeenCalled(); + expect(mocks.startWarehouse).not.toHaveBeenCalled(); + expect(mocks.waitUntilRunning).not.toHaveBeenCalled(); + // Only the blocking initial build runs. + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + }); + + test("aborts the armed watch when the dev server closes", async () => { + process.env.NODE_ENV = "development"; + mocks.getWarehouseState.mockResolvedValue("STARTING" as WarehouseState); + + // Capture the signal handed to waitUntilRunning so we can assert the close + // hook aborts it. Keep the wait pending until then. 
+ let capturedSignal: AbortSignal | undefined; + const wait = deferred(); + mocks.waitUntilRunning.mockImplementation( + (_client, _id, opts: { signal?: AbortSignal }) => { + capturedSignal = opts.signal; + return wait.promise.then(() => "RUNNING" as WarehouseState); + }, + ); + + const plugin = makeConfiguredPlugin(); + const { server, httpServer } = makeFakeServer(); + getHook(plugin, "configureServer")(server); + const buildStart = getHook(plugin, "buildStart"); + + await buildStart(); + await flush(); + + expect(capturedSignal).toBeDefined(); + expect(capturedSignal?.aborted).toBe(false); + + // Dev server shutdown must abort the pending warehouse wait. + httpServer.emit("close"); + expect(capturedSignal?.aborted).toBe(true); + + // Let the (now-aborted) wait settle; the IIFE swallows it and skips the + // regenerate because the signal is aborted. + wait.resolve(); + await flush(); + expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/appkit/src/type-generator/tests/warehouse-status.test.ts b/packages/appkit/src/type-generator/tests/warehouse-status.test.ts new file mode 100644 index 000000000..700df2599 --- /dev/null +++ b/packages/appkit/src/type-generator/tests/warehouse-status.test.ts @@ -0,0 +1,194 @@ +import type { WorkspaceClient } from "@databricks/sdk-experimental"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { + getWarehouseState, + type WarehouseState, + waitUntilRunning, +} from "../warehouse-status"; + +/** + * Build a minimal WorkspaceClient stub exposing only `warehouses.get`, the one + * method these helpers touch. Cast through `unknown` to the SDK type so callers + * type-check without us constructing a real client. + */ +function makeClient(get: ReturnType): WorkspaceClient { + return { warehouses: { get } } as unknown as WorkspaceClient; +} + +/** A warehouses.get resolution carrying a given lifecycle state. 
*/ +const stateResponse = (state: WarehouseState) => ({ state }); + +describe("getWarehouseState", () => { + test("returns the .state from warehouses.get", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("RUNNING")); + const client = makeClient(get); + + await expect(getWarehouseState(client, "wh-1")).resolves.toBe("RUNNING"); + expect(get).toHaveBeenCalledWith({ id: "wh-1" }); + }); + + test("propagates errors from warehouses.get (does not catch)", async () => { + const get = vi.fn().mockRejectedValue(new Error("boom")); + const client = makeClient(get); + + await expect(getWarehouseState(client, "wh-1")).rejects.toThrow("boom"); + }); +}); + +describe("waitUntilRunning", () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + test("resolves RUNNING after polling through STARTING states", async () => { + const get = vi + .fn() + .mockResolvedValueOnce(stateResponse("STARTING")) + .mockResolvedValueOnce(stateResponse("STARTING")) + .mockResolvedValueOnce(stateResponse("RUNNING")); + const client = makeClient(get); + + const promise = waitUntilRunning(client, "wh-1", { maxMs: 60000 }); + + // Drive the fake clock past each backoff delay (1000ms, then 2000ms) so the + // subsequent polls fire. advanceTimersByTimeAsync also flushes the awaited + // getWarehouseState microtasks between polls. + await vi.advanceTimersByTimeAsync(1000); + await vi.advanceTimersByTimeAsync(2000); + + await expect(promise).resolves.toBe("RUNNING"); + expect(get).toHaveBeenCalledTimes(3); + }); + + test("resolves with a not-coming-up state without waiting (STOPPED)", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STOPPED")); + const client = makeClient(get); + + // First poll already returns STOPPED, so no timer advance is needed. 
+ await expect( + waitUntilRunning(client, "wh-1", { maxMs: 60000 }), + ).resolves.toBe("STOPPED"); + expect(get).toHaveBeenCalledTimes(1); + }); + + test("STOPPED stays terminal when treatStoppedAsTransient is false", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STOPPED")); + const client = makeClient(get); + + // Explicit false mirrors the default: STOPPED is terminal, resolves at once. + await expect( + waitUntilRunning(client, "wh-1", { + maxMs: 60000, + treatStoppedAsTransient: false, + }), + ).resolves.toBe("STOPPED"); + expect(get).toHaveBeenCalledTimes(1); + }); + + test("treatStoppedAsTransient polls through STOPPED until RUNNING", async () => { + // A start was just issued, so the first poll still reports the stale STOPPED + // before the start propagates. With the flag on we must NOT bail on it — + // keep polling (STOPPED → STARTING → RUNNING) and resolve RUNNING. + const get = vi + .fn() + .mockResolvedValueOnce(stateResponse("STOPPED")) + .mockResolvedValueOnce(stateResponse("STARTING")) + .mockResolvedValueOnce(stateResponse("RUNNING")); + const client = makeClient(get); + + const promise = waitUntilRunning(client, "wh-1", { + maxMs: 60000, + treatStoppedAsTransient: true, + }); + + // Drive past each backoff (1000ms, then 2000ms) so the later polls fire. + await vi.advanceTimersByTimeAsync(1000); + await vi.advanceTimersByTimeAsync(2000); + + await expect(promise).resolves.toBe("RUNNING"); + expect(get).toHaveBeenCalledTimes(3); + }); + + test("treatStoppedAsTransient still treats DELETED as terminal", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("DELETED")); + const client = makeClient(get); + + // A deleted warehouse genuinely can't reach RUNNING, so even with the flag + // on it resolves immediately with the observed state. 
+ await expect( + waitUntilRunning(client, "wh-1", { + maxMs: 60000, + treatStoppedAsTransient: true, + }), + ).resolves.toBe("DELETED"); + expect(get).toHaveBeenCalledTimes(1); + }); + + test("rejects when maxMs elapses while still STARTING", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STARTING")); + const client = makeClient(get); + + const promise = waitUntilRunning(client, "wh-1", { maxMs: 3000 }); + // Attach a rejection handler immediately so the eventual throw isn't an + // unhandled rejection while we advance the clock. + const settled = expect(promise).rejects.toThrow( + /wh-1 did not reach RUNNING within 3000ms/, + ); + + // Push well past the 3000ms budget; exponential backoff (1000 + 2000 = 3000) + // means the deadline check trips on the next iteration. + await vi.advanceTimersByTimeAsync(10000); + + await settled; + }); + + test("stops immediately when the signal is already aborted", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STARTING")); + const client = makeClient(get); + + const controller = new AbortController(); + controller.abort(); + + // The pre-loop abort check throws before the first poll, so warehouses.get + // is never even called. + await expect( + waitUntilRunning(client, "wh-1", { + maxMs: 60000, + signal: controller.signal, + }), + ).rejects.toMatchObject({ name: "AbortError" }); + expect(get).not.toHaveBeenCalled(); + }); + + test("stops promptly when aborted mid-wait", async () => { + const get = vi.fn().mockResolvedValue(stateResponse("STARTING")); + const client = makeClient(get); + const controller = new AbortController(); + + const promise = waitUntilRunning(client, "wh-1", { + maxMs: 60000, + signal: controller.signal, + }); + const settled = expect(promise).rejects.toMatchObject({ + name: "AbortError", + }); + + // Flush only the first poll's microtask (advance 0, not the full 1000ms + // backoff): the wait is now parked on its first backoff sleep, one poll in. 
+ await vi.advanceTimersByTimeAsync(0); + expect(get).toHaveBeenCalledTimes(1); + + // Abort while parked between polls: the backoff sleep resolves immediately + // via its abort listener, then the post-sleep abort check throws — so we + // never issue a second poll. + controller.abort(); + await vi.advanceTimersByTimeAsync(0); + + await settled; + expect(get).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/appkit/src/type-generator/types.ts b/packages/appkit/src/type-generator/types.ts index f54176a8c..684b69045 100644 --- a/packages/appkit/src/type-generator/types.ts +++ b/packages/appkit/src/type-generator/types.ts @@ -76,3 +76,46 @@ export interface QuerySchema { name: string; type: string; } + +/** + * A genuine SQL error: `DESCRIBE QUERY` ran against a *reachable* warehouse and + * the warehouse reported the statement as FAILED (bad table, syntax error, + * incompatible type, …). Distinct from a connectivity failure (warehouse + * unreachable), which is non-fatal and never recorded here. + * @property name - the query name + * @property message - the SQL error message reported by the warehouse + */ +export interface QuerySyntaxError { + name: string; + message: string; +} + +/** + * A non-SQL fatal error while attempting to describe a query: authentication, + * authorization, invalid warehouse/configuration, malformed SDK request, or + * any other setup problem that should not be treated as an offline warehouse. + * @property name - the query name + * @property message - the fatal error message + */ +export interface QueryFatalError { + name: string; + message: string; +} + +/** + * Result of describing a folder of queries. + * @property schemas - one schema per query, in original file order. Queries that + * could not be described carry `result: unknown` so output stays valid. + * @property syntaxErrors - queries whose DESCRIBE failed against a reachable + * warehouse (genuine SQL errors). 
Connectivity failures are deliberately NOT + * included: they degrade silently (reuse last-known-good type or emit + * `unknown`) so a transient outage never fails a build. + * @property fatalErrors - non-SQL fatal describe request failures. These still + * produce `result: unknown` schemas so callers can write declarations before + * surfacing the error. + */ +export interface QueryGenerationResult { + schemas: QuerySchema[]; + syntaxErrors: QuerySyntaxError[]; + fatalErrors: QueryFatalError[]; +} diff --git a/packages/appkit/src/type-generator/vite-plugin.ts b/packages/appkit/src/type-generator/vite-plugin.ts index 5f4a0d4b1..c869acdf6 100644 --- a/packages/appkit/src/type-generator/vite-plugin.ts +++ b/packages/appkit/src/type-generator/vite-plugin.ts @@ -1,15 +1,32 @@ import { existsSync } from "node:fs"; import path from "node:path"; +import { WorkspaceClient } from "@databricks/sdk-experimental"; import type { Plugin } from "vite"; import { createLogger } from "../logging/logger"; import { ANALYTICS_TYPES_FILE, generateFromEntryPoint, TYPES_DIR, + TypegenFatalError, + TypegenSyntaxError, } from "./index"; +import type { PreflightMode } from "./preflight"; +import { + getWarehouseState, + startWarehouse, + waitUntilRunning, +} from "./warehouse-status"; const logger = createLogger("type-generator:vite-plugin"); +/** + * How long the DEV background watcher waits for a STARTING warehouse to reach + * RUNNING before giving up. Short relative to the CLI's preflight budget: this + * is a best-effort "regenerate once the warehouse warms up" convenience, not a + * gate, so we'd rather stop polling than hold a detached task open for minutes. + */ +const DEV_WAREHOUSE_WATCH_MAX_MS = 60_000; + /** * Options for the AppKit types plugin. */ @@ -30,7 +47,39 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { let outFile: string; let watchFolders: string[]; - async function generate() { + // Single-flight state for runGenerate(). 
`inFlight` is the promise of the + // currently-running drain (null when idle); `queued` records that a trigger + // arrived while a run was active so exactly ONE trailing run fires afterwards + // (latest-wins — coalesces any number of overlapping triggers into a single + // rerun). `queued` is read/cleared synchronously inside the drain loop so a + // trigger landing in any window is caught before the drain exits. + // + // `pendingMode` is the mode the next generate should run in (latest-wins, like + // `queued`): the foreground build runs non-blocking in dev (instant degrade) + // while the background warehouse watch runs blocking (real DESCRIBEs). A + // blocking watch trigger that lands while a non-blocking foreground run is in + // flight therefore still describes when its trailing run fires. + let inFlight: Promise | null = null; + let queued = false; + let pendingMode: PreflightMode = "non-blocking"; + + // The currently-armed DEV background warehouse watch, if any. Aborting it + // stops a pending waitUntilRunning (server shutdown, or a newer arm replacing + // an older one). + let watchController: AbortController | null = null; + + /** + * Generate types once in the given preflight {@link PreflightMode}. Never + * throws in dev (logs instead); in production it rethrows so the build fails. + * This is the un-guarded core — callers should go through {@link runGenerate} + * so concurrent triggers can't race-write the .d.ts. + * + * @param mode - preflight policy for this run. The foreground build passes a + * NODE_ENV-derived mode (blocking in production, non-blocking in dev so it + * degrades instantly); the background warehouse watch passes "blocking" so + * its regenerate actually DESCRIBEs and lands real (non-degraded) types. 
+ */ + async function generateOnce(mode: PreflightMode) { try { const warehouseId = process.env.DATABRICKS_WAREHOUSE_ID || ""; @@ -44,16 +93,185 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { queryFolder: watchFolders[0], warehouseId, noCache: false, + mode, }); } catch (error) { + // TypegenSyntaxError / TypegenFatalError carry a complete, actionable + // report in their message. Their stack frames and attached query arrays + // point into appkit internals and only add noise, so surface just the + // message — both when failing the prod build and when logging in dev. + const isTypegenError = + error instanceof TypegenSyntaxError || + error instanceof TypegenFatalError; + // throw in production to fail the build if (process.env.NODE_ENV === "production") { + if (isTypegenError) error.stack = error.message; throw error; } - logger.error("Error generating types: %O", error); + + if (isTypegenError) { + logger.error("%s", error.message); + } else { + logger.error("Error generating types: %O", error); + } } } + /** + * Single-flight wrapper around {@link generateOnce}. The initial build, the + * .sql watcher, and the DEV warehouse watch all route through here so they can + * never run typegen concurrently (which would race-write the .d.ts). + * + * If a run is already in flight, this does NOT start a second one — it records + * the requested mode and sets a trailing flag so exactly one more run fires + * after the current finishes, coalescing any number of overlapping triggers + * (latest-wins, including the mode: a blocking watch trigger that arrives mid + * non-blocking foreground run still describes when its trailing run fires). + * + * @param mode - preflight policy for this run. Recorded into `pendingMode`, + * which the drain reads for each generate (latest trigger wins). + * @returns A promise that resolves when this trigger's work (including any + * trailing run it scheduled) has completed. 
+   */
+  function runGenerate(mode: PreflightMode): Promise<void> {
+    // Latest-wins: record the requested mode even when a run is in flight, so
+    // the trailing run picks it up.
+    pendingMode = mode;
+
+    if (inFlight) {
+      // A run is active: remember that another trigger arrived and ride out the
+      // current run. One trailing run then covers all coalesced triggers and
+      // runs in the latest requested mode (recorded above).
+      queued = true;
+      return inFlight;
+    }
+
+    // Drain in a loop rather than recursing after a single queued-check: a
+    // trigger can land in the window between generateOnce() resolving and the
+    // check, so we re-test `queued` until it's clear. Critically, `inFlight` is
+    // cleared synchronously in the SAME tick as the final `queued === false`
+    // observation: the `finally` block below runs as part of the `return`
+    // statement itself (it is a language-level `finally`, not a promise
+    // `.finally()` callback deferred to a microtask), so there's no window
+    // where a trigger sees `inFlight` set but the drain has already decided to
+    // exit. The guard stays held for the whole drain, so concurrent triggers
+    // only ever set the flag; they never start a parallel generate.
+    //
+    // The same `finally` also releases the guard when generateOnce() rejects
+    // (it rethrows in production). Without that, `inFlight` would stay pinned
+    // to the rejected promise and every later trigger would receive the stale
+    // rejection without typegen ever running again.
+    const drain = async (): Promise<void> => {
+      try {
+        while (true) {
+          queued = false;
+          // Snapshot the mode synchronously alongside clearing `queued` so a
+          // trigger landing during this generate is observed (via `queued`) on
+          // the next loop with its own mode, not silently dropped.
+          const runMode = pendingMode;
+          await generateOnce(runMode);
+          // Synchronous check, atomic w.r.t. other (synchronous) callers.
+          if (!queued) return;
+        }
+      } finally {
+        inFlight = null;
+      }
+    };
+
+    inFlight = drain();
+    return inFlight;
+  }
+
+  /**
+   * DEV-only: get the warehouse to RUNNING in the background and regenerate with
+   * real (non-degraded) types once it is — without blocking dev startup. The
+   * foreground build only ever degrades in dev (instant `unknown`/cached types),
+   * so this is what lands actual DESCRIBE results in the editor for EVERY
+   * reachable warehouse state, not just one that happens to already be warm.
+ * + * Post-probe behaviour by state: + * - RUNNING → describe right away (the dev foreground degraded, so a running + * warehouse would otherwise never get real types — this is the case Phase 3 + * restores). `waitUntilRunning` returns immediately for an already-running + * warehouse, then the blocking regenerate fires. + * - STARTING → it's already coming up; just wait for RUNNING, then describe. + * - STOPPED / STOPPING → kick off a start, wait for RUNNING, then describe. + * - DELETED / DELETING → return (a deleted warehouse can't be started, and + * blocking typegen would treat it as fatal); leave the degraded types. + * + * No-op in production or without a warehouse id. Replaces any previously-armed + * watch (aborting it first). Fully self-contained: it never throws into the + * caller and never re-arms itself. The whole lifecycle is abortable via the + * shared {@link watchController} — its signal is threaded into + * `waitUntilRunning`, so a dev-server shutdown cancels a pending wait — and the + * regenerate routes through {@link runGenerate} so it can't race-write the + * .d.ts with the foreground degrade or a `.sql` re-trigger. + * + * The regenerate runs in "blocking" mode (not the foreground's non-blocking) + * so it actually DESCRIBEs the now-RUNNING warehouse and lands real types — + * the whole point of warming the warehouse in the background. + */ + function armWarehouseWatch(): void { + if (process.env.NODE_ENV === "production") return; + + const warehouseId = process.env.DATABRICKS_WAREHOUSE_ID || ""; + if (!warehouseId) return; + + // Supersede any in-flight watch so we never run two concurrently. 
+ watchController?.abort(); + const controller = new AbortController(); + watchController = controller; + const { signal } = controller; + + void (async () => { + try { + const client = new WorkspaceClient({}); + const state = await getWarehouseState(client, warehouseId); + + // A deleted/deleting warehouse can't be started and blocking typegen + // would treat it as fatal — leave the degraded types and stop. Every + // other state (including RUNNING) proceeds to wait-then-describe so the + // dev editor gets real types, not just the foreground's degraded ones. + if (state === "DELETED" || state === "DELETING") { + return; + } + + // Stopped/stopping won't reach RUNNING on its own — nudge it. RUNNING and + // STARTING need no start (RUNNING is already up; STARTING is coming up), + // so don't issue a redundant one. A failed start is non-fatal: give up + // silently rather than throw out of the detached task (the developer + // still has degraded/cached types). + let startedByUs = false; + if (state === "STOPPED" || state === "STOPPING") { + try { + logger.debug("Warehouse is %s; starting it.", state); + await startWarehouse(client, warehouseId); + startedByUs = true; + } catch { + return; + } + } + + // Wait for RUNNING. For an already-RUNNING warehouse this returns on the + // first poll; for STARTING/STOPPED it polls (abortably) until the + // warehouse warms up, a terminal state, or the deadline. + const final = await waitUntilRunning(client, warehouseId, { + maxMs: DEV_WAREHOUSE_WATCH_MAX_MS, + signal, + // We just issued the start, so the first poll(s) often still report + // STOPPED/STOPPING before the start propagates. Poll through those + // instead of bailing, or the regenerate would never fire. When we + // didn't start it (RUNNING/STARTING branch), keep the default terminal + // states. 
+ treatStoppedAsTransient: startedByUs, + }); + + if (final === "RUNNING" && !signal.aborted) { + logger.debug("Warehouse is RUNNING; regenerating types."); + // Blocking: the warehouse is RUNNING now, so describe it and emit real + // (non-degraded) types — unlike the foreground dev run, which degraded. + // Routed through the single-flight guard so it coalesces with the + // foreground degrade / any `.sql` re-trigger instead of racing them. + await runGenerate("blocking"); + } + } catch { + // Detached background task: any failure (timeout, abort, connectivity, + // auth) is non-fatal — the developer still has degraded/cached types. + } + })(); + } + return { name: "appkit-types", @@ -84,7 +302,20 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { }, buildStart() { - generate(); + // Production: block the build on this generate (and surface failures). + // The watch is a dev-only no-op, so just run typegen. + if (process.env.NODE_ENV === "production") { + return runGenerate("blocking"); + } + + // Dev: don't block startup waiting on typegen. The foreground generate runs + // non-blocking — it skips the warehouse entirely and writes degraded + // (cached/`unknown`) types instantly. Then arm the warehouse watch so the + // warehouse gets a one-shot BLOCKING regenerate (real types) in the + // background for EVERY reachable state: RUNNING describes right away, while + // STARTING/STOPPED are waited (and started) until they reach RUNNING. + void runGenerate("non-blocking"); + armWarehouseWatch(); }, configureServer(server) { @@ -96,9 +327,22 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin { ); if (isWatchedFile && changedFile.endsWith(".sql")) { - generate(); + // Route through the single-flight runner (was fire-and-forget + // generate(), which could race the initial build / watch). 
This is a + // dev-only hook, so degrade instantly (non-blocking), then re-arm the + // warehouse watch so the edited query is re-described in the background + // against the running warehouse (or once a still-starting one warms + // up), landing fresh blocking-described types. + void runGenerate("non-blocking"); + armWarehouseWatch(); } }); + + // Tear down any pending warehouse watch when the dev server closes so a + // long backoff can't keep the process alive after shutdown. + server.httpServer?.once("close", () => { + watchController?.abort(); + }); }, }; } diff --git a/packages/appkit/src/type-generator/warehouse-status.ts b/packages/appkit/src/type-generator/warehouse-status.ts new file mode 100644 index 000000000..aa50089ef --- /dev/null +++ b/packages/appkit/src/type-generator/warehouse-status.ts @@ -0,0 +1,176 @@ +import type { WorkspaceClient } from "@databricks/sdk-experimental"; + +/** + * Lifecycle states a SQL warehouse can report. Mirrors the Databricks SDK + * `State` union; redeclared here so callers of this module don't need to reach + * into the SDK's deep type paths. + */ +export type WarehouseState = + | "RUNNING" + | "STARTING" + | "STOPPED" + | "STOPPING" + | "DELETING" + | "DELETED"; + +/** Backoff bounds for {@link waitUntilRunning}. */ +const INITIAL_POLL_MS = 1000; +const MAX_POLL_MS = 15000; + +/** States from which the warehouse will not transition to RUNNING on its own. */ +const NOT_COMING_UP: ReadonlySet = new Set([ + "STOPPED", + "STOPPING", + "DELETED", + "DELETING", +]); + +/** + * Terminal states even when {@link waitUntilRunning} is told to treat + * STOPPED/STOPPING as transient: a deleted (or deleting) warehouse genuinely + * can't reach RUNNING, so we still resolve with the observed state. + */ +const NEVER_COMING_UP: ReadonlySet = new Set([ + "DELETED", + "DELETING", +]); + +/** + * Sleep for `ms`, resolving early if `signal` aborts. 
The pending timer is
+ * always cleared (on resolve and on abort) so a long backoff can't keep the
+ * event loop alive after the caller has bailed.
+ *
+ * @param ms - how long to sleep, in milliseconds.
+ * @param signal - optional abort signal; if already aborted the promise
+ * resolves at once, otherwise an abort clears the timer and resolves early.
+ * @returns A promise that always resolves (never rejects), on timeout or abort.
+ */
+function delay(ms: number, signal?: AbortSignal): Promise<void> {
+  return new Promise<void>((resolve) => {
+    if (signal?.aborted) {
+      resolve();
+      return;
+    }
+
+    const timer = setTimeout(() => {
+      // Timer won: drop the abort listener so it doesn't outlive this sleep.
+      signal?.removeEventListener("abort", onAbort);
+      resolve();
+    }, ms);
+
+    // Function declaration (hoisted), so the timer callback above can name it.
+    function onAbort() {
+      clearTimeout(timer);
+      resolve();
+    }
+
+    signal?.addEventListener("abort", onAbort, { once: true });
+  });
+}
+
+/**
+ * Fetch the current lifecycle state of a SQL warehouse.
+ *
+ * Errors from the SDK (auth, bad warehouse id, connectivity) are intentionally
+ * NOT caught — the caller decides how to classify and react to them.
+ *
+ * @param client - workspace client used to call `warehouses.get`.
+ * @param warehouseId - id of the warehouse to look up.
+ * @returns The `state` field of the response, asserted to {@link WarehouseState}
+ * (no runtime validation is performed on it).
+ */
+export async function getWarehouseState(
+  client: WorkspaceClient,
+  warehouseId: string,
+): Promise<WarehouseState> {
+  const response = await client.warehouses.get({ id: warehouseId });
+  return response.state as WarehouseState;
+}
+
+/**
+ * Initiate a start of a stopped/stopping SQL warehouse.
+ *
+ * Only KICKS OFF the start: the SDK's `start()` returns a Waiter, but we
+ * deliberately do not `.wait()` on it. Blocking on the full cold-start isn't our
+ * job here — {@link waitUntilRunning} is the poller that watches the warehouse
+ * the rest of the way to RUNNING. We just nudge it out of the stopped state.
+ *
+ * Errors from the SDK (auth, bad warehouse id, connectivity) are intentionally
+ * NOT caught — the caller decides how to classify and react to them.
+ *
+ * @param client - workspace client used to call `warehouses.start`.
+ * @param warehouseId - id of the warehouse to start.
+ */
+export async function startWarehouse(
+  client: WorkspaceClient,
+  warehouseId: string,
+): Promise<void> {
+  await client.warehouses.start({ id: warehouseId });
+}
+
+/**
+ * Poll a warehouse until it reaches RUNNING, settles into a state it won't
+ * leave on its own, or a deadline elapses.
+ *
+ * Polling uses exponential backoff: the first wait is ~{@link INITIAL_POLL_MS},
+ * doubling on each subsequent poll up to a ~{@link MAX_POLL_MS} cap.
+ *
+ * Resolution:
+ * - Resolves `"RUNNING"` as soon as the warehouse is running.
+ * - Resolves with the observed state if it reaches a not-coming-up state
+ *   (`STOPPED`/`STOPPING`/`DELETED`/`DELETING`) — the caller decides what to do.
+ *
+ * Set `opts.treatStoppedAsTransient` when the caller has just issued a start and
+ * a still-`STOPPED`/`STOPPING` reading is expected to be a stale pre-start blip
+ * rather than a settled state. With it on, those two states are polled through
+ * (like `STARTING`) until RUNNING, a genuinely terminal `DELETED`/`DELETING`, or
+ * the deadline — so an immediate post-start STOPPED reading no longer bails the
+ * wait. Off (default), STOPPED/STOPPING remain terminal and resolve as before.
+ *
+ * Pass `opts.signal` to abort an in-progress wait (e.g. a dev server shutting
+ * down): the next deadline/abort check throws an `AbortError`, and a pending
+ * backoff sleep resolves immediately rather than holding the process open.
+ *
+ * Each backoff sleep is capped to the time left in the budget, so the timeout
+ * error is raised at the deadline instead of up to one poll interval after it.
+ *
+ * @param client - workspace client used for the state polls.
+ * @param warehouseId - id of the warehouse to watch.
+ * @param opts - `maxMs` is the total wait budget; `pollMs` overrides the first
+ * backoff interval; `signal` and `treatStoppedAsTransient` are described above.
+ * @returns `"RUNNING"`, or the terminal state that ended the wait early.
+ * @throws Error if `maxMs` elapses before the warehouse reaches RUNNING.
+ * @throws Error (`name === "AbortError"`) if `opts.signal` is or becomes aborted.
+ */
+export async function waitUntilRunning(
+  client: WorkspaceClient,
+  warehouseId: string,
+  opts: {
+    maxMs: number;
+    pollMs?: number;
+    signal?: AbortSignal;
+    treatStoppedAsTransient?: boolean;
+  },
+): Promise<WarehouseState> {
+  const { maxMs, signal, treatStoppedAsTransient } = opts;
+  const start = Date.now();
+  let pollMs = opts.pollMs ?? INITIAL_POLL_MS;
+
+  // Which states end the wait early. When we've just issued a start, STOPPED and
+  // STOPPING are expected stale readings, so only DELETED/DELETING stay terminal.
+  const terminalStates = treatStoppedAsTransient
+    ? NEVER_COMING_UP
+    : NOT_COMING_UP;
+
+  // Single source for the deadline error so both checks report identically.
+  const timeoutError = (lastState: WarehouseState): Error =>
+    new Error(
+      `Warehouse ${warehouseId} did not reach RUNNING within ${maxMs}ms (last state: ${lastState})`,
+    );
+
+  while (true) {
+    throwIfAborted(signal);
+
+    const state = await getWarehouseState(client, warehouseId);
+    if (state === "RUNNING") return "RUNNING";
+    if (terminalStates.has(state)) return state;
+
+    const remainingMs = maxMs - (Date.now() - start);
+    if (remainingMs <= 0) {
+      throw timeoutError(state);
+    }
+
+    // Never nap past the deadline: sleep for the backoff interval or whatever
+    // is left of the budget, whichever is shorter.
+    await delay(Math.min(pollMs, remainingMs), signal);
+    throwIfAborted(signal);
+
+    // Re-check the deadline after sleeping so we don't issue another poll past
+    // the budget purely because we napped through it.
+    if (Date.now() - start >= maxMs) {
+      throw timeoutError(state);
+    }
+
+    pollMs = Math.min(pollMs * 2, MAX_POLL_MS);
+  }
+}
+
+/**
+ * Throw an AbortError if the signal has been aborted. The error is a plain
+ * `Error` whose `name` is set to `"AbortError"` (the DOMException naming
+ * convention), so callers can match on `error.name`.
+ */
+function throwIfAborted(signal?: AbortSignal): void {
+  if (!signal?.aborted) return;
+  const error = new Error("The warehouse wait was aborted.");
+  error.name = "AbortError";
+  throw error;
+}
diff --git a/packages/shared/src/cli/commands/generate-types.test.ts b/packages/shared/src/cli/commands/generate-types.test.ts
new file mode 100644
index 000000000..df65f047c
--- /dev/null
+++ b/packages/shared/src/cli/commands/generate-types.test.ts
@@ -0,0 +1,233 @@
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import {
+  afterEach,
+  beforeEach,
+  describe,
+  expect,
+  type Mock,
+  test,
+  vi,
+} from "vitest";
+
+// --- Module mocks -----------------------------------------------------------
+// vi.mock factories are hoisted above the file, so the spies they return must be
+// created in a hoisted block too (plain top-level consts would be in the TDZ when
+// the hoisted factory runs).
+const { + generateFromEntryPoint, + generateServingTypes, + unref, + spawn, + acquireSpawnLock, + releaseSpawnLock, + getSpawnLockPath, +} = vi.hoisted(() => { + // `path` import isn't available yet inside a hoisted block; require it here. + const nodePath = require("node:path") as typeof import("node:path"); + const unref = vi.fn(); + const lockPathOf = (root: string) => + nodePath.join(root, "node_modules", ".databricks", "appkit", "worker.lock"); + return { + generateFromEntryPoint: vi.fn(async () => {}), + generateServingTypes: vi.fn(async () => {}), + unref, + spawn: vi.fn( + (_bin: string, _args: string[], _opts: Record) => ({ + unref, + }), + ), + acquireSpawnLock: vi.fn(() => true), + releaseSpawnLock: vi.fn(), + getSpawnLockPath: vi.fn(lockPathOf), + }; +}); + +// The library type-generator is an optional/ambient module; mock it so the +// command's `await import("@databricks/appkit/type-generator")` resolves to spies +// and never touches a warehouse. +vi.mock("@databricks/appkit/type-generator", () => ({ + generateFromEntryPoint, + generateServingTypes, +})); + +// Mock the detached spawn so we can assert how the worker is launched without +// actually forking a process. +vi.mock("node:child_process", () => ({ spawn })); + +// Mock the single-flight lock so each test controls acquire/steal outcomes and +// we can assert release. Steal/fresh semantics of the real implementation are +// covered separately in spawn-lock.test.ts. +vi.mock("./spawn-lock.js", () => ({ + acquireSpawnLock, + releaseSpawnLock, + getSpawnLockPath, + SPAWN_LOCK_STALE_MS: 360_000, +})); + +import { generateTypesCommand, resolveTypegenMode } from "./generate-types"; + +/** + * Drive the real commander command the way the bin does, so argv parsing + * (`--wait`, `--worker-lock ` → camelCase, positionals) is exercised + * end-to-end. `from: "user"` means args are the user-supplied tokens only. 
+ */ +async function runCli(args: string[]): Promise { + await generateTypesCommand.parseAsync(args, { from: "user" }); +} + +describe("resolveTypegenMode (generate-types --wait)", () => { + test("defaults to non-blocking when no options/flag are given", () => { + // A one-shot CLI never describes by default — it emits cached/`unknown` types + // and exits 0 instead of blocking on (or failing because of) a warehouse, + // even a RUNNING one. The template's postinstall/predev rely on this. + expect(resolveTypegenMode()).toBe("non-blocking"); + expect(resolveTypegenMode({})).toBe("non-blocking"); + }); + + test("stays non-blocking when wait is false (flag absent)", () => { + expect(resolveTypegenMode({ wait: false })).toBe("non-blocking"); + }); + + test("switches to blocking when --wait sets wait to true", () => { + // commander maps `--wait` to `{ wait: true }`. A deliberate/CI invocation + // opts in to waiting for a starting warehouse and failing fast on a stopped + // one. + expect(resolveTypegenMode({ wait: true })).toBe("blocking"); + }); +}); + +describe("generate-types foreground spawn orchestration", () => { + let tmpRoot: string; + let consoleLog: Mock; + let consoleError: Mock; + const prevWarehouse = process.env.DATABRICKS_WAREHOUSE_ID; + + beforeEach(() => { + vi.clearAllMocks(); + acquireSpawnLock.mockReturnValue(true); + + // A real temp project root with a config/queries folder so the analytics + // generate path runs. 
+ tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "gentypes-")); + fs.mkdirSync(path.join(tmpRoot, "config", "queries"), { recursive: true }); + process.env.DATABRICKS_WAREHOUSE_ID = "wh-123"; + + consoleLog = vi.spyOn(console, "log").mockImplementation(() => {}) as Mock; + consoleError = vi + .spyOn(console, "error") + .mockImplementation(() => {}) as Mock; + }); + + afterEach(() => { + vi.restoreAllMocks(); + fs.rmSync(tmpRoot, { recursive: true, force: true }); + if (prevWarehouse === undefined) { + delete process.env.DATABRICKS_WAREHOUSE_ID; + } else { + process.env.DATABRICKS_WAREHOUSE_ID = prevWarehouse; + } + }); + + test("non-blocking: generates degraded types and spawns exactly one detached worker", async () => { + const outFile = path.join(tmpRoot, "shared/appkit-types/analytics.d.ts"); + + await runCli([tmpRoot, outFile, "wh-123"]); + + // Library generate ran in non-blocking mode (writes degraded types). + expect(generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "non-blocking" }), + ); + + // Exactly one detached worker, re-invoking this CLI with --wait and the + // worker lock, forwarding the same positional targets. + expect(spawn).toHaveBeenCalledTimes(1); + const [bin, argv, opts] = spawn.mock.calls[0]; + expect(bin).toBe(process.execPath); + // The parent's node/loader flags (process.execArgv — e.g. tsx's + // --require/--import) are forwarded before the CLI entry so a worker spawned + // from a source/tsx run can still execute the .ts. Everything from the entry + // onward is the worker invocation. 
+ const entryIdx = argv.indexOf(process.argv[1]); + expect(entryIdx).toBeGreaterThanOrEqual(0); + expect(argv.slice(0, entryIdx)).toEqual(process.execArgv); + expect(argv.slice(entryIdx)).toEqual([ + process.argv[1], // CLI entry + "generate-types", + "--wait", + "--worker-lock", + getSpawnLockPath(tmpRoot), + tmpRoot, + outFile, + "wh-123", + ]); + expect(opts).toMatchObject({ detached: true, stdio: "ignore" }); + expect(unref).toHaveBeenCalledTimes(1); + }); + + test("lock already held (fresh): does NOT spawn, foreground still resolves", async () => { + acquireSpawnLock.mockReturnValue(false); + + await expect(runCli([tmpRoot])).resolves.toBeUndefined(); + + expect(generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(spawn).not.toHaveBeenCalled(); + // One-line single-flight note. + expect(consoleLog).toHaveBeenCalledWith( + "Type refresh already in progress, skipping.", + ); + }); + + test("stale lock: steals (acquire returns true) and spawns", async () => { + // acquireSpawnLock returning true models a stolen stale lock (the real steal + // path is unit-tested in spawn-lock.test.ts). + acquireSpawnLock.mockReturnValue(true); + + await runCli([tmpRoot]); + + expect(acquireSpawnLock).toHaveBeenCalledTimes(1); + expect(spawn).toHaveBeenCalledTimes(1); + }); + + test("spawn throwing is non-fatal: foreground does not reject", async () => { + spawn.mockImplementationOnce(() => { + throw new Error("EAGAIN"); + }); + + await expect(runCli([tmpRoot])).resolves.toBeUndefined(); + + // Generate still ran; failure was swallowed and logged. 
+ expect(generateFromEntryPoint).toHaveBeenCalledTimes(1); + expect(consoleError).toHaveBeenCalledWith( + expect.stringContaining("Could not start background type refresh"), + ); + }); + + test("--wait (deliberate/CI) generates blocking and never spawns", async () => { + await runCli([tmpRoot, "--wait"]); + + expect(generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "blocking" }), + ); + expect(acquireSpawnLock).not.toHaveBeenCalled(); + expect(spawn).not.toHaveBeenCalled(); + }); + + test("worker invocation (--worker-lock): runs blocking generate, releases lock, does NOT spawn", async () => { + const lockPath = getSpawnLockPath(tmpRoot); + + await runCli([tmpRoot, "--worker-lock", lockPath]); + + // A worker is always blocking — it does the real DESCRIBE lifecycle. + expect(generateFromEntryPoint).toHaveBeenCalledWith( + expect.objectContaining({ mode: "blocking" }), + ); + // It releases the SAME lock it was handed. + expect(releaseSpawnLock).toHaveBeenCalledWith(lockPath); + // It must never spawn another worker (recursion would never terminate). + expect(spawn).not.toHaveBeenCalled(); + expect(acquireSpawnLock).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/shared/src/cli/commands/generate-types.ts b/packages/shared/src/cli/commands/generate-types.ts index 1be7c7e2e..03c1631a1 100644 --- a/packages/shared/src/cli/commands/generate-types.ts +++ b/packages/shared/src/cli/commands/generate-types.ts @@ -1,19 +1,58 @@ +import { spawn } from "node:child_process"; import fs from "node:fs"; import path from "node:path"; -import { Command } from "commander"; +import { Command, Option } from "commander"; +import { + acquireSpawnLock, + getSpawnLockPath, + releaseSpawnLock, +} from "./spawn-lock.js"; /** - * Generate types command implementation + * Resolve the typegen pre-flight mode for the CLI. 
Defaults to "non-blocking" —
+ * a one-shot CLI can't describe in the background, so by default it never
+ * describes at all: it skips the warehouse probe AND every DESCRIBE, emits
+ * best-available types (cache where the SQL hash matches, else `result: unknown`)
+ * and returns immediately, never blocking on — or failing because of — a
+ * warehouse, even a RUNNING one. Pass `--wait` (commander sets `wait: true`)
+ * for a deliberate/CI invocation that should wait for a starting warehouse and
+ * fail fast on a stopped one.
+ *
+ * NOTE(review): the user-facing `--wait` documentation describes blocking mode
+ * as *starting* a stopped warehouse and failing only when the warehouse is
+ * deleted/deleting, which disagrees with "fail fast on a stopped one" above —
+ * confirm which behaviour is current and align the wording.
+ *
+ * @param options - parsed CLI options; only `wait` is read here.
+ * @returns `"blocking"` when `options.wait` is truthy, otherwise
+ * `"non-blocking"` (including when `options` is omitted).
+ */
+export function resolveTypegenMode(options?: {
+  wait?: boolean;
+}): "non-blocking" | "blocking" {
+  return options?.wait ? "blocking" : "non-blocking";
+}
+
+/** Options parsed by commander for the generate-types command. */
+interface GenerateTypesOptions {
+  // Cache opt-out; presumably fed by the `--no-cache` flag — TODO confirm the
+  // commander option actually populates this key.
+  noCache?: boolean;
+  // True when `--wait` was passed; selects blocking mode via resolveTypegenMode.
+  wait?: boolean;
+  /**
+   * Internal: present only on the detached worker invocation. Carries the path
+   * of the single-flight lock this worker must release when it finishes. Its
+   * presence is what marks an invocation as "the worker" — workers always run
+   * with `--wait`, so they never spawn another worker (only non-blocking runs
+   * spawn), which terminates the recursion.
+   */
+  workerLock?: string;
+}
+
+/**
+ * Generate types command implementation. Runs the library generate (which, in
+ * non-blocking mode, writes degraded types and returns immediately). This is the
+ * SAME work the worker performs in blocking mode in the background.
*/ async function runGenerateTypes( rootDir?: string, outFile?: string, warehouseId?: string, - options?: { noCache?: boolean }, + options?: GenerateTypesOptions, ) { try { const resolvedRootDir = rootDir || process.cwd(); const noCache = options?.noCache || false; + const mode = resolveTypegenMode(options); const typeGen = await import("@databricks/appkit/type-generator"); @@ -33,6 +72,7 @@ async function runGenerateTypes( outFile: resolvedOutFile, warehouseId: resolvedWarehouseId, noCache, + mode, }); console.log(`Generated query types: ${resolvedOutFile}`); } @@ -63,10 +103,146 @@ async function runGenerateTypes( console.error("Please install @databricks/appkit to use this command."); process.exit(1); } + // TypegenSyntaxError / TypegenFatalError carry a complete, actionable + // message (which queries failed and how to debug them). The stack trace + // points into appkit internals and is noise for app developers, so print + // only the message and exit non-zero instead of letting it bubble up. + if ( + error instanceof Error && + (error.name === "TypegenSyntaxError" || + error.name === "TypegenFatalError") + ) { + console.error(error.message); + process.exit(1); + } throw error; } } +/** + * Spawn the detached blocking worker that refreshes real types in the background + * after the foreground non-blocking generate has already written degraded types. + * + * Re-invokes THIS CLI (`process.execPath` + `process.argv[1]` — the bin entry + * that launched us) with `generate-types --wait --worker-lock ` plus + * the same positional target options the foreground used, so the worker writes + * to the same out file / reads the same query folder. The worker is: + * - `detached: true` + `.unref()` so it outlives this process (install/dev-setup + * can finish and exit while the worker keeps warming the warehouse). + * - `stdio: "ignore"` so it never holds the parent's pipes open or interleaves + * output into the install/dev log. 
/**
 * Spawn the detached blocking worker that refreshes real types in the
 * background after the foreground non-blocking generate has already written
 * degraded types.
 *
 * Re-invokes THIS CLI (`process.execPath` + `process.argv[1]` — the bin entry
 * that launched us) with `generate-types --wait --worker-lock LOCK_PATH` plus
 * the same positional targets the foreground used, so the worker writes to the
 * same out file / reads the same query folder. The worker is:
 *  - `detached: true` + `.unref()` so it can outlive this process.
 *  - `stdio: "ignore"` so it never holds the parent's pipes open or interleaves
 *    output into the install/dev log.
 *
 * Failure handling: every failure to start the worker is non-fatal. Node
 * reports most launch failures (ENOENT, EACCES, EAGAIN, …) asynchronously via
 * the child's `'error'` event rather than by throwing, and an `'error'` event
 * with no listener becomes an uncaught exception — so a listener is attached
 * in addition to the try/catch. In both failure paths the lock is released
 * here, because no worker exists to release it; otherwise it would block
 * every refresh until the stale window expires.
 *
 * @param lockPath - the acquired single-flight lock; passed to the worker so
 *   it releases the SAME lock when it finishes.
 * @param targets - the foreground's positional args, forwarded verbatim, plus
 *   `noCache` so the worker honours `--no-cache` like the foreground did.
 * @returns true if `spawn()` returned a child, false if the worker could not
 *   be started synchronously. An asynchronous launch failure is logged and
 *   releases the lock, but cannot change the already-returned value.
 */
export function spawnTypegenWorker(
  lockPath: string,
  targets: {
    rootDir?: string;
    outFile?: string;
    warehouseId?: string;
    noCache?: boolean;
  },
): boolean {
  // Shared failure path: log, free the lock nobody else will free, report false.
  const abandon = (error: unknown): false => {
    console.error(
      `Could not start background type refresh: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
    releaseSpawnLock(lockPath);
    return false;
  };

  // The script the runtime launched us with (the `appkit` bin shim). Re-running
  // it under the same node binary reproduces this exact CLI in the worker.
  const cliEntry = process.argv[1];
  if (cliEntry === undefined) {
    return abandon(
      new Error("cannot determine the CLI entry script (process.argv[1] is unset)"),
    );
  }

  // Forward the positionals in declaration order (rootDir, outFile,
  // warehouseId). Stop at the first undefined so we never pass a literal
  // "undefined" and never shift a later value into an earlier slot.
  const positionals: string[] = [];
  for (const value of [targets.rootDir, targets.outFile, targets.warehouseId]) {
    if (value === undefined) break;
    positionals.push(value);
  }

  const args = [
    // Forward the parent's node/loader flags so the worker runs under the same
    // runtime (e.g. tsx's `--require`/`--import` when run from source). Empty
    // for the built bin, so production behaviour is unchanged.
    ...process.execArgv,
    cliEntry,
    "generate-types",
    "--wait",
    ...(targets.noCache ? ["--no-cache"] : []),
    "--worker-lock",
    lockPath,
    ...positionals,
  ];

  try {
    const child = spawn(process.execPath, args, {
      detached: true,
      stdio: "ignore",
    });
    // Launch failures arrive here, not in the catch below.
    child.once("error", abandon);
    child.unref();
    return true;
  } catch (error) {
    // Synchronous failures (e.g. invalid arguments).
    return abandon(error);
  }
}

/**
 * The command action. Orchestrates the non-blocking foreground contract:
 *  1. Run the library generate (non-blocking for a plain invocation; blocking
 *     when `--wait` is set, which includes the worker).
 *  2. If this is a non-blocking, non-worker invocation, try to spawn the
 *     detached blocking worker behind the single-flight lock. If the lock is
 *     already held, skip with a one-line note.
 *  3. If this IS the worker (`--worker-lock` present), release the lock it was
 *     handed — both in `finally` (normal return / throw) and from a
 *     process-exit handler (covers `process.exit` inside the generate).
 *
 * @param rootDir - project root positional (commander supplies a default).
 * @param outFile - output file positional, if given.
 * @param warehouseId - warehouse id positional, if given.
 * @param options - parsed commander options.
 */
async function generateTypesAction(
  rootDir: string | undefined,
  outFile: string | undefined,
  warehouseId: string | undefined,
  options: GenerateTypesOptions,
) {
  const workerLock = options.workerLock;
  const isWorker = typeof workerLock === "string";

  // commander stores the negatable `--no-cache` flag as `cache: false`; it
  // never sets a `noCache` key. Normalise to the `noCache` field that
  // runGenerateTypes reads so the flag actually takes effect.
  const parsedCache = (options as GenerateTypesOptions & { cache?: boolean })
    .cache;
  const noCache = options.noCache === true || parsedCache === false;
  const effectiveOptions: GenerateTypesOptions = { ...options, noCache };

  // A worker must always free its lock, even if the blocking generate calls
  // process.exit. The exit handler covers that path; the finally covers the
  // normal return and thrown errors. releaseSpawnLock tolerates a double call.
  if (workerLock) {
    process.once("exit", () => releaseSpawnLock(workerLock));
  }

  try {
    await runGenerateTypes(rootDir, outFile, warehouseId, effectiveOptions);
  } finally {
    if (workerLock) {
      releaseSpawnLock(workerLock);
    }
  }

  // Only a non-blocking, non-worker invocation spawns. A worker is always
  // --wait, which prevents recursion.
  if (isWorker || resolveTypegenMode(options) !== "non-blocking") {
    return;
  }

  const lockPath = getSpawnLockPath(rootDir || process.cwd());
  if (acquireSpawnLock(lockPath)) {
    // On failure spawnTypegenWorker logs and releases the lock itself.
    spawnTypegenWorker(lockPath, { rootDir, outFile, warehouseId, noCache });
  } else {
    console.log("Type refresh already in progress, skipping.");
  }
}
shared/appkit-types/analytics.d.ts my-warehouse-id - $ appkit generate-types --no-cache`, + $ appkit generate-types --no-cache + $ appkit generate-types --wait # CI: wait for the warehouse and fail on a cold one`, ) - .action(runGenerateTypes); + .action(generateTypesAction); diff --git a/packages/shared/src/cli/commands/spawn-lock.test.ts b/packages/shared/src/cli/commands/spawn-lock.test.ts new file mode 100644 index 000000000..5566d4be4 --- /dev/null +++ b/packages/shared/src/cli/commands/spawn-lock.test.ts @@ -0,0 +1,93 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "vitest"; +import { + acquireSpawnLock, + getSpawnLockPath, + releaseSpawnLock, + SPAWN_LOCK_STALE_MS, +} from "./spawn-lock"; + +describe("spawn-lock", () => { + let tmpRoot: string; + let lockPath: string; + + beforeEach(() => { + tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "spawnlock-")); + lockPath = path.join(tmpRoot, "worker.lock"); + }); + + afterEach(() => { + fs.rmSync(tmpRoot, { recursive: true, force: true }); + }); + + test("getSpawnLockPath nests under node_modules/.databricks/appkit of the root", () => { + const root = path.join(os.tmpdir(), "some-project"); + expect(getSpawnLockPath(root)).toBe( + path.join( + root, + "node_modules", + ".databricks", + "appkit", + ".appkit-typegen-worker.lock", + ), + ); + }); + + test("acquire creates the lock (and its parent dirs) and returns true", () => { + const nested = path.join(tmpRoot, "node_modules", ".databricks", "lock"); + expect(acquireSpawnLock(nested)).toBe(true); + expect(fs.existsSync(nested)).toBe(true); + // Body records pid for debugging. + expect(fs.readFileSync(nested, "utf8")).toContain(String(process.pid)); + }); + + test("acquire returns false when a FRESH lock is already held (single-flight)", () => { + expect(acquireSpawnLock(lockPath)).toBe(true); + // Second caller sees a fresh lock and backs off. 
+ expect(acquireSpawnLock(lockPath)).toBe(false); + // The original lock file is untouched. + expect(fs.existsSync(lockPath)).toBe(true); + }); + + test("acquire STEALS a stale lock (older than staleMs) and recreates it", () => { + fs.writeFileSync(lockPath, "99999 0\n"); + // Backdate mtime well beyond the stale window. + const old = new Date(Date.now() - (SPAWN_LOCK_STALE_MS + 60_000)); + fs.utimesSync(lockPath, old, old); + + expect(acquireSpawnLock(lockPath)).toBe(true); + // Re-created with our pid — proves it was stolen, not just observed. + expect(fs.readFileSync(lockPath, "utf8")).toContain(String(process.pid)); + }); + + test("a custom staleMs governs the fresh/stale boundary", () => { + fs.writeFileSync(lockPath, "1 0\n"); + const fiveSecAgo = new Date(Date.now() - 5_000); + fs.utimesSync(lockPath, fiveSecAgo, fiveSecAgo); + + // 10s window: 5s-old lock is still fresh → not acquired. + expect(acquireSpawnLock(lockPath, 10_000)).toBe(false); + // 1s window: 5s-old lock is stale → stolen. 
+ expect(acquireSpawnLock(lockPath, 1_000)).toBe(true); + }); + + test("release unlinks the lock", () => { + acquireSpawnLock(lockPath); + expect(fs.existsSync(lockPath)).toBe(true); + releaseSpawnLock(lockPath); + expect(fs.existsSync(lockPath)).toBe(false); + }); + + test("release is a no-op (no throw) when the lock is already gone", () => { + expect(() => releaseSpawnLock(lockPath)).not.toThrow(); + }); + + test("release then re-acquire works (full single-flight cycle)", () => { + expect(acquireSpawnLock(lockPath)).toBe(true); + expect(acquireSpawnLock(lockPath)).toBe(false); // held + releaseSpawnLock(lockPath); + expect(acquireSpawnLock(lockPath)).toBe(true); // freed → re-acquirable + }); +}); diff --git a/packages/shared/src/cli/commands/spawn-lock.ts b/packages/shared/src/cli/commands/spawn-lock.ts new file mode 100644 index 000000000..d4f86c4a5 --- /dev/null +++ b/packages/shared/src/cli/commands/spawn-lock.ts @@ -0,0 +1,148 @@ +import fs from "node:fs"; +import path from "node:path"; + +/** + * How long a spawn lock is considered fresh. A held lock newer than this means a + * background worker is genuinely in flight, so the foreground skips spawning; + * older than this the lock is presumed orphaned (the worker crashed/was killed + * before it could release) and is stolen. + * + * Must comfortably exceed the worker's worst-case runtime: the blocking preflight + * wait cap (PREFLIGHT_WAIT_MAX_MS = 5 min in the type-generator) plus a DESCRIBE + * budget. Six minutes leaves ~1 min of headroom over a worker that waits the full + * preflight window and then describes. + */ +export const SPAWN_LOCK_STALE_MS = 6 * 60 * 1000; + +/** + * Resolve the on-disk path of the single-flight spawn lock for a project. + * + * Lives alongside the type-generator cache (`node_modules/.databricks/appkit/`) + * so it shares the same already-creatable, gitignored, per-project location and + * doesn't introduce a new directory. 
/**
 * Compute where the single-flight spawn lock for a project lives on disk.
 *
 * The file sits under `node_modules/.databricks/appkit/` of the given root and
 * is derived from the root alone, so every `generate-types` run for the same
 * project resolves to the same lock file.
 *
 * @param rootDir - project root (the resolved first CLI argument / cwd).
 * @returns path to the lock file, built with `path.join` from `rootDir`.
 */
export function getSpawnLockPath(rootDir: string): string {
  const appkitDir = path.join(rootDir, "node_modules", ".databricks", "appkit");
  return path.join(appkitDir, ".appkit-typegen-worker.lock");
}

/**
 * Try to take the single-flight spawn lock.
 *
 * The lock is created with the exclusive `wx` flag, so the create itself is
 * the mutual-exclusion step: it fails with EEXIST when the file is already
 * there. The file contents (`pid timestamp`) are informational only.
 *
 * When the file already exists its mtime decides the outcome:
 *  - younger than `staleMs` → treated as held; returns false.
 *  - otherwise → treated as orphaned; it is unlinked and re-created with `wx`.
 *
 * Every other failure (parent dir not creatable, permissions, …) is reported
 * as "not acquired" rather than thrown.
 *
 * NOTE(review): the stale path is stat → unlink → create, which is not atomic.
 * Two callers that both observe the same stale lock could each unlink and
 * re-create it in turn and both return true — confirm whether that matters.
 *
 * @param lockPath - path returned by {@link getSpawnLockPath}.
 * @param staleMs - age beyond which an existing lock is stolen. Defaults to
 *   {@link SPAWN_LOCK_STALE_MS}.
 * @returns true if this call created the lock file, false otherwise.
 */
export function acquireSpawnLock(
  lockPath: string,
  staleMs: number = SPAWN_LOCK_STALE_MS,
): boolean {
  const contents = `${process.pid} ${Date.now()}\n`;

  // Best-effort: if this fails, the exclusive create below fails too and the
  // call reports "not acquired".
  try {
    fs.mkdirSync(path.dirname(lockPath), { recursive: true });
  } catch {
    // ignored on purpose
  }

  // Fast path: nobody holds the lock.
  try {
    fs.writeFileSync(lockPath, contents, { flag: "wx" });
    return true;
  } catch (cause) {
    const alreadyExists = isErrnoException(cause) && cause.code === "EEXIST";
    if (!alreadyExists) {
      return false;
    }
  }

  // The file exists: read its mtime to classify it.
  let heldSinceMs: number;
  try {
    heldSinceMs = fs.statSync(lockPath).mtimeMs;
  } catch {
    // Gone between the failed create and the stat; one more exclusive attempt.
    return tryCreate(lockPath, contents);
  }

  const ageMs = Date.now() - heldSinceMs;
  if (ageMs < staleMs) {
    return false;
  }

  // Stale: remove it (an ENOENT here just means it is already gone), then
  // re-create exclusively.
  try {
    fs.unlinkSync(lockPath);
  } catch (cause) {
    const tolerable = !isErrnoException(cause) || cause.code === "ENOENT";
    if (!tolerable) {
      return false;
    }
  }
  return tryCreate(lockPath, contents);
}

/**
 * Release the spawn lock by unlinking it. Best-effort: any error, including
 * the file already being absent, is swallowed and nothing is thrown.
 *
 * @param lockPath - path returned by {@link getSpawnLockPath}.
 */
export function releaseSpawnLock(lockPath: string): void {
  try {
    fs.unlinkSync(lockPath);
  } catch {
    // Nothing to do: releasing never fails loudly.
  }
}

/**
 * One exclusive (`wx`) create attempt.
 *
 * @returns true when the file was created; false on EEXIST or any other error.
 */
function tryCreate(lockPath: string, body: string): boolean {
  let created = false;
  try {
    fs.writeFileSync(lockPath, body, { flag: "wx" });
    created = true;
  } catch {
    // Stays false.
  }
  return created;
}

/**
 * Type guard: true when the caught value is an `Error` that carries a `code`
 * property, which makes reading `.code` safe.
 */
function isErrnoException(error: unknown): error is NodeJS.ErrnoException {
  if (!(error instanceof Error)) {
    return false;
  }
  return "code" in error;
}
+ mode?: "non-blocking" | "blocking"; }): Promise; + export class TypegenSyntaxError extends Error { + readonly queries: Array<{ name: string; message: string }>; + readonly fatalQueries: Array<{ name: string; message: string }>; + } + + export class TypegenFatalError extends Error { + readonly queries: Array<{ name: string; message: string }>; + } + export function generateServingTypes(options: { outFile: string; noCache?: boolean; diff --git a/packages/shared/src/cli/index.ts b/packages/shared/src/cli/index.ts index 4d0ed65b7..aa60157c8 100644 --- a/packages/shared/src/cli/index.ts +++ b/packages/shared/src/cli/index.ts @@ -29,4 +29,4 @@ cmd.addCommand(docsCommand); cmd.addCommand(pluginCommand); cmd.addCommand(codemodCommand); -cmd.parse(); +await cmd.parseAsync(); diff --git a/template/package.json b/template/package.json index d29661427..94d8c8677 100644 --- a/template/package.json +++ b/template/package.json @@ -21,7 +21,7 @@ "test:smoke": "playwright install chromium && playwright test tests/smoke.spec.ts", "clean": "rm -rf client/dist dist build node_modules .smoke-test test-results playwright-report", "postinstall": "npm run typegen", - "prebuild": "npm run sync && npm run typegen", + "prebuild": "npm run sync && npm run typegen -- --wait", "predev": "npm run sync && npm run typegen", "sync": "appkit plugin sync --write --silent", "typegen": "appkit generate-types",