Skip to content

Commit 0b0ff5a

Browse files
committed
fix(migrations): robustly strip psql meta commands without breaking SQL
Replace naive PostgreSQL schema preprocessing with a single-pass state machine that correctly distinguishes top-level psql meta-commands from backslashes in valid SQL code, literals, identifiers, comments, and dollar-quoted bodies. The previous implementation would either leave pg_dump-style backslash directives in place for some schema-loading paths or strip too aggressively, breaking valid SQL containing: - Backslashes in string literals, including `E'...'` escapes and simple `standard_conforming_strings` variants - Meta-command text in comments or documentation - Dollar-quoted function bodies, including Unicode-tagged bodies - Double-quoted identifiers and identifiers containing `$` Changes: - Add engine-aware preprocessing helpers so rollback removal always applies while PostgreSQL preprocessing preserves server-side SQL, including PL/pgSQL bodies and extension/language DDL, before stripping psql meta-commands. - Replace the line-based PostgreSQL filter with a single-pass state machine that tracks single quotes, double quotes, dollar quotes, line comments, and nested block comments. - Handle escape-string prefixes, simple `standard_conforming_strings` value variants, Unicode dollar-quote tags, identifier-boundary checks, the documented `\\` separator command, and broader top-level unknown backslash directives. - Preserve SQL that follows a valid whitespace-delimited inline `\\` separator on the same psql meta-command line, matching tested `psql 17.9` behavior. - Strip semantic psql commands such as `\connect`, `\copy`, `\gexec`, `\i`, and `\ir` with warnings in parse/codegen paths, but reject them in schema-application paths where sqlc cannot reproduce their effects safely. - Reject psql conditionals (`\if`, `\elif`, `\else`, `\endif`) during preprocessing instead of flattening branches and changing SQL semantics. - Treat `standard_conforming_strings` and transaction-scoped script behavior as best-effort parsing aids rather than full psql emulation, and surface an explicit warning when preprocessing has to approximate those semantics. - Apply shared schema preprocessing and warning propagation in compiler parsing, `createdb`, `verify`, managed `vet`, and PostgreSQL sqltest seeding paths, including stable hashing of preprocessed read-only PostgreSQL fixtures. - Re-enable the `pg_dump` end-to-end fixture now that its schema parses and seeds successfully. - Add targeted regression coverage covering: * All documented psql meta-commands plus broader top-level unknown backslash directives * String literals with backslashes, escape-string prefixes, and simple `standard_conforming_strings` variants * Unicode dollar-quote tags, identifier-boundary cases, inline `\\` separator behavior, and rejected conditional directives * Semantic-command warnings and apply-mode rejections, `\copy ... from stdin`, line comments, nested block comments, quoted identifiers, helper edge cases, and schema preprocessing rollout * Bare CR normalization, CRLF preservation, and managed/PostgreSQL preprocessing behavior Performance improvements: - Pre-allocate output buffers with `strings.Builder.Grow()` - Keep parsing single-pass rather than rescanning line slices - Consolidate schema preprocessing into engine-aware helpers reused across schema-loading paths
1 parent 394bdc7 commit 0b0ff5a

File tree

14 files changed

+1856
-37
lines changed

14 files changed

+1856
-37
lines changed

internal/cmd/createdb.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ func CreateDB(ctx context.Context, dir, filename, querySetName string, o *Option
8585
if err != nil {
8686
return fmt.Errorf("read file: %w", err)
8787
}
88-
ddl = append(ddl, migrations.RemoveRollbackStatements(string(contents)))
88+
ddlText, warnings, err := migrations.PreprocessSchemaForApply(string(contents), string(queryset.Engine))
89+
if err != nil {
90+
return err
91+
}
92+
for _, warning := range warnings {
93+
fmt.Fprintln(o.Stderr, warning)
94+
}
95+
ddl = append(ddl, ddlText)
8996
}
9097

9198
now := time.Now().UTC().UnixNano()

internal/cmd/generate.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,9 @@ func parse(ctx context.Context, name, dir string, sql config.SQL, combo config.C
306306
return nil, true
307307
}
308308
if err := c.ParseCatalog(sql.Schema); err != nil {
309+
for _, warning := range c.Warnings() {
310+
fmt.Fprintln(stderr, warning)
311+
}
309312
fmt.Fprintf(stderr, "# package %s\n", name)
310313
if parserErr, ok := err.(*multierr.Error); ok {
311314
for _, fileErr := range parserErr.Errs() {
@@ -316,6 +319,9 @@ func parse(ctx context.Context, name, dir string, sql config.SQL, combo config.C
316319
}
317320
return nil, true
318321
}
322+
for _, warning := range c.Warnings() {
323+
fmt.Fprintln(stderr, warning)
324+
}
319325
if parserOpts.Debug.DumpCatalog {
320326
debug.Dump(c.Catalog())
321327
}

internal/cmd/process.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,12 @@ func processQuerySets(ctx context.Context, rp ResultProcessor, conf *config.Conf
122122
if err := grp.Wait(); err != nil {
123123
return err
124124
}
125-
if errored {
126-
for i, _ := range stderrs {
127-
if _, err := io.Copy(stderr, &stderrs[i]); err != nil {
128-
return err
129-
}
125+
for i := range stderrs {
126+
if _, err := io.Copy(stderr, &stderrs[i]); err != nil {
127+
return err
130128
}
129+
}
130+
if errored {
131131
return fmt.Errorf("errored")
132132
}
133133
return nil

internal/cmd/verify.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,14 @@ func Verify(ctx context.Context, dir, filename string, opts *Options) error {
102102
if err != nil {
103103
return fmt.Errorf("read file: %w", err)
104104
}
105-
ddl = append(ddl, migrations.RemoveRollbackStatements(string(contents)))
105+
ddlText, warnings, err := migrations.PreprocessSchemaForApply(string(contents), string(current.Engine))
106+
if err != nil {
107+
return err
108+
}
109+
for _, warning := range warnings {
110+
fmt.Fprintln(stderr, warning)
111+
}
112+
ddl = append(ddl, ddlText)
106113
}
107114

108115
var codegen plugin.GenerateRequest

internal/cmd/vet.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,14 @@ func (c *checker) fetchDatabaseUri(ctx context.Context, s config.SQL) (string, f
432432
if err != nil {
433433
return "", cleanup, fmt.Errorf("read file: %w", err)
434434
}
435-
ddl = append(ddl, migrations.RemoveRollbackStatements(string(contents)))
435+
ddlText, warnings, err := migrations.PreprocessSchemaForApply(string(contents), string(s.Engine))
436+
if err != nil {
437+
return "", cleanup, err
438+
}
439+
for _, warning := range warnings {
440+
fmt.Fprintln(c.Stderr, warning)
441+
}
442+
ddl = append(ddl, ddlText)
436443
}
437444

438445
resp, err := c.Client.CreateDatabase(ctx, &dbmanager.CreateDatabaseRequest{
@@ -549,7 +556,13 @@ func (c *checker) checkSQL(ctx context.Context, s config.SQL) error {
549556
if err != nil {
550557
return fmt.Errorf("read schema file: %w", err)
551558
}
552-
ddl := migrations.RemoveRollbackStatements(string(contents))
559+
ddl, warnings, err := migrations.PreprocessSchemaForApply(string(contents), string(s.Engine))
560+
if err != nil {
561+
return err
562+
}
563+
for _, warning := range warnings {
564+
fmt.Fprintln(c.Stderr, warning)
565+
}
553566
if _, err := db.ExecContext(ctx, ddl); err != nil {
554567
return fmt.Errorf("apply schema %s: %w", schema, err)
555568
}

internal/compiler/compile.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,12 @@ func (c *Compiler) parseCatalog(schemas []string) error {
3838
merr.Add(filename, "", 0, err)
3939
continue
4040
}
41-
contents := migrations.RemoveRollbackStatements(string(blob))
42-
contents = migrations.RemovePsqlMetaCommands(contents)
41+
contents, warnings, err := migrations.PreprocessSchema(string(blob), string(c.conf.Engine))
42+
if err != nil {
43+
merr.Add(filename, string(blob), 0, err)
44+
continue
45+
}
46+
c.warns = append(c.warns, warnings...)
4347
c.schema = append(c.schema, contents)
4448

4549
// In database-only mode, we parse the schema to validate syntax

internal/compiler/engine.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ type Compiler struct {
2828
selector selector
2929

3030
schema []string
31+
warns []string
3132

3233
// databaseOnlyMode indicates that the compiler should use database-only analysis
3334
// and skip building the internal catalog from schema files (analyzer.database: only)
@@ -138,6 +139,12 @@ func (c *Compiler) Result() *Result {
138139
return c.result
139140
}
140141

142+
// Warnings returns a copy of any non-fatal schema preprocessing warnings
143+
// collected while parsing the catalog.
144+
func (c *Compiler) Warnings() []string {
145+
return append([]string(nil), c.warns...)
146+
}
147+
141148
func (c *Compiler) Close(ctx context.Context) {
142149
if c.analyzer != nil {
143150
c.analyzer.Close(ctx)
Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
11
{
2-
"contexts": ["base"],
3-
"meta": {
4-
"invalid_schema": true
5-
}
2+
"contexts": ["base"]
63
}

0 commit comments

Comments
 (0)