// Source: sqlc/internal/engine/sqlite/parse_test.go at commit
// fe7673b73ad9d7b8b850806ec21655156834fb5b (github.com/sqlc-dev/sqlc).

package sqlite

import (
	"strings"
	"testing"

	"github.com/sqlc-dev/sqlc/internal/source"
)

// TestParseNonASCIIComment verifies that non-ASCII characters in SQL comments
// do not corrupt the plucked query text.
//
// ANTLR4 stores the input as []rune so all token positions are rune indices,
// not byte offsets. source.Pluck (and the rest of the pipeline) treats
// StmtLocation/StmtLen as byte offsets. For multi-byte UTF-8 characters the
// two differ, which previously caused the plucked query to be truncated by one
// byte per extra byte in each non-ASCII character.
//
// Every case declares the statements it expects, so a parser that returns too
// few or too many statements fails the test instead of silently skipping the
// per-statement checks.
func TestParseNonASCIIComment(t *testing.T) {
	p := NewParser()

	// getUser is the query body shared by all single-statement cases.
	const getUser = "SELECT id FROM users WHERE id = ?"

	tests := []struct {
		name string
		sql  string
		// want has one entry per expected statement, in order. Each entry
		// is a fragment that the plucked text of that statement must contain.
		want []string
	}{
		{
			name: "2-byte char (U+00DC Ü) in dash comment",
			sql:  "-- name: GetUser :one\n-- Ünïcode comment\nSELECT id FROM users WHERE id = ?",
			want: []string{getUser},
		},
		{
			name: "3-byte char (U+2665 ♥) in dash comment",
			sql:  "-- name: GetUser :one\n-- ♥ love\nSELECT id FROM users WHERE id = ?",
			want: []string{getUser},
		},
		{
			name: "4-byte char (U+1D11E 𝄞) in dash comment",
			sql:  "-- name: GetUser :one\n-- 𝄞 music\nSELECT id FROM users WHERE id = ?",
			want: []string{getUser},
		},
		{
			name: "multiple non-ASCII chars in comment",
			sql:  "-- name: GetUser :one\n-- héllo wörld\nSELECT id FROM users WHERE id = ?",
			want: []string{getUser},
		},
		{
			name: "non-ASCII only in first of two statements",
			sql:  "-- name: Q1 :one\n-- Ü\nSELECT 1;\n\n-- name: Q2 :one\nSELECT 2",
			want: []string{"SELECT 1", "SELECT 2"},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			stmts, err := p.Parse(strings.NewReader(tc.sql))
			if err != nil {
				t.Fatalf("Parse error: %v", err)
			}
			// Fail fast on an unexpected statement count: the checks below
			// index tc.want by statement position.
			if len(stmts) != len(tc.want) {
				t.Fatalf("got %d statements, want %d", len(stmts), len(tc.want))
			}

			for i, stmt := range stmts {
				raw := stmt.Raw
				// Pluck once per statement and reuse the result for every check.
				plucked, err := source.Pluck(tc.sql, raw.StmtLocation, raw.StmtLen)
				if err != nil {
					t.Fatalf("stmt %d: Pluck error: %v", i, err)
				}
				if plucked == "" {
					t.Errorf("stmt %d: plucked text is empty", i)
					continue
				}

				// Sanity check only: this comparison is byte-wise, so it
				// cannot detect shifted or shortened offsets by itself. The
				// fragment and exact-match checks below do that.
				if !strings.Contains(tc.sql, plucked) {
					t.Errorf("stmt %d: plucked text is not a substring of the input\ngot:  %q\ninput: %q", i, plucked, tc.sql)
				}

				// A statement whose offsets were computed in runes instead of
				// bytes loses its tail (or starts too early), which drops or
				// displaces the expected fragment.
				if want := tc.want[i]; !strings.Contains(plucked, want) {
					t.Errorf("stmt %d: plucked text %q does not contain %q", i, plucked, want)
				}

				// For the single-statement cases the plucked text must equal
				// the full input, since there is exactly one statement and no
				// trailing semicolon to exclude.
				if len(tc.want) == 1 && plucked != tc.sql {
					t.Errorf("single-statement pluck mismatch\ngot:  %q\nwant: %q", plucked, tc.sql)
				}
			}
		})
	}
}