The dolphin engine handles MySQL parsing and AST conversion using the TiDB parser.
SQL String → TiDB Parser → TiDB AST → sqlc AST → Analysis/Codegen
- `convert.go` - Converts TiDB AST nodes to sqlc AST nodes
- `format.go` - MySQL-specific formatting (identifiers, types, parameters)
- `parse.go` - Entry point for parsing MySQL SQL
The TiDB parser (github.com/pingcap/tidb/pkg/parser) is used for MySQL parsing:
import (
pcast "github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/parser/types"
)

- `pcast.SelectStmt`, `pcast.InsertStmt`, etc. - Statement types
- `pcast.ColumnNameExpr` - Column reference
- `pcast.FuncCallExpr` - Function call
- `pcast.BinaryOperationExpr` - Binary expression
- `pcast.VariableExpr` - MySQL user variable (`@var`)
- `pcast.Join` - JOIN clause with `Left`, `Right`, `On`, `Using`
Each TiDB node type has a corresponding converter method:
func (c *cc) convertSelectStmt(n *pcast.SelectStmt) *ast.SelectStmt {
return &ast.SelectStmt{
FromClause: c.convertTableRefsClause(n.From),
WhereClause: c.convert(n.Where),
// ...
}
}

The main `convert()` method dispatches to specific converters:
func (c *cc) convert(node pcast.Node) ast.Node {
switch n := node.(type) {
case *pcast.SelectStmt:
return c.convertSelectStmt(n)
case *pcast.InsertStmt:
return c.convertInsertStmt(n)
// ...
}
}

func (c *cc) convertColumnNameExpr(n *pcast.ColumnNameExpr) *ast.ColumnRef {
var items []ast.Node
if schema := n.Name.Schema.String(); schema != "" {
items = append(items, NewIdentifier(schema))
}
if table := n.Name.Table.String(); table != "" {
items = append(items, NewIdentifier(table))
}
items = append(items, NewIdentifier(n.Name.Name.String()))
return &ast.ColumnRef{Fields: &ast.List{Items: items}}
}

func (c *cc) convertJoin(n *pcast.Join) *ast.List {
if n.Right != nil && n.Left != nil {
return &ast.List{
Items: []ast.Node{&ast.JoinExpr{
Jointype: ast.JoinType(n.Tp),
Larg: c.convert(n.Left),
Rarg: c.convert(n.Right),
Quals: c.convert(n.On),
UsingClause: convertUsing(n.Using),
}},
}
}
// No join - just return tables
// ...
}

MySQL user variables (`@var`) are different from sqlc's `@param` syntax:
func (c *cc) convertVariableExpr(n *pcast.VariableExpr) ast.Node {
// Use VariableExpr to preserve as-is (NOT A_Expr which would be treated as sqlc param)
return &ast.VariableExpr{
Name: n.Name,
Location: n.OriginTextPosition(),
}
}

func (c *cc) convertFuncCastExpr(n *pcast.FuncCastExpr) ast.Node {
typeName := types.TypeStr(n.Tp.GetType())
// Handle UNSIGNED/SIGNED specially
if typeName == "bigint" {
if mysql.HasUnsignedFlag(n.Tp.GetFlag()) {
typeName = "bigint unsigned"
} else {
typeName = "bigint signed"
}
}
return &ast.TypeCast{
Arg: c.convert(n.Expr),
TypeName: &ast.TypeName{Name: typeName},
}
}

func convertColumnDef(def *pcast.ColumnDef) *ast.ColumnDef {
typeName := &ast.TypeName{Name: types.TypeToStr(def.Tp.GetType(), def.Tp.GetCharset())}
// Only add Typmods for types where length is meaningful
tp := def.Tp.GetType()
flen := def.Tp.GetFlen()
switch tp {
case mysql.TypeVarchar, mysql.TypeString, mysql.TypeVarString:
if flen >= 0 {
typeName.Typmods = &ast.List{
Items: []ast.Node{&ast.Integer{Ival: int64(flen)}},
}
}
// Don't add for DATETIME, TIMESTAMP - internal flen is not user-specified
}
// ...
}

MySQL supports `DELETE t1, t2 FROM t1 JOIN t2 ...`:
// convertDeleteStmt converts a TiDB DELETE statement into the sqlc
// *ast.DeleteStmt, branching on whether the statement is a multi-table delete.
//
// NOTE(review): this excerpt uses stmt without declaring it and has no return
// statement; the construction and return appear to be elided here.
func (c *cc) convertDeleteStmt(n *pcast.DeleteStmt) *ast.DeleteStmt {
	// Multi-table form (DELETE t1, t2 FROM t1 JOIN t2 ...): taken only when the
	// parser flagged the statement as multi-table and a target list is present.
	if n.IsMultiTable && n.Tables != nil {
		// Convert targets (t1.*, t2.*)
		targets := &ast.List{}
		for _, table := range n.Tables.Tables {
			// Build ColumnRef for each target
		}
		stmt.Targets = targets
		// Preserve JOINs in FromClause
		// Only the first converted item is kept. Indexing Items[0] assumes the
		// converted list is non-empty — TODO confirm against convertTableRefsClause.
		stmt.FromClause = c.convertTableRefsClause(n.TableRefs).Items[0]
	} else {
		// Single-table DELETE
		stmt.Relations = c.convertTableRefsClause(n.TableRefs)
	}
}

// TypeName returns the type name to emit for name. The names
// "bigint unsigned" and "bigint signed" are rendered as UNSIGNED and SIGNED
// respectively; every other name is returned unchanged. The ns argument is
// not used.
func (p *Parser) TypeName(ns, name string) string {
	switch name {
	case "bigint unsigned":
		return "UNSIGNED"
	case "bigint signed":
		return "SIGNED"
	}
	return name
}
func (p *Parser) Param(n int) string {
return "?" // MySQL uses ? for all parameters
}

Cause: New AST node type not handled in `astutils/walk.go` or `astutils/rewrite.go`
Solution: Add a case for the node type in both files

Cause: The `InsertStmt` case in `rewrite.go` didn't traverse `OnDuplicateKeyUpdate`
Solution: Add `a.apply(n, "OnDuplicateKeyUpdate", nil, n.OnDuplicateKeyUpdate)`

Cause: Converting `VariableExpr` to `A_Expr` with the `@` operator
Solution: Use `ast.VariableExpr` instead, which is not detected by `named.IsParamSign()`

Cause: Using the internal `flen` for all types
Solution: Only populate `Typmods` for types where the length is user-specified (varchar, char, etc.)

Tests that SQL can be:
- Parsed
- Formatted back to SQL
- Re-parsed
- Re-formatted to match

Tests the full sqlc pipeline:
- Parse schema and queries
- Analyze
- Generate code
- Compare with expected output