diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 7dcae4c7b12..8c5ee72bbad 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -18,6 +18,7 @@ * Mark vector search index index_subtype as backend_default to prevent drift after deployment ([#5454](https://github.com/databricks/cli/pull/5454)). * `bundle deployment migrate`: handle resources added to or removed from `databricks.yml` since the last Terraform deploy ([#5463](https://github.com/databricks/cli/pull/5463)). * Add the `genie_spaces` bundle resource for managing Databricks Genie spaces as code, plus `bundle generate genie-space` to import an existing space. Direct deployment engine only ([#5282](https://github.com/databricks/cli/pull/5282)). +* Fix spurious recreate of schemas and volumes whose names use mixed case ([#5531](https://github.com/databricks/cli/pull/5531)). ### Dependency updates diff --git a/acceptance/bundle/invariant/configs/schema_uppercase_name.yml.tmpl b/acceptance/bundle/invariant/configs/schema_uppercase_name.yml.tmpl new file mode 100644 index 00000000000..abf993785f2 --- /dev/null +++ b/acceptance/bundle/invariant/configs/schema_uppercase_name.yml.tmpl @@ -0,0 +1,14 @@ +bundle: + name: test-bundle-$UNIQUE_NAME + +# Reproduce: UC API normalizes identifier names to lowercase, causing +# false drift on the second deploy when the config uses mixed-case names. +# Volumes are covered by acceptance/bundle/resources/volumes/uppercase-name +# instead: the Terraform provider rejects an uppercase volume schema_name +# ("inconsistent final plan"), and the migrate invariant always deploys via +# Terraform first, so an uppercase volume cannot live in this shared config. +resources: + schemas: + foo: + catalog_name: main + name: MySCHEMA-$UNIQUE_NAME diff --git a/acceptance/bundle/invariant/configs/volume_uppercase_name.yml.tmpl b/acceptance/bundle/invariant/configs/volume_uppercase_name.yml.tmpl new file mode 100644 index 00000000000..c6caa55caa7 --- /dev/null +++ b/acceptance/bundle/invariant/configs/volume_uppercase_name.yml.tmpl @@ -0,0 +1,22 @@ +bundle: + name: test-bundle-$UNIQUE_NAME + +# Reproduce: the UC API lowercases identifier names, causing false drift on the +# second deploy when the config uses mixed-case names. This covers a volume's +# catalog_name, schema_name and name on the direct engine. +# +# Excluded from the migrate invariant: that test deploys via Terraform first, +# and the TF provider rejects an uppercase volume schema_name ("inconsistent +# final plan"). +resources: + schemas: + foo: + catalog_name: main + name: MySCHEMA-$UNIQUE_NAME + + volumes: + bar: + catalog_name: main + schema_name: ${resources.schemas.foo.name} + name: MyVOL-$UNIQUE_NAME + volume_type: MANAGED diff --git a/acceptance/bundle/invariant/continue_293/out.test.toml b/acceptance/bundle/invariant/continue_293/out.test.toml index 62de900a9ce..53c82a51868 100644 --- a/acceptance/bundle/invariant/continue_293/out.test.toml +++ b/acceptance/bundle/invariant/continue_293/out.test.toml @@ -34,6 +34,7 @@ EnvMatrix.INPUT_CONFIG = [ "registered_model.yml.tmpl", "schema.yml.tmpl", "schema_grant_ref.yml.tmpl", + "schema_uppercase_name.yml.tmpl", "schema_with_grants.yml.tmpl", "secret_scope.yml.tmpl", "secret_scope_default_backend_type.yml.tmpl", @@ -43,5 +44,6 @@ EnvMatrix.INPUT_CONFIG = [ "vector_search_endpoint.yml.tmpl", "vector_search_index.yml.tmpl", "volume.yml.tmpl", - "volume_external.yml.tmpl" + "volume_external.yml.tmpl", + "volume_uppercase_name.yml.tmpl" ] diff --git a/acceptance/bundle/invariant/migrate/out.test.toml b/acceptance/bundle/invariant/migrate/out.test.toml index 62de900a9ce..53c82a51868 100644 --- a/acceptance/bundle/invariant/migrate/out.test.toml +++ b/acceptance/bundle/invariant/migrate/out.test.toml @@ -34,6 +34,7 @@ EnvMatrix.INPUT_CONFIG = [ "registered_model.yml.tmpl", "schema.yml.tmpl", "schema_grant_ref.yml.tmpl", + "schema_uppercase_name.yml.tmpl", "schema_with_grants.yml.tmpl", "secret_scope.yml.tmpl", "secret_scope_default_backend_type.yml.tmpl", @@ -43,5 +44,6 @@ EnvMatrix.INPUT_CONFIG = [ "vector_search_endpoint.yml.tmpl", "vector_search_index.yml.tmpl", "volume.yml.tmpl", - "volume_external.yml.tmpl" + "volume_external.yml.tmpl", + "volume_uppercase_name.yml.tmpl" ] diff --git a/acceptance/bundle/invariant/migrate/test.toml b/acceptance/bundle/invariant/migrate/test.toml index 9492aff72cd..a121cbf6475 100644 --- a/acceptance/bundle/invariant/migrate/test.toml +++ b/acceptance/bundle/invariant/migrate/test.toml @@ -26,3 +26,7 @@ EnvMatrixExclude.no_sql_warehouse = ["INPUT_CONFIG=sql_warehouse.yml.tmpl"] # The 1000-task scale case is covered by no_drift. Running it here adds ~1.5 min # per variant (deploy + migrate + plan at 1000 tasks) without incremental coverage. EnvMatrixExclude.no_pydabs_1000_tasks = ["INPUT_CONFIG=job_pydabs_1000_tasks.yml.tmpl"] + +# migrate deploys via Terraform first, and the TF provider rejects an uppercase +# volume schema_name ("inconsistent final plan"). Covered by no_drift on direct. +EnvMatrixExclude.no_volume_uppercase = ["INPUT_CONFIG=volume_uppercase_name.yml.tmpl"] diff --git a/acceptance/bundle/invariant/no_drift/out.test.toml b/acceptance/bundle/invariant/no_drift/out.test.toml index 62de900a9ce..53c82a51868 100644 --- a/acceptance/bundle/invariant/no_drift/out.test.toml +++ b/acceptance/bundle/invariant/no_drift/out.test.toml @@ -34,6 +34,7 @@ EnvMatrix.INPUT_CONFIG = [ "registered_model.yml.tmpl", "schema.yml.tmpl", "schema_grant_ref.yml.tmpl", + "schema_uppercase_name.yml.tmpl", "schema_with_grants.yml.tmpl", "secret_scope.yml.tmpl", "secret_scope_default_backend_type.yml.tmpl", @@ -43,5 +44,6 @@ EnvMatrix.INPUT_CONFIG = [ "vector_search_endpoint.yml.tmpl", "vector_search_index.yml.tmpl", "volume.yml.tmpl", - "volume_external.yml.tmpl" + "volume_external.yml.tmpl", + "volume_uppercase_name.yml.tmpl" ] diff --git a/acceptance/bundle/invariant/test.toml b/acceptance/bundle/invariant/test.toml index 82831127ac2..3c16332861d 100644 --- a/acceptance/bundle/invariant/test.toml +++ b/acceptance/bundle/invariant/test.toml @@ -52,6 +52,7 @@ EnvMatrix.INPUT_CONFIG = [ "registered_model.yml.tmpl", "schema.yml.tmpl", "schema_grant_ref.yml.tmpl", + "schema_uppercase_name.yml.tmpl", "schema_with_grants.yml.tmpl", "secret_scope.yml.tmpl", "secret_scope_default_backend_type.yml.tmpl", @@ -62,6 +63,7 @@ EnvMatrix.INPUT_CONFIG = [ "vector_search_index.yml.tmpl", "volume.yml.tmpl", "volume_external.yml.tmpl", + "volume_uppercase_name.yml.tmpl", ] [EnvMatrixExclude] diff --git a/acceptance/bundle/resources/volumes/uppercase-name/databricks.yml b/acceptance/bundle/resources/volumes/uppercase-name/databricks.yml new file mode 100644 index 00000000000..3a8448bddda --- /dev/null +++ b/acceptance/bundle/resources/volumes/uppercase-name/databricks.yml @@ -0,0 +1,15 @@ +bundle: + name: test-bundle + +resources: + schemas: + schema1: + catalog_name: main + name: MySCHEMA + + volumes: + vol1: + catalog_name: main + schema_name: MySCHEMA + name: MyVOLUME + volume_type: MANAGED diff --git a/acceptance/bundle/resources/volumes/uppercase-name/out.deploy.direct.txt b/acceptance/bundle/resources/volumes/uppercase-name/out.deploy.direct.txt new file mode 100644 index 00000000000..440c0314386 --- /dev/null +++ b/acceptance/bundle/resources/volumes/uppercase-name/out.deploy.direct.txt @@ -0,0 +1,61 @@ +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! +{ + "method": "POST", + "path": "/api/2.1/unity-catalog/schemas", + "body": { + "catalog_name": "main", + "name": "MySCHEMA" + } +} +{ + "method": "POST", + "path": "/api/2.1/unity-catalog/volumes", + "body": { + "catalog_name": "main", + "name": "MyVOLUME", + "schema_name": "MySCHEMA", + "volume_type": "MANAGED" + } +} +=== Bundle summary shows lowercase IDs after UC normalizes names +{ + "schema_id": "main.myschema", + "volume_id": "main.myschema.myvolume" +} +=== Plan on no-op redeploy: mixed-case names are skipped, not recreated +[ + { + "resource": "resources.schemas.schema1", + "changes": { + "name": { + "action": "skip", + "reason": "uc_case" + } + } + }, + { + "resource": "resources.volumes.vol1", + "changes": { + "name": { + "action": "skip", + "reason": "uc_case" + }, + "schema_name": { + "action": "skip", + "reason": "uc_case" + }, + "storage_location": { + "action": "skip", + "reason": "backend_default" + } + } + } +] +=== Redeploy without changes - should be a no-op +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/acceptance/bundle/resources/volumes/uppercase-name/out.deploy.terraform.txt b/acceptance/bundle/resources/volumes/uppercase-name/out.deploy.terraform.txt new file mode 100644 index 00000000000..2cd89623003 --- /dev/null +++ b/acceptance/bundle/resources/volumes/uppercase-name/out.deploy.terraform.txt @@ -0,0 +1,27 @@ +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... +Deploying resources... +Error: terraform apply: exit status 1 + +Error: Provider produced inconsistent final plan + +When expanding the plan for databricks_volume.vol1 to include new values +learned so far during apply, provider +"registry.terraform.io/databricks/databricks" produced an invalid new value +for .schema_name: was cty.StringVal("MySCHEMA"), but now +cty.StringVal("myschema"). + +This is a bug in the provider, which should be reported in the provider's own +issue tracker. + + +Updating deployment state... + +Exit code: 1 +{ + "method": "POST", + "path": "/api/2.1/unity-catalog/schemas", + "body": { + "catalog_name": "main", + "name": "MySCHEMA" + } +} diff --git a/acceptance/bundle/resources/volumes/uppercase-name/out.test.toml b/acceptance/bundle/resources/volumes/uppercase-name/out.test.toml new file mode 100644 index 00000000000..e849ec85ace --- /dev/null +++ b/acceptance/bundle/resources/volumes/uppercase-name/out.test.toml @@ -0,0 +1,4 @@ +Local = true +Cloud = true +RequiresUnityCatalog = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/resources/volumes/uppercase-name/output.txt b/acceptance/bundle/resources/volumes/uppercase-name/output.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/acceptance/bundle/resources/volumes/uppercase-name/script b/acceptance/bundle/resources/volumes/uppercase-name/script new file mode 100644 index 00000000000..e8521397d3e --- /dev/null +++ b/acceptance/bundle/resources/volumes/uppercase-name/script @@ -0,0 +1,20 @@ +echo "*" > .gitignore +errcode $CLI bundle deploy &> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt +print_requests.py //unity-catalog >> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt + +if [ "$DATABRICKS_BUNDLE_ENGINE" = "direct" ]; then + echo "=== Bundle summary shows lowercase IDs after UC normalizes names" >> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt + $CLI bundle summary -o json \ + | jq '{schema_id: .resources.schemas.schema1.id, volume_id: .resources.volumes.vol1.id}' \ + >> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt + + echo "=== Plan on no-op redeploy: mixed-case names are skipped, not recreated" >> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt + $CLI bundle plan -o json \ + | jq '.plan | to_entries | map({resource: .key, changes: (.value.changes // {} | map_values({action, reason}))})' \ + >> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt + + echo "=== Redeploy without changes - should be a no-op" >> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt + # &>> requires bash 4+; macOS CI runs the stock bash 3.2, so use >> ... 2>&1. + errcode $CLI bundle deploy >> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt 2>&1 + print_requests.py //unity-catalog >> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt +fi diff --git a/acceptance/bundle/resources/volumes/uppercase-name/test.toml b/acceptance/bundle/resources/volumes/uppercase-name/test.toml new file mode 100644 index 00000000000..8331e99f292 --- /dev/null +++ b/acceptance/bundle/resources/volumes/uppercase-name/test.toml @@ -0,0 +1,11 @@ +Cloud = true +RequiresUnityCatalog = true + +Ignore = [ + ".databricks", +] + +# TF provider fails with "inconsistent final plan" when schema_name contains +# uppercase letters that UC normalizes to lowercase; exclude from cloud runs. +[EnvMatrixExclude] +no_terraform_on_cloud = ["CONFIG_Cloud=true", "DATABRICKS_BUNDLE_ENGINE=terraform"] diff --git a/bundle/deployplan/plan.go b/bundle/deployplan/plan.go index 2fb5d38c806..6254e92b802 100644 --- a/bundle/deployplan/plan.go +++ b/bundle/deployplan/plan.go @@ -100,7 +100,6 @@ type ChangeDesc struct { const ( ReasonBackendDefault = "backend_default" ReasonAlias = "alias" - ReasonURLNormalization = "url_normalization" ReasonRemoteAlreadySet = "remote_already_set" ReasonEmpty = "empty" ReasonCustom = "custom" diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index 5591626cd75..fea24b79808 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -375,6 +375,12 @@ func addPerFieldActions(ctx context.Context, adapter *dresources.Adapter, change } else if reason, ok := shouldSkipBackendDefault(generatedCfg, path, ch); ok { ch.Action = deployplan.Skip ch.Reason = reason + } else if reason, ok := shouldSkipNormalized(cfg, path, ch); ok { + ch.Action = deployplan.Skip + ch.Reason = reason + } else if reason, ok := shouldSkipNormalized(generatedCfg, path, ch); ok { + ch.Action = deployplan.Skip + ch.Reason = reason } else if action, reason := shouldUpdateOrRecreate(cfg, path); action != deployplan.Undefined { ch.Action = action ch.Reason = reason @@ -434,6 +440,29 @@ func shouldSkip(cfg *dresources.ResourceLifecycleConfig, path *structpath.PathNo return "", false } +// shouldSkipNormalized skips a change that is a false diff caused by UC API +// normalization: the API lowercases identifier names (normalize_case) and strips +// trailing slashes from storage URLs (normalize_slash). The direct engine saves +// local config to state, so without this the next plan sees the original value +// against the normalized remote value and triggers a spurious recreate/update. +func shouldSkipNormalized(cfg *dresources.ResourceLifecycleConfig, path *structpath.PathNode, ch *deployplan.ChangeDesc) (string, bool) { + if cfg == nil { + return "", false + } + newStr, newOk := ch.New.(string) + remoteStr, remoteOk := ch.Remote.(string) + if !newOk || !remoteOk { + return "", false + } + if reason, ok := findMatchingRule(path, cfg.NormalizeCase); ok && strings.EqualFold(newStr, remoteStr) { + return reason, true + } + if reason, ok := findMatchingRule(path, cfg.NormalizeSlash); ok && strings.TrimRight(newStr, "/") == strings.TrimRight(remoteStr, "/") { + return reason, true + } + return "", false +} + func shouldUpdateOrRecreate(cfg *dresources.ResourceLifecycleConfig, path *structpath.PathNode) (deployplan.ActionType, string) { if cfg == nil { return deployplan.Undefined, "" diff --git a/bundle/direct/dresources/config.go b/bundle/direct/dresources/config.go index 70f6dbb1d8d..a425d7d7208 100644 --- a/bundle/direct/dresources/config.go +++ b/bundle/direct/dresources/config.go @@ -59,6 +59,14 @@ type ResourceLifecycleConfig struct { // UpdateIDOnChanges: field patterns that trigger UpdateWithID when changed. UpdateIDOnChanges []FieldRule `yaml:"update_id_on_changes,omitempty"` + // NormalizeCase: string field patterns the UC API lowercases on write. + // A change is skipped when local and remote differ only by case. + NormalizeCase []FieldRule `yaml:"normalize_case,omitempty"` + + // NormalizeSlash: string field patterns the UC API strips trailing slashes from. + // A change is skipped when local and remote differ only by trailing slashes. + NormalizeSlash []FieldRule `yaml:"normalize_slash,omitempty"` + // BackendDefaults: fields where the backend may set defaults. // When old and new are nil but remote is set, and the remote value matches allowed values (if specified), the change is skipped. BackendDefaults []BackendDefaultRule `yaml:"backend_defaults,omitempty"` @@ -80,6 +88,8 @@ var empty = ResourceLifecycleConfig{ IgnoreLocalChanges: nil, RecreateOnChanges: nil, UpdateIDOnChanges: nil, + NormalizeCase: nil, + NormalizeSlash: nil, BackendDefaults: nil, } diff --git a/bundle/direct/dresources/resources.yml b/bundle/direct/dresources/resources.yml index 6b96ad5eea6..5a017a4b24d 100644 --- a/bundle/direct/dresources/resources.yml +++ b/bundle/direct/dresources/resources.yml @@ -311,6 +311,15 @@ resources: reason: immutable - field: storage_root reason: immutable + normalize_case: + # UC lowercases identifier names; remote returns "myschema" for config "MySchema". + - field: name + reason: uc_case + - field: catalog_name + reason: uc_case + normalize_slash: + - field: storage_root + reason: uc_strips_trailing_slash external_locations: recreate_on_changes: @@ -341,6 +350,20 @@ resources: update_id_on_changes: - field: name reason: id_changes + normalize_case: + # UC lowercases identifier names. name is in update_id_on_changes, so a + # case-only diff would otherwise trigger a spurious rename (DoUpdateWithID). + - field: catalog_name + reason: uc_case + - field: schema_name + reason: uc_case + - field: name + reason: uc_case + normalize_slash: + # UC strips trailing slashes on create; matches the Terraform provider's suppressLocationDiff. + # https://github.com/databricks/terraform-provider-databricks/blob/v1.65.1/catalog/resource_volume.go#L25 + - field: storage_location + reason: uc_strips_trailing_slash backend_defaults: # storage_location is Computed; backend generates it for managed volumes. - field: storage_location diff --git a/bundle/direct/dresources/volume.go b/bundle/direct/dresources/volume.go index 73bf7a79b40..6c96e66eccb 100644 --- a/bundle/direct/dresources/volume.go +++ b/bundle/direct/dresources/volume.go @@ -6,9 +6,7 @@ import ( "strings" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/structs/structpath" "github.com/databricks/cli/libs/utils" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/catalog" @@ -114,35 +112,6 @@ func (r *ResourceVolume) DoDelete(ctx context.Context, id string, _ *catalog.Cre return r.client.Volumes.DeleteByName(ctx, id) } -// OverrideChangeDesc suppresses drift for storage_location when the only difference -// is a trailing slash. The UC API strips trailing slashes on create, so remote returns -// "s3://bucket/path" while the config may have "s3://bucket/path/". -// -// This matches the Terraform provider's suppressLocationDiff behavior. -// https://github.com/databricks/terraform-provider-databricks/blob/v1.65.1/catalog/resource_volume.go#L25 -func (*ResourceVolume) OverrideChangeDesc(_ context.Context, path *structpath.PathNode, change *ChangeDesc, _ *catalog.VolumeInfo) error { - if change.Action == deployplan.Skip { - return nil - } - - if path.String() != "storage_location" { - return nil - } - - newStr, newOk := change.New.(string) - remoteStr, remoteOk := change.Remote.(string) - if !newOk || !remoteOk { - return nil - } - - if newStr != remoteStr && strings.TrimRight(newStr, "/") == strings.TrimRight(remoteStr, "/") { - change.Action = deployplan.Skip - change.Reason = deployplan.ReasonURLNormalization - } - - return nil -} - func getNameFromID(id string) (string, error) { items := strings.Split(id, ".") if len(items) == 0 { diff --git a/libs/testserver/schemas.go b/libs/testserver/schemas.go index 1d4dc79e7ac..fa190c6694a 100644 --- a/libs/testserver/schemas.go +++ b/libs/testserver/schemas.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "net/http" + "strings" "dario.cat/mergo" "github.com/databricks/databricks-sdk-go/service/catalog" @@ -23,6 +24,8 @@ func (s *FakeWorkspace) SchemasCreate(req Request) Response { } } + // UC normalizes schema names to lowercase. + schema.Name = strings.ToLower(schema.Name) schema.FullName = schema.CatalogName + "." + schema.Name schema.ForceSendFields = []string{"BrowseOnly"} schema.CatalogType = "MANAGED_CATALOG" diff --git a/libs/testserver/volumes.go b/libs/testserver/volumes.go index 76b5fdf3e5f..88eae7ac021 100644 --- a/libs/testserver/volumes.go +++ b/libs/testserver/volumes.go @@ -20,6 +20,9 @@ func (s *FakeWorkspace) VolumesCreate(req Request) Response { } } + // UC normalizes schema and volume names to lowercase. + volume.SchemaName = strings.ToLower(volume.SchemaName) + volume.Name = strings.ToLower(volume.Name) volume.FullName = volume.CatalogName + "." + volume.SchemaName + "." + volume.Name if volume.StorageLocation != "" && volume.VolumeType != catalog.VolumeTypeExternal {