Skip to content

Commit 00142c6

Browse files
committed
wip 5
1 parent 603370f commit 00142c6

File tree

11 files changed

+347
-121
lines changed

11 files changed

+347
-121
lines changed

change-detection-prd.md

Lines changed: 131 additions & 104 deletions
Large diffs are not rendered by default.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Component Identity & Change Detection
2+
3+
The `component identity` subcommand computes deterministic fingerprints of component build inputs, and the `component diff-identity` subcommand compares two sets of those fingerprints. For example, CI can compute fingerprints for the base and head commits of a PR, then diff them to determine exactly which components have changed and need to be rebuilt/tested.
4+
5+
```bash
6+
# Typical CI workflow
7+
git checkout $BASE_REF && azldev component identity -a -O json > base.json
8+
git checkout $HEAD_REF && azldev component identity -a -O json > head.json
9+
azldev component diff-identity base.json head.json -O json -c
10+
# → {"changed": ["curl"], "added": ["wget"], "removed": [], "unchanged": []}
11+
```
12+
13+
## Fingerprint Inputs
14+
15+
A component's fingerprint is a SHA256 combining:
16+
17+
1. **Config hash** — `hashstructure.Hash()` of the resolved `ComponentConfig` (after all merging). Fields tagged `fingerprint:"-"` are excluded.
18+
2. **Source identity** — content hash for local specs (all files in the spec directory), commit hash for upstream.
19+
3. **Overlay file hashes** — SHA256 of each file referenced by overlay `Source` fields.
20+
4. **Distro name + version**
21+
5. **Affects commit count** — number of `Affects: <component>` commits in the project repo.
22+
23+
Global change propagation works automatically: the fingerprint operates on the fully-merged config, so a change to a distro or group default changes the resolved config of every inheriting component.
24+
25+
## `fingerprint:"-"` Tag System
26+
27+
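The `hashstructure` library uses `TagName: "fingerprint"`. Untagged fields are **included by default**. This is the safe default: a false positive (a component reported as changed when it did not need a rebuild) is preferable to a false negative (a real change that goes undetected).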
28+
29+
A guard test (`TestAllFingerprintedFieldsHaveDecision`) reflects over all fingerprinted structs and maintains a bi-directional allowlist of exclusions. It fails if a `fingerprint:"-"` tag is added without registering it, or if a registered exclusion's tag is removed.
30+
31+
### Adding a New Config Field
32+
33+
1. Add the field to the struct in `internal/projectconfig/`.
34+
2. **If NOT a build input**: add `fingerprint:"-"` to the struct tag and register it in `expectedExclusions` in `internal/projectconfig/fingerprint_test.go`.
35+
3. **If a build input**: do nothing — included by default.
36+
4. Run `mage unit`.
37+
38+
### Adding a New Source Type
39+
40+
1. Implement `SourceIdentityProvider` on your provider (see `localidentity.go` for a simple example).
41+
2. Add a case to `sourceManager.ResolveSourceIdentity()` in `sourcemanager.go`.
42+
3. Add tests in `identityprovider_test.go`.
43+
44+
## CLI
45+
46+
### `azldev component identity`
47+
48+
Compute fingerprints. Uses standard component filter flags (`-a`, `-p`, `-g`, `-s`). Exposed as an MCP tool.
49+
50+
### `azldev component diff-identity`
51+
52+
Compare two identity JSON files. The `--changed-only` / `-c` flag restricts the report to changed and added components (the build queue); the removed and unchanged lists are left empty. The flag applies to both table and JSON output.
53+
54+
## Known Limitations
55+
56+
- It is difficult to determine WHY a diff occurred (e.g., which specific field changed) since the fingerprint is a single opaque hash. The JSON output includes an `inputs` breakdown (`configHash`, `sourceIdentity`, `overlayFileHashes`, etc.) that can help narrow it down by comparing the two identity files manually.

docs/user/reference/cli/azldev_component_diff-identity.md

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/app/azldev/cmds/component/diffidentity.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ const diffIdentityArgCount = 2
2222

2323
// NewDiffIdentityCommand constructs a [cobra.Command] for "component diff-identity".
2424
func NewDiffIdentityCommand() *cobra.Command {
25+
var options struct {
26+
ChangedOnly bool
27+
}
28+
2529
cmd := &cobra.Command{
2630
Use: "diff-identity <base.json> <head.json>",
2731
Short: "Compare two identity files and report changed components",
@@ -37,11 +41,14 @@ CI uses the 'changed' and 'added' lists to determine the build queue.`,
3741
Args: cobra.ExactArgs(diffIdentityArgCount),
3842
RunE: azldev.RunFuncWithoutRequiredConfigWithExtraArgs(
3943
func(env *azldev.Env, args []string) (interface{}, error) {
40-
return DiffIdentities(env, args[0], args[1])
44+
return DiffIdentities(env, args[0], args[1], options.ChangedOnly)
4145
},
4246
),
4347
}
4448

49+
cmd.Flags().BoolVarP(&options.ChangedOnly, "changed-only", "c", false,
50+
"Only show changed and added components (the build queue)")
51+
4552
return cmd
4653
}
4754

@@ -74,7 +81,7 @@ type IdentityDiffReport struct {
7481
}
7582

7683
// DiffIdentities reads two identity JSON files and computes the diff.
77-
func DiffIdentities(env *azldev.Env, basePath string, headPath string) (interface{}, error) {
84+
func DiffIdentities(env *azldev.Env, basePath string, headPath string, changedOnly bool) (interface{}, error) {
7885
baseIdentities, err := readIdentityFile(env, basePath)
7986
if err != nil {
8087
return nil, fmt.Errorf("reading base identity file %#q:\n%w", basePath, err)
@@ -85,7 +92,7 @@ func DiffIdentities(env *azldev.Env, basePath string, headPath string) (interfac
8592
return nil, fmt.Errorf("reading head identity file %#q:\n%w", headPath, err)
8693
}
8794

88-
report := ComputeDiff(baseIdentities, headIdentities)
95+
report := ComputeDiff(baseIdentities, headIdentities, changedOnly)
8996

9097
// Return table-friendly results for table/CSV format, or the report for JSON.
9198
if env.DefaultReportFormat() == azldev.ReportFormatJSON {
@@ -121,7 +128,8 @@ func readIdentityFile(
121128
}
122129

123130
// ComputeDiff compares base and head identity maps and produces a diff report.
124-
func ComputeDiff(base map[string]string, head map[string]string) *IdentityDiffReport {
131+
// When changedOnly is true, the Removed and Unchanged lists are left empty.
132+
func ComputeDiff(base map[string]string, head map[string]string, changedOnly bool) *IdentityDiffReport {
125133
report := &IdentityDiffReport{}
126134

127135
// Check base components against head.
@@ -130,11 +138,15 @@ func ComputeDiff(base map[string]string, head map[string]string) *IdentityDiffRe
130138

131139
switch {
132140
case !exists:
133-
report.Removed = append(report.Removed, name)
141+
if !changedOnly {
142+
report.Removed = append(report.Removed, name)
143+
}
134144
case baseFP != headFP:
135145
report.Changed = append(report.Changed, name)
136146
default:
137-
report.Unchanged = append(report.Unchanged, name)
147+
if !changedOnly {
148+
report.Unchanged = append(report.Unchanged, name)
149+
}
138150
}
139151
}
140152

internal/app/azldev/cmds/component/diffidentity_test.go

Lines changed: 88 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@ import (
77
"testing"
88

99
"github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/component"
10+
"github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/testutils"
11+
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms"
12+
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils"
1013
"github.com/stretchr/testify/assert"
14+
"github.com/stretchr/testify/require"
1115
)
1216

1317
func TestComputeDiff(t *testing.T) {
@@ -25,7 +29,7 @@ func TestComputeDiff(t *testing.T) {
2529
"openssl": "sha256:ccc",
2630
}
2731

28-
report := component.ComputeDiff(base, head)
32+
report := component.ComputeDiff(base, head, false)
2933

3034
assert.Equal(t, []string{"wget"}, report.Changed)
3135
assert.Equal(t, []string{"libfoo"}, report.Added)
@@ -42,7 +46,7 @@ func TestComputeDiff(t *testing.T) {
4246
"curl": "sha256:aaa",
4347
}
4448

45-
report := component.ComputeDiff(base, head)
49+
report := component.ComputeDiff(base, head, false)
4650

4751
assert.Empty(t, report.Changed)
4852
assert.Empty(t, report.Added)
@@ -57,7 +61,7 @@ func TestComputeDiff(t *testing.T) {
5761
"wget": "sha256:bbb",
5862
}
5963

60-
report := component.ComputeDiff(base, head)
64+
report := component.ComputeDiff(base, head, false)
6165

6266
assert.Empty(t, report.Changed)
6367
assert.Equal(t, []string{"curl", "wget"}, report.Added)
@@ -71,7 +75,7 @@ func TestComputeDiff(t *testing.T) {
7175
}
7276
head := map[string]string{}
7377

74-
report := component.ComputeDiff(base, head)
78+
report := component.ComputeDiff(base, head, false)
7579

7680
assert.Empty(t, report.Changed)
7781
assert.Empty(t, report.Added)
@@ -80,7 +84,7 @@ func TestComputeDiff(t *testing.T) {
8084
})
8185

8286
t.Run("both empty", func(t *testing.T) {
83-
report := component.ComputeDiff(map[string]string{}, map[string]string{})
87+
report := component.ComputeDiff(map[string]string{}, map[string]string{}, false)
8488

8589
assert.Empty(t, report.Changed)
8690
assert.Empty(t, report.Added)
@@ -94,7 +98,7 @@ func TestComputeDiff(t *testing.T) {
9498
"openssl": "sha256:bbb",
9599
}
96100

97-
report := component.ComputeDiff(both, both)
101+
report := component.ComputeDiff(both, both, false)
98102

99103
assert.Empty(t, report.Changed)
100104
assert.Empty(t, report.Added)
@@ -114,8 +118,85 @@ func TestComputeDiff(t *testing.T) {
114118
"openssl": "sha256:ccc",
115119
}
116120

117-
report := component.ComputeDiff(base, head)
121+
report := component.ComputeDiff(base, head, false)
118122

119123
assert.Equal(t, []string{"curl", "zlib"}, report.Changed, "changed list should be sorted")
120124
})
125+
126+
t.Run("changed only", func(t *testing.T) {
127+
base := map[string]string{
128+
"curl": "sha256:aaa",
129+
"wget": "sha256:bbb",
130+
"openssl": "sha256:ccc",
131+
"libold": "sha256:fff",
132+
}
133+
head := map[string]string{
134+
"curl": "sha256:aaa",
135+
"wget": "sha256:ddd",
136+
"libfoo": "sha256:eee",
137+
"openssl": "sha256:ccc",
138+
}
139+
140+
report := component.ComputeDiff(base, head, true)
141+
142+
assert.Equal(t, []string{"wget"}, report.Changed)
143+
assert.Equal(t, []string{"libfoo"}, report.Added)
144+
assert.Empty(t, report.Removed, "removed should be empty with changedOnly")
145+
assert.Empty(t, report.Unchanged, "unchanged should be empty with changedOnly")
146+
})
147+
}
148+
149+
func TestDiffIdentities_MissingFile(t *testing.T) {
150+
testEnv := testutils.NewTestEnv(t)
151+
152+
_, err := component.DiffIdentities(testEnv.Env, "/nonexistent/base.json", "/nonexistent/head.json", false)
153+
require.Error(t, err)
154+
assert.Contains(t, err.Error(), "base identity file")
155+
}
156+
157+
func TestDiffIdentities_MalformedJSON(t *testing.T) {
158+
testEnv := testutils.NewTestEnv(t)
159+
160+
require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/base.json",
161+
[]byte("not valid json"), fileperms.PublicFile))
162+
require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/head.json",
163+
[]byte(`[{"component":"a","fingerprint":"sha256:aaa"}]`), fileperms.PublicFile))
164+
165+
_, err := component.DiffIdentities(testEnv.Env, "/base.json", "/head.json", false)
166+
require.Error(t, err)
167+
assert.Contains(t, err.Error(), "base identity file")
168+
}
169+
170+
func TestDiffIdentities_ValidFiles(t *testing.T) {
171+
testEnv := testutils.NewTestEnv(t)
172+
173+
require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/base.json",
174+
[]byte(`[{"component":"curl","fingerprint":"sha256:aaa"}]`), fileperms.PublicFile))
175+
require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/head.json",
176+
[]byte(`[{"component":"curl","fingerprint":"sha256:bbb"},{"component":"wget","fingerprint":"sha256:ccc"}]`),
177+
fileperms.PublicFile))
178+
179+
result, err := component.DiffIdentities(testEnv.Env, "/base.json", "/head.json", false)
180+
require.NoError(t, err)
181+
182+
// Default format is table, so we get []IdentityDiffResult.
183+
tableResults, ok := result.([]component.IdentityDiffResult)
184+
require.True(t, ok, "expected table results for default report format")
185+
require.Len(t, tableResults, 2)
186+
}
187+
188+
func TestDiffIdentities_EmptyArray(t *testing.T) {
189+
testEnv := testutils.NewTestEnv(t)
190+
191+
require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/base.json",
192+
[]byte(`[]`), fileperms.PublicFile))
193+
require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/head.json",
194+
[]byte(`[]`), fileperms.PublicFile))
195+
196+
result, err := component.DiffIdentities(testEnv.Env, "/base.json", "/head.json", false)
197+
require.NoError(t, err)
198+
199+
tableResults, ok := result.([]component.IdentityDiffResult)
200+
require.True(t, ok)
201+
assert.Empty(t, tableResults)
121202
}

internal/fingerprint/fingerprint_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,38 @@ func TestComputeIdentity_SourceFilesChange(t *testing.T) {
336336
assert.NotEqual(t, fp1, fp2, "different source file hash must produce different fingerprints")
337337
}
338338

339+
func TestComputeIdentity_SourceFileOriginExcluded(t *testing.T) {
340+
ctx := newTestFS(t, map[string]string{
341+
"/specs/test.spec": "Name: testpkg\nVersion: 1.0",
342+
})
343+
344+
comp1 := baseComponent()
345+
comp1.SourceFiles = []projectconfig.SourceFileReference{
346+
{
347+
Filename: "source.tar.gz",
348+
Hash: "aaa111",
349+
HashType: fileutils.HashTypeSHA256,
350+
Origin: projectconfig.Origin{Type: "download", Uri: "https://old-cdn.example.com/source.tar.gz"},
351+
},
352+
}
353+
354+
comp2 := baseComponent()
355+
comp2.SourceFiles = []projectconfig.SourceFileReference{
356+
{
357+
Filename: "source.tar.gz",
358+
Hash: "aaa111",
359+
HashType: fileutils.HashTypeSHA256,
360+
Origin: projectconfig.Origin{Type: "download", Uri: "https://new-cdn.example.com/source.tar.gz"},
361+
},
362+
}
363+
distro := baseDistroRef()
364+
365+
fp1 := computeFingerprint(t, ctx, comp1, distro, 0)
366+
fp2 := computeFingerprint(t, ctx, comp2, distro, 0)
367+
368+
assert.Equal(t, fp1, fp2, "changing source file origin URL must NOT change fingerprint")
369+
}
370+
339371
func TestComputeIdentity_InputsBreakdown(t *testing.T) {
340372
ctx := newTestFS(t, map[string]string{
341373
"/specs/test.spec": "Name: testpkg\nVersion: 1.0",

internal/projectconfig/component.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ type SourceFileReference struct {
6161
HashType fileutils.HashType `toml:"hash-type,omitempty" json:"hashType,omitempty"`
6262

6363
// Origin for this source file. When omitted, the file is resolved via the lookaside cache.
64-
Origin Origin `toml:"origin,omitempty" json:"origin,omitempty"`
64+
Origin Origin `toml:"origin,omitempty" json:"origin,omitempty" fingerprint:"-"`
6565
}
6666

6767
// Defines a component group. Component groups are logical groupings of components (see [ComponentConfig]).

internal/projectconfig/fingerprint_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ func TestAllFingerprintedFieldsHaveDecision(t *testing.T) {
7575
// upstream commit hash (captured separately via SourceIdentity) is what matters.
7676
// Excluding this prevents a snapshot bump from marking all upstream components as changed.
7777
"DistroReference.Snapshot": true,
78+
79+
// SourceFileReference.Origin — download location metadata (URI, type), not a build input.
80+
// The file content is already captured by Filename + Hash; changing a CDN URL should not
81+
// trigger a rebuild.
82+
"SourceFileReference.Origin": true,
7883
}
7984

8085
// Collect all actual exclusions found via reflection.

internal/providers/sourceproviders/localidentity.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"encoding/hex"
1010
"fmt"
1111
"io/fs"
12+
"path/filepath"
1213
"sort"
1314

1415
"github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components"
@@ -84,7 +85,12 @@ func (provider *LocalIdentityProvider) ResolveSourceIdentity(
8485
return "", fmt.Errorf("hashing file %#q:\n%w", filePath, hashErr)
8586
}
8687

87-
fmt.Fprintf(combinedHasher, "%s=%s\n", filePath, fileHash)
88+
relPath, relErr := filepath.Rel(provider.specDir, filePath)
89+
if relErr != nil {
90+
return "", fmt.Errorf("computing relative path for %#q:\n%w", filePath, relErr)
91+
}
92+
93+
fmt.Fprintf(combinedHasher, "%s=%s\n", relPath, fileHash)
8894
}
8995

9096
return hex.EncodeToString(combinedHasher.Sum(nil)), nil

internal/utils/git/git.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,11 @@ func (g *GitProviderImpl) LsRemoteHead(
173173
return "", errors.New("repository URL cannot be empty")
174174
}
175175

176+
_, err := url.Parse(repoURL)
177+
if err != nil {
178+
return "", fmt.Errorf("invalid URL %#q:\n%w", repoURL, err)
179+
}
180+
176181
if branch == "" {
177182
return "", errors.New("branch cannot be empty")
178183
}

0 commit comments

Comments
 (0)