Skip to content

Commit 8326413

Browse files
committed
feat(cli): Add component identity-diff command, scenario test
1 parent f3130bd commit 8326413

13 files changed

+794
-0
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Component Identity & Change Detection
2+
3+
The `component identity` and `component diff-identity` subcommands compute deterministic fingerprints of component build inputs. For example, CI can compute fingerprints for the base and head commits of a PR, then diff them to determine exactly which components have changed and need to be rebuilt/tested.
4+
5+
```bash
6+
# Typical CI workflow
7+
git checkout $BASE_REF && azldev component identity -a -O json > base.json
8+
git checkout $HEAD_REF && azldev component identity -a -O json > head.json
9+
azldev component diff-identity base.json head.json -O json -c
10+
# → {"changed": ["curl"], "added": ["wget"], "removed": [], "unchanged": []}
11+
```
12+
13+
## Fingerprint Inputs
14+
15+
A component's fingerprint is a SHA256 combining:
16+
17+
1. **Config hash** — `hashstructure.Hash()` of the resolved `ComponentConfig` (after all merging). Fields tagged `fingerprint:"-"` are excluded.
18+
2. **Source identity** — content hash for local specs (all files in the spec directory), commit hash for upstream.
19+
3. **Overlay file hashes** — SHA256 of each file referenced by overlay `Source` fields.
20+
4. **Distro name + version**
21+
5. **Affects commit count** — number of `Affects: <component>` commits in the project repo.
22+
23+
Global change propagation works automatically: the fingerprint operates on the fully-merged config, so a change to a distro or group default changes the resolved config of every inheriting component.
24+
25+
## `fingerprint:"-"` Tag System
26+
27+
The `hashstructure` library uses `TagName: "fingerprint"`. Untagged fields are **included by default**. This is the safe default: a false positive (an unnecessary rebuild) is preferable to a false negative (a missed rebuild).
28+
29+
A guard test (`TestAllFingerprintedFieldsHaveDecision`) reflects over all fingerprinted structs and maintains a bi-directional allowlist of exclusions. It fails if a `fingerprint:"-"` tag is added without registering it, or if a registered exclusion's tag is removed.
30+
31+
### Adding a New Config Field
32+
33+
1. Add the field to the struct in `internal/projectconfig/`.
34+
2. **If NOT a build input**: add `fingerprint:"-"` to the struct tag and register it in `expectedExclusions` in `internal/projectconfig/fingerprint_test.go`.
35+
3. **If a build input**: do nothing — included by default.
36+
4. Run `mage unit`.
37+
38+
### Adding a New Source Type
39+
40+
1. Implement `SourceIdentityProvider` on your provider (see `ResolveLocalSourceIdentity` in `localidentity.go` for a simple example).
41+
2. Add a case to `sourceManager.ResolveSourceIdentity()` in `sourcemanager.go`.
42+
3. Add tests in `identityprovider_test.go`.
43+
44+
## CLI
45+
46+
### `azldev component identity`
47+
48+
Compute fingerprints. Uses standard component filter flags (`-a`, `-p`, `-g`, `-s`). Exposed as an MCP tool.
49+
50+
### `azldev component diff-identity`
51+
52+
Compare two identity JSON files. The `--changed-only` / `-c` flag filters the result to only changed and added components (the build queue). It applies to both table and JSON output; in JSON output the `removed` and `unchanged` arrays are still emitted, but empty.
53+
54+
## Known Limitations
55+
56+
- It is difficult to determine WHY a diff occurred (e.g., which specific field changed) since the fingerprint is a single opaque hash. The JSON output includes an `inputs` breakdown (`configHash`, `sourceIdentity`, `overlayFileHashes`, etc.) that can help narrow it down by comparing the two identity files manually.

docs/user/reference/cli/azldev_component.md

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/user/reference/cli/azldev_component_diff-identity.md

Lines changed: 54 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/app/azldev/cmds/component/component.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ components defined in the project configuration.`,
2525
app.AddTopLevelCommand(cmd)
2626
addOnAppInit(app, cmd)
2727
buildOnAppInit(app, cmd)
28+
diffIdentityOnAppInit(app, cmd)
2829
diffSourcesOnAppInit(app, cmd)
2930
identityOnAppInit(app, cmd)
3031
listOnAppInit(app, cmd)
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
package component
5+
6+
import (
7+
"encoding/json"
8+
"fmt"
9+
"sort"
10+
11+
"github.com/microsoft/azure-linux-dev-tools/internal/app/azldev"
12+
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils"
13+
"github.com/spf13/cobra"
14+
)
15+
16+
func diffIdentityOnAppInit(_ *azldev.App, parentCmd *cobra.Command) {
17+
parentCmd.AddCommand(NewDiffIdentityCommand())
18+
}
19+
20+
// diffIdentityArgCount is how many positional arguments "diff-identity" accepts:
// the base identity file followed by the head identity file.
const diffIdentityArgCount = 2
22+
23+
// NewDiffIdentityCommand constructs a [cobra.Command] for "component diff-identity".
24+
func NewDiffIdentityCommand() *cobra.Command {
25+
var options struct {
26+
ChangedOnly bool
27+
}
28+
29+
cmd := &cobra.Command{
30+
Use: "diff-identity <base.json> <head.json>",
31+
Short: "Compare two identity files and report changed components",
32+
Long: `Compare two component identity JSON files (produced by 'component identity -a -O json')
33+
and report which components have changed, been added, or been removed.
34+
35+
CI uses the 'changed' and 'added' lists to determine the build queue.`,
36+
Example: ` # Compare base and head identity files
37+
azldev component diff-identity base-identity.json head-identity.json
38+
39+
# JSON output for CI
40+
azldev component diff-identity base.json head.json -O json`,
41+
Args: cobra.ExactArgs(diffIdentityArgCount),
42+
RunE: azldev.RunFuncWithoutRequiredConfigWithExtraArgs(
43+
func(env *azldev.Env, args []string) (interface{}, error) {
44+
return DiffIdentities(env, args[0], args[1], options.ChangedOnly)
45+
},
46+
),
47+
}
48+
49+
cmd.Flags().BoolVarP(&options.ChangedOnly, "changed-only", "c", false,
50+
"Only show changed and added components (the build queue)")
51+
52+
return cmd
53+
}
54+
55+
// IdentityDiffStatus labels how a single component compares between the base
// and head identity files.
type IdentityDiffStatus string

// The four possible outcomes of comparing one component across base and head.
const (
	// IdentityDiffChanged: present in both files, but the fingerprints differ.
	IdentityDiffChanged IdentityDiffStatus = "changed"
	// IdentityDiffAdded: present in head only.
	IdentityDiffAdded IdentityDiffStatus = "added"
	// IdentityDiffRemoved: present in base only.
	IdentityDiffRemoved IdentityDiffStatus = "removed"
	// IdentityDiffUnchanged: present in both files with the same fingerprint.
	IdentityDiffUnchanged IdentityDiffStatus = "unchanged"
)
68+
69+
// IdentityDiffResult is the per-component row in table output.
70+
type IdentityDiffResult struct {
71+
Component string `json:"component" table:",sortkey"`
72+
Status IdentityDiffStatus `json:"status"`
73+
}
74+
75+
// IdentityDiffReport is the shape emitted for JSON output: component names
// grouped by diff status, one list per status.
type IdentityDiffReport struct {
	// Changed lists components whose fingerprint differs between base and head.
	Changed []string `json:"changed"`
	// Added lists components found only in head.
	Added []string `json:"added"`
	// Removed lists components found only in base.
	Removed []string `json:"removed"`
	// Unchanged lists components whose fingerprint is the same in both files.
	Unchanged []string `json:"unchanged"`
}
82+
83+
// DiffIdentities reads two identity JSON files and computes the diff.
84+
func DiffIdentities(env *azldev.Env, basePath string, headPath string, changedOnly bool) (interface{}, error) {
85+
baseIdentities, err := readIdentityFile(env, basePath)
86+
if err != nil {
87+
return nil, fmt.Errorf("reading base identity file %#q:\n%w", basePath, err)
88+
}
89+
90+
headIdentities, err := readIdentityFile(env, headPath)
91+
if err != nil {
92+
return nil, fmt.Errorf("reading head identity file %#q:\n%w", headPath, err)
93+
}
94+
95+
report := ComputeDiff(baseIdentities, headIdentities, changedOnly)
96+
97+
// Return table-friendly results for table/CSV format, or the report for JSON.
98+
if env.DefaultReportFormat() == azldev.ReportFormatJSON {
99+
return report, nil
100+
}
101+
102+
return buildTableResults(report), nil
103+
}
104+
105+
// readIdentityFile reads and parses a component identity JSON file into a map of
106+
// component name to fingerprint.
107+
func readIdentityFile(
108+
env *azldev.Env, filePath string,
109+
) (map[string]string, error) {
110+
data, err := fileutils.ReadFile(env.FS(), filePath)
111+
if err != nil {
112+
return nil, fmt.Errorf("reading file:\n%w", err)
113+
}
114+
115+
var entries []ComponentIdentityResult
116+
117+
err = json.Unmarshal(data, &entries)
118+
if err != nil {
119+
return nil, fmt.Errorf("parsing JSON:\n%w", err)
120+
}
121+
122+
result := make(map[string]string, len(entries))
123+
for _, entry := range entries {
124+
result[entry.Component] = entry.Fingerprint
125+
}
126+
127+
return result, nil
128+
}
129+
130+
// ComputeDiff compares base and head identity maps and produces a diff report.
131+
// When changedOnly is true, the Removed and Unchanged lists are left empty.
132+
func ComputeDiff(base map[string]string, head map[string]string, changedOnly bool) *IdentityDiffReport {
133+
// Initialize all slices so JSON serialization produces [] instead of null.
134+
report := &IdentityDiffReport{
135+
Changed: make([]string, 0),
136+
Added: make([]string, 0),
137+
Removed: make([]string, 0),
138+
Unchanged: make([]string, 0),
139+
}
140+
141+
// Check base components against head.
142+
for name, baseFP := range base {
143+
headFP, exists := head[name]
144+
145+
switch {
146+
case !exists:
147+
if !changedOnly {
148+
report.Removed = append(report.Removed, name)
149+
}
150+
case baseFP != headFP:
151+
report.Changed = append(report.Changed, name)
152+
default:
153+
if !changedOnly {
154+
report.Unchanged = append(report.Unchanged, name)
155+
}
156+
}
157+
}
158+
159+
// Check for new components in head.
160+
for name := range head {
161+
if _, exists := base[name]; !exists {
162+
report.Added = append(report.Added, name)
163+
}
164+
}
165+
166+
// Sort all lists for deterministic output.
167+
sort.Strings(report.Changed)
168+
sort.Strings(report.Added)
169+
sort.Strings(report.Removed)
170+
sort.Strings(report.Unchanged)
171+
172+
return report
173+
}
174+
175+
// buildTableResults converts the diff report into a slice for table output.
176+
func buildTableResults(report *IdentityDiffReport) []IdentityDiffResult {
177+
results := make([]IdentityDiffResult, 0,
178+
len(report.Changed)+len(report.Added)+len(report.Removed)+len(report.Unchanged))
179+
180+
for _, name := range report.Changed {
181+
results = append(results, IdentityDiffResult{Component: name, Status: IdentityDiffChanged})
182+
}
183+
184+
for _, name := range report.Added {
185+
results = append(results, IdentityDiffResult{Component: name, Status: IdentityDiffAdded})
186+
}
187+
188+
for _, name := range report.Removed {
189+
results = append(results, IdentityDiffResult{Component: name, Status: IdentityDiffRemoved})
190+
}
191+
192+
for _, name := range report.Unchanged {
193+
results = append(results, IdentityDiffResult{Component: name, Status: IdentityDiffUnchanged})
194+
}
195+
196+
return results
197+
}

0 commit comments

Comments
 (0)