Skip to content

Commit 3fb8df7

Browse files
committed
feat(component): Add deterministic component fingerprints
1 parent 730d14d commit 3fb8df7

File tree

6 files changed

+774
-2
lines changed

6 files changed

+774
-2
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ require (
3636
github.com/magefile/mage v1.16.1
3737
github.com/mark3labs/mcp-go v0.45.0
3838
github.com/mattn/go-isatty v0.0.20
39+
github.com/mitchellh/hashstructure/v2 v2.0.2
3940
github.com/muesli/termenv v0.16.0
4041
github.com/nxadm/tail v1.4.11
4142
github.com/opencontainers/selinux v1.13.1

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,8 @@ github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh
229229
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
230230
github.com/mattn/go-runewidth v0.0.21 h1:jJKAZiQH+2mIinzCJIaIG9Be1+0NR+5sz/lYEEjdM8w=
231231
github.com/mattn/go-runewidth v0.0.21/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
232+
github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=
233+
github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE=
232234
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
233235
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
234236
github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8=

internal/app/azldev/core/sources/synthistory.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ func buildSyntheticCommits(
140140
return nil, nil
141141
}
142142

143-
projectRepo, err := openProjectRepo(configFilePath)
143+
projectRepo, err := OpenProjectRepo(configFilePath)
144144
if err != nil {
145145
return nil, err
146146
}
@@ -208,7 +208,7 @@ func resolveConfigFilePath(config *projectconfig.ComponentConfig, componentName
208208

209209
// OpenProjectRepo finds and opens the git repository containing configFilePath by
210210
// walking up the directory tree.
211-
func openProjectRepo(configFilePath string) (*gogit.Repository, error) {
211+
func OpenProjectRepo(configFilePath string) (*gogit.Repository, error) {
212212
repo, err := gogit.PlainOpenWithOptions(filepath.Dir(configFilePath), &gogit.PlainOpenOptions{
213213
DetectDotGit: true,
214214
})

internal/fingerprint/doc.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
// Package fingerprint computes deterministic identity fingerprints for components.
5+
// A fingerprint captures all resolved build inputs so that changes to any input
6+
// (config fields, spec content, overlay files, distro context, upstream refs, or
7+
// Affects commit count) produce a different fingerprint.
8+
//
9+
// The primary entry point is [ComputeIdentity], which takes a resolved
10+
// [projectconfig.ComponentConfig] and additional context, and returns a
11+
// [ComponentIdentity] containing the overall fingerprint hash plus a breakdown
12+
// of individual input hashes for debugging.
13+
package fingerprint
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
package fingerprint
5+
6+
import (
7+
"crypto/sha256"
8+
"encoding/hex"
9+
"fmt"
10+
"io"
11+
"sort"
12+
"strconv"
13+
14+
"github.com/microsoft/azure-linux-dev-tools/internal/global/opctx"
15+
"github.com/microsoft/azure-linux-dev-tools/internal/projectconfig"
16+
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils"
17+
"github.com/mitchellh/hashstructure/v2"
18+
)
19+
20+
// hashstructureTagName names the struct tag that is handed to hashstructure as
// HashOptions.TagName when hashing a component config. A field carrying
// `fingerprint:"-"` is left out of the config hash.
const hashstructureTagName = "fingerprint"
23+
24+
// ComponentIdentity holds the computed fingerprint for a single component plus
25+
// a breakdown of individual input hashes for debugging.
26+
type ComponentIdentity struct {
27+
// Fingerprint is the overall SHA256 hash combining all inputs.
28+
Fingerprint string `json:"fingerprint"`
29+
// Inputs provides the individual input hashes that were combined.
30+
Inputs ComponentInputs `json:"inputs"`
31+
}
32+
33+
// ComponentInputs is the breakdown of values that are combined into a component's
// fingerprint. Every field here contributes to the final hash.
type ComponentInputs struct {
	// ConfigHash is the uint64 produced by hashstructure over the resolved
	// component config.
	ConfigHash uint64 `json:"configHash"`
	// SourceIdentity is an opaque string identifying the component's source:
	// a content hash for local specs, a commit hash for upstream specs.
	SourceIdentity string `json:"sourceIdentity,omitempty"`
	// OverlayFileHashes is keyed by overlay source file path; each value is the
	// SHA256 hash of that file.
	OverlayFileHashes map[string]string `json:"overlayFileHashes,omitempty"`
	// AffectsCommitCount counts the "Affects: <component>" commits found in the
	// project repo.
	AffectsCommitCount int `json:"affectsCommitCount"`
	// Distro is the name of the effective distro.
	Distro string `json:"distro"`
	// DistroVersion is the version of the effective distro.
	DistroVersion string `json:"distroVersion"`
}
50+
51+
// IdentityOptions carries the fingerprint inputs that the caller must supply
// because they cannot be read from the component config.
type IdentityOptions struct {
	// AffectsCommitCount counts the "Affects: <component>" commits.
	AffectsCommitCount int
	// SourceIdentity is the opaque source identity string, as obtained from a
	// [sourceproviders.SourceIdentityProvider].
	SourceIdentity string
}
59+
60+
// ComputeIdentity computes the fingerprint for a component from its resolved config
61+
// and additional context. The fs parameter is used to read overlay source file
62+
// contents for hashing; spec content identity is provided via opts.SourceIdentity.
63+
func ComputeIdentity(
64+
fs opctx.FS,
65+
component projectconfig.ComponentConfig,
66+
distroRef projectconfig.DistroReference,
67+
opts IdentityOptions,
68+
) (*ComponentIdentity, error) {
69+
inputs := ComponentInputs{
70+
AffectsCommitCount: opts.AffectsCommitCount,
71+
SourceIdentity: opts.SourceIdentity,
72+
Distro: distroRef.Name,
73+
DistroVersion: distroRef.Version,
74+
}
75+
76+
// 1. Hash the resolved config struct (excluding fingerprint:"-" fields).
77+
configHash, err := hashstructure.Hash(component, hashstructure.FormatV2, &hashstructure.HashOptions{
78+
TagName: hashstructureTagName,
79+
})
80+
if err != nil {
81+
return nil, fmt.Errorf("hashing component config:\n%w", err)
82+
}
83+
84+
inputs.ConfigHash = configHash
85+
86+
// 2. Hash overlay source file contents.
87+
overlayHashes, err := hashOverlayFiles(fs, component.Overlays)
88+
if err != nil {
89+
return nil, fmt.Errorf("hashing overlay files:\n%w", err)
90+
}
91+
92+
inputs.OverlayFileHashes = overlayHashes
93+
94+
// 3. Combine all inputs into the overall fingerprint.
95+
return &ComponentIdentity{
96+
Fingerprint: combineInputs(inputs),
97+
Inputs: inputs,
98+
}, nil
99+
}
100+
101+
// hashOverlayFiles computes SHA256 hashes for all overlay source files that reference
102+
// local files. Returns a map of source path to hex hash, or an empty map if no overlay
103+
// source files exist.
104+
func hashOverlayFiles(
105+
fs opctx.FS,
106+
overlays []projectconfig.ComponentOverlay,
107+
) (map[string]string, error) {
108+
hashes := make(map[string]string)
109+
110+
for _, overlay := range overlays {
111+
if overlay.Source == "" {
112+
continue
113+
}
114+
115+
fileHash, err := fileutils.ComputeFileHash(fs, fileutils.HashTypeSHA256, overlay.Source)
116+
if err != nil {
117+
return nil, fmt.Errorf("hashing overlay source %#q:\n%w", overlay.Source, err)
118+
}
119+
120+
hashes[overlay.Source] = fileHash
121+
}
122+
123+
return hashes, nil
124+
}
125+
126+
// combineInputs deterministically combines all input hashes into a single SHA256 fingerprint.
127+
func combineInputs(inputs ComponentInputs) string {
128+
hasher := sha256.New()
129+
130+
// Write each input in a fixed order with field labels for domain separation.
131+
writeField(hasher, "config_hash", strconv.FormatUint(inputs.ConfigHash, 10))
132+
writeField(hasher, "source_identity", inputs.SourceIdentity)
133+
writeField(hasher, "affects_commit_count", strconv.Itoa(inputs.AffectsCommitCount))
134+
writeField(hasher, "distro", inputs.Distro)
135+
writeField(hasher, "distro_version", inputs.DistroVersion)
136+
137+
// Overlay file hashes in sorted key order for determinism.
138+
if len(inputs.OverlayFileHashes) > 0 {
139+
keys := make([]string, 0, len(inputs.OverlayFileHashes))
140+
for key := range inputs.OverlayFileHashes {
141+
keys = append(keys, key)
142+
}
143+
144+
sort.Strings(keys)
145+
146+
for _, key := range keys {
147+
writeField(hasher, "overlay:"+key, inputs.OverlayFileHashes[key])
148+
}
149+
}
150+
151+
return "sha256:" + hex.EncodeToString(hasher.Sum(nil))
152+
}
153+
154+
// writeField writes one labeled record to writer, laid out as
//
//	<len(label)>:<label>=<value>\n
//
// where len(label) is the label's length in bytes, in decimal. Putting the length
// in front of the label fixes where the label ends, so a label that contains "="
// or that is a prefix of another label cannot be confused with a different
// label/value split.
//
// The value is written verbatim: it is neither length-prefixed nor escaped.
// NOTE(review): a value containing "\n" could therefore imitate the start of a
// following record — confirm no caller passes newline-bearing values.
//
// Any write error is discarded; the only caller in this file passes a hash.Hash.
func writeField(writer io.Writer, label string, value string) {
	record := fmt.Sprintf("%d:%s=%s\n", len(label), label, value)

	_, _ = io.WriteString(writer, record)
}

0 commit comments

Comments
 (0)