Skip to content

Commit bd6e9b5

Browse files
committed
feat(component): Add fingerprint calculation library for components
1 parent 382adf8 commit bd6e9b5

File tree

5 files changed

+604
-0
lines changed

5 files changed

+604
-0
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ require (
3636
github.com/magefile/mage v1.16.1
3737
github.com/mark3labs/mcp-go v0.45.0
3838
github.com/mattn/go-isatty v0.0.20
39+
github.com/mitchellh/hashstructure/v2 v2.0.2
3940
github.com/muesli/termenv v0.16.0
4041
github.com/nxadm/tail v1.4.11
4142
github.com/opencontainers/selinux v1.13.1

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,8 @@ github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh
229229
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
230230
github.com/mattn/go-runewidth v0.0.21 h1:jJKAZiQH+2mIinzCJIaIG9Be1+0NR+5sz/lYEEjdM8w=
231231
github.com/mattn/go-runewidth v0.0.21/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
232+
github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=
233+
github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE=
232234
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
233235
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
234236
github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8=

internal/fingerprint/doc.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
// Package fingerprint computes deterministic identity fingerprints for components.
5+
// A fingerprint captures all resolved build inputs so that changes to any input
6+
// (config fields, spec content, overlay files, distro context, upstream refs, or
7+
// Affects commit count) produce a different fingerprint.
8+
//
9+
// The primary entry point is [ComputeIdentity], which takes a resolved
10+
// [projectconfig.ComponentConfig] and additional context, and returns a
11+
// [ComponentIdentity] containing the overall fingerprint hash plus a breakdown
12+
// of individual input hashes for debugging.
13+
package fingerprint
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
package fingerprint
5+
6+
import (
7+
"crypto/sha256"
8+
"encoding/hex"
9+
"fmt"
10+
"io"
11+
"sort"
12+
"strconv"
13+
14+
"github.com/microsoft/azure-linux-dev-tools/internal/global/opctx"
15+
"github.com/microsoft/azure-linux-dev-tools/internal/projectconfig"
16+
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils"
17+
"github.com/mitchellh/hashstructure/v2"
18+
)
19+
20+
// hashstructureTagName names the struct tag that is handed to hashstructure as
// its TagName option when hashing a component config. A field tagged
// `fingerprint:"-"` is left out of the resulting config hash.
const hashstructureTagName = "fingerprint"
23+
24+
// ComponentIdentity holds the computed fingerprint for a single component plus
25+
// a breakdown of individual input hashes for debugging.
26+
type ComponentIdentity struct {
27+
// Fingerprint is the overall SHA256 hash combining all inputs.
28+
Fingerprint string `json:"fingerprint"`
29+
// Inputs provides the individual input hashes that were combined.
30+
Inputs ComponentInputs `json:"inputs"`
31+
}
32+
33+
// ComponentInputs lists, one field per input, everything that contributes to
// a component's fingerprint.
type ComponentInputs struct {
	// ConfigHash is the uint64 produced by hashstructure over the resolved
	// component config.
	ConfigHash uint64 `json:"configHash"`
	// SpecContentHash is the SHA256 of the spec file's content. It is filled in
	// only for local specs and is omitted from JSON when empty.
	SpecContentHash string `json:"specContentHash,omitempty"`
	// OverlayFileHashes is keyed by overlay source file path; each value is the
	// SHA256 of that file. Omitted from JSON when empty.
	OverlayFileHashes map[string]string `json:"overlayFileHashes,omitempty"`
	// AffectsCommitCount is how many "Affects: <component>" commits exist in
	// the project repo.
	AffectsCommitCount int `json:"affectsCommitCount"`
	// Distro is the name of the effective distro.
	Distro string `json:"distro"`
	// DistroVersion is the version of the effective distro.
	DistroVersion string `json:"distroVersion"`
}
49+
50+
// ComputeIdentity computes the fingerprint for a component from its resolved config
51+
// and additional context. The fs parameter is used to read spec file and overlay
52+
// source file contents for hashing.
53+
func ComputeIdentity(
54+
fs opctx.FS,
55+
component projectconfig.ComponentConfig,
56+
distroRef projectconfig.DistroReference,
57+
affectsCommitCount int,
58+
) (*ComponentIdentity, error) {
59+
inputs := ComponentInputs{
60+
AffectsCommitCount: affectsCommitCount,
61+
Distro: distroRef.Name,
62+
DistroVersion: distroRef.Version,
63+
}
64+
65+
// 1. Hash the resolved config struct (excluding fingerprint:"-" fields).
66+
configHash, err := hashstructure.Hash(component, hashstructure.FormatV2, &hashstructure.HashOptions{
67+
TagName: hashstructureTagName,
68+
})
69+
if err != nil {
70+
return nil, fmt.Errorf("hashing component config:\n%w", err)
71+
}
72+
73+
inputs.ConfigHash = configHash
74+
75+
// 2. Hash spec file content for local specs.
76+
if component.Spec.SourceType == projectconfig.SpecSourceTypeLocal ||
77+
component.Spec.SourceType == projectconfig.SpecSourceTypeUnspecified {
78+
if component.Spec.Path != "" {
79+
specHash, fileErr := fileutils.ComputeFileHash(fs, fileutils.HashTypeSHA256, component.Spec.Path)
80+
if fileErr != nil {
81+
return nil, fmt.Errorf("hashing spec file %#q:\n%w", component.Spec.Path, fileErr)
82+
}
83+
84+
inputs.SpecContentHash = specHash
85+
}
86+
}
87+
88+
// 3. Hash overlay source file contents.
89+
overlayHashes, err := hashOverlayFiles(fs, component.Overlays)
90+
if err != nil {
91+
return nil, fmt.Errorf("hashing overlay files:\n%w", err)
92+
}
93+
94+
inputs.OverlayFileHashes = overlayHashes
95+
96+
// 4. Combine all inputs into the overall fingerprint.
97+
return &ComponentIdentity{
98+
Fingerprint: combineInputs(inputs),
99+
Inputs: inputs,
100+
}, nil
101+
}
102+
103+
// hashOverlayFiles computes SHA256 hashes for all overlay source files that reference
104+
// local files. Returns a map of source path to hex hash, or an empty map if no overlay
105+
// source files exist.
106+
func hashOverlayFiles(
107+
fs opctx.FS,
108+
overlays []projectconfig.ComponentOverlay,
109+
) (map[string]string, error) {
110+
hashes := make(map[string]string)
111+
112+
for _, overlay := range overlays {
113+
if overlay.Source == "" {
114+
continue
115+
}
116+
117+
fileHash, err := fileutils.ComputeFileHash(fs, fileutils.HashTypeSHA256, overlay.Source)
118+
if err != nil {
119+
return nil, fmt.Errorf("hashing overlay source %#q:\n%w", overlay.Source, err)
120+
}
121+
122+
hashes[overlay.Source] = fileHash
123+
}
124+
125+
return hashes, nil
126+
}
127+
128+
// combineInputs deterministically combines all input hashes into a single SHA256 fingerprint.
129+
func combineInputs(inputs ComponentInputs) string {
130+
hasher := sha256.New()
131+
132+
// Write each input in a fixed order with field labels for domain separation.
133+
writeField(hasher, "config_hash", strconv.FormatUint(inputs.ConfigHash, 10))
134+
writeField(hasher, "spec_content_hash", inputs.SpecContentHash)
135+
writeField(hasher, "affects_commit_count", strconv.Itoa(inputs.AffectsCommitCount))
136+
writeField(hasher, "distro", inputs.Distro)
137+
writeField(hasher, "distro_version", inputs.DistroVersion)
138+
139+
// Overlay file hashes in sorted key order for determinism.
140+
if len(inputs.OverlayFileHashes) > 0 {
141+
keys := make([]string, 0, len(inputs.OverlayFileHashes))
142+
for key := range inputs.OverlayFileHashes {
143+
keys = append(keys, key)
144+
}
145+
146+
sort.Strings(keys)
147+
148+
for _, key := range keys {
149+
writeField(hasher, "overlay:"+key, inputs.OverlayFileHashes[key])
150+
}
151+
}
152+
153+
return "sha256:" + hex.EncodeToString(hasher.Sum(nil))
154+
}
155+
156+
// writeField appends one labeled record to writer in the form
//
//	<len(label)>:<label>=<value>\n
//
// where the length is the label's byte count in decimal. Prefixing the label
// with its length keeps a label that is a prefix of another label from being
// confused with it. The value is written verbatim and is delimited only by
// the trailing newline (it is not length-prefixed), so records are
// unambiguous only for values that contain no newline. Any write error is
// discarded.
func writeField(writer io.Writer, label string, value string) {
	record := fmt.Sprintf("%d:%s=%s\n", len(label), label, value)

	_, _ = io.WriteString(writer, record)
}

0 commit comments

Comments
 (0)