-
Notifications
You must be signed in to change notification settings - Fork 11
feat(component): Add deterministic component fingerprints #47
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
15dcea7
94e19ff
78e76d5
98422f3
ce450ef
f2c50ee
cf9d102
26fd8d3
561fb1f
354b059
7faa755
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| // Copyright (c) Microsoft Corporation. | ||
| // Licensed under the MIT License. | ||
|
|
||
| // Package fingerprint computes deterministic identity fingerprints for components. | ||
| // A fingerprint captures all resolved build inputs so that changes to any input | ||
| // (config fields, spec content, overlay files, distro context, upstream refs, or | ||
| // Affects commit count) produce a different fingerprint. | ||
| // | ||
| // The primary entry point is [ComputeIdentity], which takes a resolved | ||
| // [projectconfig.ComponentConfig] and additional context, and returns a | ||
| // [ComponentIdentity] containing the overall fingerprint hash plus a breakdown | ||
| // of individual input hashes for debugging. | ||
|
dmcilvaney marked this conversation as resolved.
|
||
| package fingerprint | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,170 @@ | ||
| // Copyright (c) Microsoft Corporation. | ||
| // Licensed under the MIT License. | ||
|
|
||
| package fingerprint | ||
|
|
||
| import ( | ||
| "crypto/sha256" | ||
| "encoding/hex" | ||
| "fmt" | ||
| "io" | ||
| "sort" | ||
| "strconv" | ||
|
|
||
| "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" | ||
| "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" | ||
| "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" | ||
| "github.com/mitchellh/hashstructure/v2" | ||
| ) | ||
|
|
||
// hashstructureTagName is the struct tag name handed to hashstructure (via
// HashOptions.TagName) to determine field inclusion when hashing the component
// config. Fields tagged with `fingerprint:"-"` are excluded.
const hashstructureTagName = "fingerprint"
|
|
||
| // ComponentIdentity holds the computed fingerprint for a single component plus | ||
| // a breakdown of individual input hashes for debugging. | ||
| type ComponentIdentity struct { | ||
| // Fingerprint is the overall SHA256 hash combining all inputs. | ||
| Fingerprint string `json:"fingerprint"` | ||
| // Inputs provides the individual input hashes that were combined. | ||
| Inputs ComponentInputs `json:"inputs"` | ||
| } | ||
|
|
||
// ComponentInputs lists the individual values that are combined to produce a
// component's fingerprint.
type ComponentInputs struct {
	// ConfigHash is the uint64 hashstructure hash of the resolved component
	// config fields.
	ConfigHash uint64 `json:"configHash"`

	// SourceIdentity is an opaque string identifying the component's source:
	// a content hash for local specs, a commit hash for upstream specs.
	SourceIdentity string `json:"sourceIdentity,omitempty"`

	// OverlayFileHashes maps each overlay source file path to its SHA256 hash.
	OverlayFileHashes map[string]string `json:"overlayFileHashes,omitempty"`

	// AffectsCommitCount is the number of "Affects: <component>" commits in
	// the project repo.
	AffectsCommitCount int `json:"affectsCommitCount"`

	// Distro is the effective distro name.
	// NOTE(review): a reviewer asked why the distro name needs to be tracked;
	// the author suggested the release version may be the only part that
	// matters. Confirm before relying on this field.
	Distro string `json:"distro"`

	// DistroVersion is the effective distro version.
	DistroVersion string `json:"distroVersion"`
}
|
|
||
// IdentityOptions carries the fingerprint inputs that do not live in the
// component config itself and therefore have to be supplied by the caller.
type IdentityOptions struct {
	// AffectsCommitCount is the number of "Affects: <component>" commits.
	AffectsCommitCount int

	// SourceIdentity is the opaque identity string obtained from a
	// [sourceproviders.SourceIdentityProvider].
	SourceIdentity string
}
|
|
||
| // ComputeIdentity computes the fingerprint for a component from its resolved config | ||
| // and additional context. The fs parameter is used to read overlay source file | ||
| // contents for hashing; spec content identity is provided via opts.SourceIdentity. | ||
| func ComputeIdentity( | ||
| fs opctx.FS, | ||
| component projectconfig.ComponentConfig, | ||
| distroRef projectconfig.DistroReference, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this the distro that the component is being built for or the distro that the component's spec may have come from? Can we document and clarify?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should be the effective distro for the component, ie if one is set, pick that, otherwise the global default.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That said, we can probably drop it to just ReleaseVer, that might be the only bit we really care about. |
||
| opts IdentityOptions, | ||
| ) (*ComponentIdentity, error) { | ||
| inputs := ComponentInputs{ | ||
| AffectsCommitCount: opts.AffectsCommitCount, | ||
| SourceIdentity: opts.SourceIdentity, | ||
| Distro: distroRef.Name, | ||
| DistroVersion: distroRef.Version, | ||
| } | ||
|
dmcilvaney marked this conversation as resolved.
dmcilvaney marked this conversation as resolved.
|
||
|
|
||
| // 1. Verify all source files have a hash. Without a hash the fingerprint | ||
| // cannot detect content changes, so we refuse to compute one. | ||
| for i := range component.SourceFiles { | ||
| if component.SourceFiles[i].Hash == "" { | ||
| return nil, fmt.Errorf( | ||
| "source file %#q has no hash; cannot compute a deterministic fingerprint", | ||
| component.SourceFiles[i].Filename, | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| // 2. Hash the resolved config struct (excluding fingerprint:"-" fields). | ||
| configHash, err := hashstructure.Hash(component, hashstructure.FormatV2, &hashstructure.HashOptions{ | ||
| TagName: hashstructureTagName, | ||
| }) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("hashing component config:\n%w", err) | ||
| } | ||
|
|
||
| inputs.ConfigHash = configHash | ||
|
|
||
|
dmcilvaney marked this conversation as resolved.
Outdated
|
||
| // 3. Hash overlay source file contents. | ||
| overlayHashes, err := hashOverlayFiles(fs, component.Overlays) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("hashing overlay files:\n%w", err) | ||
| } | ||
|
|
||
| inputs.OverlayFileHashes = overlayHashes | ||
|
|
||
| // 4. Combine all inputs into the overall fingerprint. | ||
| return &ComponentIdentity{ | ||
| Fingerprint: combineInputs(inputs), | ||
| Inputs: inputs, | ||
| }, nil | ||
| } | ||
|
|
||
| // hashOverlayFiles computes SHA256 hashes for all overlay source files that reference | ||
| // local files. Returns a map of source path to hex hash, or an empty map if no overlay | ||
| // source files exist. | ||
| func hashOverlayFiles( | ||
| fs opctx.FS, | ||
| overlays []projectconfig.ComponentOverlay, | ||
| ) (map[string]string, error) { | ||
| hashes := make(map[string]string) | ||
|
|
||
| for _, overlay := range overlays { | ||
| if overlay.Source == "" { | ||
|
dmcilvaney marked this conversation as resolved.
Outdated
|
||
| continue | ||
| } | ||
|
|
||
| fileHash, err := fileutils.ComputeFileHash(fs, fileutils.HashTypeSHA256, overlay.Source) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("hashing overlay source %#q:\n%w", overlay.Source, err) | ||
| } | ||
|
|
||
| hashes[overlay.Source] = fileHash | ||
| } | ||
|
|
||
| return hashes, nil | ||
| } | ||
|
|
||
| // combineInputs deterministically combines all input hashes into a single SHA256 fingerprint. | ||
| func combineInputs(inputs ComponentInputs) string { | ||
| hasher := sha256.New() | ||
|
|
||
| // Write each input in a fixed order with field labels for domain separation. | ||
| writeField(hasher, "config_hash", strconv.FormatUint(inputs.ConfigHash, 10)) | ||
| writeField(hasher, "source_identity", inputs.SourceIdentity) | ||
| writeField(hasher, "affects_commit_count", strconv.Itoa(inputs.AffectsCommitCount)) | ||
| writeField(hasher, "distro", inputs.Distro) | ||
| writeField(hasher, "distro_version", inputs.DistroVersion) | ||
|
|
||
| // Overlay file hashes in sorted key order for determinism. | ||
| if len(inputs.OverlayFileHashes) > 0 { | ||
| keys := make([]string, 0, len(inputs.OverlayFileHashes)) | ||
| for key := range inputs.OverlayFileHashes { | ||
| keys = append(keys, key) | ||
| } | ||
|
|
||
| sort.Strings(keys) | ||
|
|
||
| for _, key := range keys { | ||
| writeField(hasher, "overlay:"+key, inputs.OverlayFileHashes[key]) | ||
|
dmcilvaney marked this conversation as resolved.
|
||
| } | ||
| } | ||
|
dmcilvaney marked this conversation as resolved.
dmcilvaney marked this conversation as resolved.
|
||
|
|
||
| return "sha256:" + hex.EncodeToString(hasher.Sum(nil)) | ||
| } | ||
|
|
||
// writeField writes one labeled value to writer as a self-delimiting record of
// the form "<len(label)>:<label>=<len(value)>:<value>\n".
//
// Both the label and the value are prefixed with their byte length. Prefixing
// the label prevents collisions between field names that are prefixes of each
// other. Prefixing the value prevents a value that itself contains
// "\n<len>:<label>=" from being indistinguishable from a sequence of separate
// fields, which would let two different input sets hash to the same
// fingerprint.
func writeField(writer io.Writer, label string, value string) {
	// The write result is deliberately discarded: the caller in this file
	// passes a hash.Hash, whose Write is documented never to return an error.
	_, _ = fmt.Fprintf(writer, "%d:%s=%d:%s\n", len(label), label, len(value), value)
}
|
dmcilvaney marked this conversation as resolved.
|
||
Uh oh!
There was an error while loading. Please reload this page.