Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ require (
github.com/magefile/mage v1.17.1
github.com/mark3labs/mcp-go v0.46.0
github.com/mattn/go-isatty v0.0.20
github.com/mitchellh/hashstructure/v2 v2.0.2
github.com/muesli/termenv v0.16.0
github.com/nxadm/tail v1.4.11
github.com/opencontainers/selinux v1.13.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-runewidth v0.0.21 h1:jJKAZiQH+2mIinzCJIaIG9Be1+0NR+5sz/lYEEjdM8w=
github.com/mattn/go-runewidth v0.0.21/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=
github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8=
Expand Down
13 changes: 13 additions & 0 deletions internal/fingerprint/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// Package fingerprint computes deterministic identity fingerprints for components.
// A fingerprint captures all resolved build inputs so that changes to any input
// (config fields, spec content, overlay files, distro context, upstream refs, or
// the number of "Affects: <component>" commits) produce a different fingerprint.
Comment thread
dmcilvaney marked this conversation as resolved.
Outdated
//
// The primary entry point is [ComputeIdentity], which takes a resolved
// [projectconfig.ComponentConfig] and additional context, and returns a
// [ComponentIdentity] containing the overall fingerprint hash plus a breakdown
// of individual input hashes for debugging.
Comment thread
dmcilvaney marked this conversation as resolved.
package fingerprint
199 changes: 199 additions & 0 deletions internal/fingerprint/fingerprint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package fingerprint

import (
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"path/filepath"
"sort"
"strconv"

"github.com/microsoft/azure-linux-dev-tools/internal/global/opctx"
"github.com/microsoft/azure-linux-dev-tools/internal/projectconfig"
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils"
"github.com/mitchellh/hashstructure/v2"
)

// hashstructureTagName is the struct tag name that hashstructure consults to
// determine field inclusion; it is passed as HashOptions.TagName when the
// component config is hashed. Fields tagged with `fingerprint:"-"` are excluded.
const hashstructureTagName = "fingerprint"

// ComponentIdentity holds the computed fingerprint for a single component plus
// a breakdown of the individual input hashes, kept for debugging.
type ComponentIdentity struct {
	// Fingerprint is the overall SHA256 hash combining all inputs, rendered as
	// "sha256:" followed by the hex-encoded digest.
	Fingerprint string `json:"fingerprint"`
	// Inputs provides the individual input hashes that were combined into
	// Fingerprint.
	Inputs ComponentInputs `json:"inputs"`
}

// ComponentInputs contains the individual input hashes that comprise a component's
// fingerprint. Every field is mixed into the overall fingerprint, so a change to
// any of them yields a different [ComponentIdentity.Fingerprint].
type ComponentInputs struct {
	// ConfigHash is the hash of the resolved component config fields (uint64 from hashstructure).
	ConfigHash uint64 `json:"configHash"`
	// SourceIdentity is the opaque identity string for the component's source.
	// For local specs this is a content hash; for upstream specs this is a commit hash.
	SourceIdentity string `json:"sourceIdentity,omitempty"`
	// OverlayFileHashes maps overlay index (as a decimal string) to
	// "<source basename>:<content hash>" for each overlay that has a source file.
	// Keyed by index rather than path to avoid checkout-location dependence.
	OverlayFileHashes map[string]string `json:"overlayFileHashes,omitempty"`
	// AffectsCommitCount is the number of "Affects: <component>" commits in the project repo.
	AffectsCommitCount int `json:"affectsCommitCount"`
	// Distro is the effective distro name.
	//
	// NOTE(review): a reviewer asked why the distro name needs to be tracked; no
	// rationale is recorded here — TODO confirm it is needed alongside DistroVersion.
	Distro string `json:"distro"`
	// DistroVersion is the effective distro version.
	DistroVersion string `json:"distroVersion"`
}

// IdentityOptions holds additional inputs for computing a component's identity
// that are not part of the component config itself. All values are expected to be
// pre-resolved by the caller; [ComputeIdentity] only combines them.
type IdentityOptions struct {
	// AffectsCommitCount is the number of "Affects: <component>" commits.
	AffectsCommitCount int
	// SourceIdentity is the opaque identity string from a [sourceproviders.SourceIdentityProvider].
	// For upstream components this is the resolved commit hash; for local components this is a
	// content hash of the spec directory.
	//
	// This is caller-provided because resolving it requires network access (upstream clone) or
	// filesystem traversal (local content hash). [ComputeIdentity] is a pure combiner — it does
	// not perform I/O beyond reading overlay files. Callers should resolve source identity via
	// SourceManager.ResolveSourceIdentity before calling [ComputeIdentity], which rejects an
	// empty value whenever the component's spec source type is set.
	SourceIdentity string
}

// ComputeIdentity computes the fingerprint for a component from its resolved config
// and additional context. The fs parameter is used to read overlay source file
// contents for hashing; spec content identity is provided via [IdentityOptions.SourceIdentity].
//
// This function is a deterministic combiner: given the same resolved inputs it always
// produces the same fingerprint. It does not resolve source identity or count commits —
// those are expected to be pre-resolved by the caller and passed via opts.
func ComputeIdentity(
fs opctx.FS,
component projectconfig.ComponentConfig,
distroRef projectconfig.DistroReference,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the distro that the component is being built for or the distro that the component's spec may have come from? Can we document and clarify?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be the effective distro for the component, ie if one is set, pick that, otherwise the global default.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That said, we can probably drop it to just ReleaseVer, that might be the only bit we really care about.

opts IdentityOptions,
) (*ComponentIdentity, error) {
inputs := ComponentInputs{
AffectsCommitCount: opts.AffectsCommitCount,
SourceIdentity: opts.SourceIdentity,
Distro: distroRef.Name,
DistroVersion: distroRef.Version,
}
Comment thread
dmcilvaney marked this conversation as resolved.
Comment thread
dmcilvaney marked this conversation as resolved.

// 1. Require source identity when the component has a spec source that
// contributes content. Without it the fingerprint cannot detect spec
// content changes (Spec.Path is excluded from the config hash).
if opts.SourceIdentity == "" && component.Spec.SourceType != "" {
return nil, fmt.Errorf(
"source identity is required for component with source type %#q; "+
"resolve it via SourceManager.ResolveSourceIdentity before calling ComputeIdentity",
component.Spec.SourceType)
Comment thread
dmcilvaney marked this conversation as resolved.
}

// 2. Verify all source files have a hash. Without a hash the fingerprint
// cannot detect content changes, so we refuse to compute one.
for i := range component.SourceFiles {
if component.SourceFiles[i].Hash == "" {
return nil, fmt.Errorf(
"source file %#q has no hash; cannot compute a deterministic fingerprint",
component.SourceFiles[i].Filename,
)
}
}

// 3. Hash the resolved config struct (excluding fingerprint:"-" fields).
configHash, err := hashstructure.Hash(component, hashstructure.FormatV2, &hashstructure.HashOptions{
TagName: hashstructureTagName,
})
if err != nil {
return nil, fmt.Errorf("hashing component config:\n%w", err)
}

inputs.ConfigHash = configHash

// 4. Hash overlay source file contents.
overlayHashes, err := hashOverlayFiles(fs, component.Overlays)
if err != nil {
return nil, fmt.Errorf("hashing overlay files:\n%w", err)
}

inputs.OverlayFileHashes = overlayHashes

// 5. Combine all inputs into the overall fingerprint.
return &ComponentIdentity{
Fingerprint: combineInputs(inputs),
Inputs: inputs,
}, nil
}

// hashOverlayFiles computes SHA256 hashes for all overlay source files that reference
// local files. Returns a map of overlay index (as string) to a combined hash that
// captures both the file content and the source basename. The basename is included
// because some overlay types (e.g., patch-add) derive the destination filename from
// it when no explicit 'file' field is set.
func hashOverlayFiles(
fs opctx.FS,
overlays []projectconfig.ComponentOverlay,
) (map[string]string, error) {
hashes := make(map[string]string)

for idx, overlay := range overlays {
if overlay.Source == "" {
Comment thread
dmcilvaney marked this conversation as resolved.
Outdated
continue
}

fileHash, err := fileutils.ComputeFileHash(fs, fileutils.HashTypeSHA256, overlay.Source)
if err != nil {
return nil, fmt.Errorf("hashing overlay source %#q:\n%w", overlay.Source, err)
}

// Include the basename so that renaming a source file (which changes
// the derived patch filename in the rendered spec) changes the fingerprint.
baseName := filepath.Base(overlay.Source)
hashes[strconv.Itoa(idx)] = baseName + ":" + fileHash
}

return hashes, nil
}

// combineInputs deterministically combines all input hashes into a single SHA256 fingerprint.
func combineInputs(inputs ComponentInputs) string {
hasher := sha256.New()

// Write each input in a fixed order with field labels for domain separation.
writeField(hasher, "config_hash", strconv.FormatUint(inputs.ConfigHash, 10))
writeField(hasher, "source_identity", inputs.SourceIdentity)
writeField(hasher, "affects_commit_count", strconv.Itoa(inputs.AffectsCommitCount))
writeField(hasher, "distro", inputs.Distro)
writeField(hasher, "distro_version", inputs.DistroVersion)

// Overlay file hashes in sorted key order for determinism.
if len(inputs.OverlayFileHashes) > 0 {
keys := make([]string, 0, len(inputs.OverlayFileHashes))
for key := range inputs.OverlayFileHashes {
keys = append(keys, key)
}

sort.Strings(keys)

for _, key := range keys {
writeField(hasher, "overlay:"+key, inputs.OverlayFileHashes[key])
Comment thread
dmcilvaney marked this conversation as resolved.
}
}
Comment thread
dmcilvaney marked this conversation as resolved.
Comment thread
dmcilvaney marked this conversation as resolved.

return "sha256:" + hex.EncodeToString(hasher.Sum(nil))
}

// writeField emits one labeled record to writer in the form
// "<len(label)>:<label>=<len(value)>:<value>\n", where both lengths are byte counts.
func writeField(writer io.Writer, label string, value string) {
	// Both label and value carry a length prefix so that a value containing
	// newlines cannot be mistaken for additional field records.
	record := strconv.Itoa(len(label)) + ":" + label +
		"=" + strconv.Itoa(len(value)) + ":" + value + "\n"

	// The write error is deliberately discarded: the only caller in this file
	// passes a hash.Hash, whose Write is documented never to return an error.
	_, _ = io.WriteString(writer, record)
}
Comment thread
dmcilvaney marked this conversation as resolved.
Loading
Loading