|
| 1 | +// Copyright (c) Microsoft Corporation. |
| 2 | +// Licensed under the MIT License. |
| 3 | + |
| 4 | +package fingerprint |
| 5 | + |
| 6 | +import ( |
| 7 | + "crypto/sha256" |
| 8 | + "encoding/hex" |
| 9 | + "fmt" |
| 10 | + "io" |
| 11 | + "sort" |
| 12 | + "strconv" |
| 13 | + |
| 14 | + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" |
| 15 | + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" |
| 16 | + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" |
| 17 | + "github.com/mitchellh/hashstructure/v2" |
| 18 | +) |
| 19 | + |
// hashstructureTagName is the struct tag name that hashstructure consults to decide
// whether a field takes part in the config hash. Fields tagged with `fingerprint:"-"`
// are excluded. It is passed as HashOptions.TagName when ComputeIdentity hashes the
// resolved component config.
const hashstructureTagName = "fingerprint"
| 23 | + |
// ComponentIdentity holds the computed fingerprint for a single component plus
// a breakdown of individual input hashes for debugging. It is the result type of
// [ComputeIdentity].
type ComponentIdentity struct {
	// Fingerprint is the overall SHA256 hash combining all inputs, rendered as
	// "sha256:" followed by the lowercase hex digest.
	Fingerprint string `json:"fingerprint"`
	// Inputs provides the individual input hashes that were combined into Fingerprint.
	Inputs ComponentInputs `json:"inputs"`
}
| 32 | + |
// ComponentInputs contains the individual input hashes that comprise a component's
// fingerprint. Every field listed here is fed into the final SHA256 digest.
type ComponentInputs struct {
	// ConfigHash is the hash of the resolved component config fields (uint64 from hashstructure).
	ConfigHash uint64 `json:"configHash"`
	// SourceIdentity is the opaque identity string for the component's source.
	// For local specs this is a content hash; for upstream specs this is a commit hash.
	// It is copied verbatim from IdentityOptions.SourceIdentity.
	SourceIdentity string `json:"sourceIdentity,omitempty"`
	// OverlayFileHashes maps overlay source file paths to their SHA256 hashes.
	// Keys are the overlays' Source values; overlays with an empty Source have no entry.
	OverlayFileHashes map[string]string `json:"overlayFileHashes,omitempty"`
	// AffectsCommitCount is the number of "Affects: <component>" commits in the project repo.
	// It is copied verbatim from IdentityOptions.AffectsCommitCount.
	AffectsCommitCount int `json:"affectsCommitCount"`
	// Distro is the effective distro name (the Name of the distro reference).
	Distro string `json:"distro"`
	// DistroVersion is the effective distro version (the Version of the distro reference).
	DistroVersion string `json:"distroVersion"`
}
| 50 | + |
// IdentityOptions holds additional inputs for computing a component's identity
// that are not part of the component config itself. Both fields are supplied by
// the caller and are mixed into the fingerprint unchanged.
type IdentityOptions struct {
	// AffectsCommitCount is the number of "Affects: <component>" commits.
	AffectsCommitCount int
	// SourceIdentity is the opaque identity string from a [sourceproviders.SourceIdentityProvider].
	SourceIdentity string
}
| 59 | + |
| 60 | +// ComputeIdentity computes the fingerprint for a component from its resolved config |
| 61 | +// and additional context. The fs parameter is used to read spec file and overlay |
| 62 | +// source file contents for hashing. |
| 63 | +func ComputeIdentity( |
| 64 | + fs opctx.FS, |
| 65 | + component projectconfig.ComponentConfig, |
| 66 | + distroRef projectconfig.DistroReference, |
| 67 | + opts IdentityOptions, |
| 68 | +) (*ComponentIdentity, error) { |
| 69 | + inputs := ComponentInputs{ |
| 70 | + AffectsCommitCount: opts.AffectsCommitCount, |
| 71 | + SourceIdentity: opts.SourceIdentity, |
| 72 | + Distro: distroRef.Name, |
| 73 | + DistroVersion: distroRef.Version, |
| 74 | + } |
| 75 | + |
| 76 | + // 1. Hash the resolved config struct (excluding fingerprint:"-" fields). |
| 77 | + configHash, err := hashstructure.Hash(component, hashstructure.FormatV2, &hashstructure.HashOptions{ |
| 78 | + TagName: hashstructureTagName, |
| 79 | + }) |
| 80 | + if err != nil { |
| 81 | + return nil, fmt.Errorf("hashing component config:\n%w", err) |
| 82 | + } |
| 83 | + |
| 84 | + inputs.ConfigHash = configHash |
| 85 | + |
| 86 | + // 2. Hash overlay source file contents. |
| 87 | + overlayHashes, err := hashOverlayFiles(fs, component.Overlays) |
| 88 | + if err != nil { |
| 89 | + return nil, fmt.Errorf("hashing overlay files:\n%w", err) |
| 90 | + } |
| 91 | + |
| 92 | + inputs.OverlayFileHashes = overlayHashes |
| 93 | + |
| 94 | + // 3. Combine all inputs into the overall fingerprint. |
| 95 | + return &ComponentIdentity{ |
| 96 | + Fingerprint: combineInputs(inputs), |
| 97 | + Inputs: inputs, |
| 98 | + }, nil |
| 99 | +} |
| 100 | + |
| 101 | +// hashOverlayFiles computes SHA256 hashes for all overlay source files that reference |
| 102 | +// local files. Returns a map of source path to hex hash, or an empty map if no overlay |
| 103 | +// source files exist. |
| 104 | +func hashOverlayFiles( |
| 105 | + fs opctx.FS, |
| 106 | + overlays []projectconfig.ComponentOverlay, |
| 107 | +) (map[string]string, error) { |
| 108 | + hashes := make(map[string]string) |
| 109 | + |
| 110 | + for _, overlay := range overlays { |
| 111 | + if overlay.Source == "" { |
| 112 | + continue |
| 113 | + } |
| 114 | + |
| 115 | + fileHash, err := fileutils.ComputeFileHash(fs, fileutils.HashTypeSHA256, overlay.Source) |
| 116 | + if err != nil { |
| 117 | + return nil, fmt.Errorf("hashing overlay source %#q:\n%w", overlay.Source, err) |
| 118 | + } |
| 119 | + |
| 120 | + hashes[overlay.Source] = fileHash |
| 121 | + } |
| 122 | + |
| 123 | + return hashes, nil |
| 124 | +} |
| 125 | + |
| 126 | +// combineInputs deterministically combines all input hashes into a single SHA256 fingerprint. |
| 127 | +func combineInputs(inputs ComponentInputs) string { |
| 128 | + hasher := sha256.New() |
| 129 | + |
| 130 | + // Write each input in a fixed order with field labels for domain separation. |
| 131 | + writeField(hasher, "config_hash", strconv.FormatUint(inputs.ConfigHash, 10)) |
| 132 | + writeField(hasher, "source_identity", inputs.SourceIdentity) |
| 133 | + writeField(hasher, "affects_commit_count", strconv.Itoa(inputs.AffectsCommitCount)) |
| 134 | + writeField(hasher, "distro", inputs.Distro) |
| 135 | + writeField(hasher, "distro_version", inputs.DistroVersion) |
| 136 | + |
| 137 | + // Overlay file hashes in sorted key order for determinism. |
| 138 | + if len(inputs.OverlayFileHashes) > 0 { |
| 139 | + keys := make([]string, 0, len(inputs.OverlayFileHashes)) |
| 140 | + for key := range inputs.OverlayFileHashes { |
| 141 | + keys = append(keys, key) |
| 142 | + } |
| 143 | + |
| 144 | + sort.Strings(keys) |
| 145 | + |
| 146 | + for _, key := range keys { |
| 147 | + writeField(hasher, "overlay:"+key, inputs.OverlayFileHashes[key]) |
| 148 | + } |
| 149 | + } |
| 150 | + |
| 151 | + return "sha256:" + hex.EncodeToString(hasher.Sum(nil)) |
| 152 | +} |
| 153 | + |
// writeField writes one labeled value to writer as a self-delimiting record:
//
//	<len(label)>:<label>=<len(value)>:<value>\n
//
// Both lengths are decimal byte counts. Length-prefixing the label keeps labels
// that are prefixes of one another apart. Length-prefixing the value keeps a
// value that itself contains "\n" (or text that looks like another record) from
// being read as additional fields, so distinct field sequences always produce
// distinct byte streams.
func writeField(writer io.Writer, label string, value string) {
	// The write result is deliberately discarded: this helper has no error return,
	// and it is used to feed a hash.Hash, whose Write never returns an error.
	_, _ = fmt.Fprintf(writer, "%d:%s=%d:%s\n", len(label), label, len(value), value)
}
0 commit comments