|
| 1 | +// Copyright (c) Microsoft Corporation. |
| 2 | +// Licensed under the MIT License. |
| 3 | + |
| 4 | +package fingerprint |
| 5 | + |
| 6 | +import ( |
| 7 | + "crypto/sha256" |
| 8 | + "encoding/hex" |
| 9 | + "fmt" |
| 10 | + "io" |
| 11 | + "sort" |
| 12 | + "strconv" |
| 13 | + |
| 14 | + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" |
| 15 | + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" |
| 16 | + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" |
| 17 | + "github.com/mitchellh/hashstructure/v2" |
| 18 | +) |
| 19 | + |
// hashstructureTagName names the struct tag that hashstructure is told to consult
// when hashing a component config. A field tagged `fingerprint:"-"` is left out
// of the config hash.
const (
	hashstructureTagName = "fingerprint"
)
| 23 | + |
| 24 | +// ComponentIdentity holds the computed fingerprint for a single component plus |
| 25 | +// a breakdown of individual input hashes for debugging. |
| 26 | +type ComponentIdentity struct { |
| 27 | + // Fingerprint is the overall SHA256 hash combining all inputs. |
| 28 | + Fingerprint string `json:"fingerprint"` |
| 29 | + // Inputs provides the individual input hashes that were combined. |
| 30 | + Inputs ComponentInputs `json:"inputs"` |
| 31 | +} |
| 32 | + |
// ComponentInputs is the breakdown of everything that feeds a component's
// fingerprint. Each field is one input; together they fully determine the
// fingerprint.
type ComponentInputs struct {
	// ConfigHash is the 64-bit hashstructure hash of the resolved component config.
	ConfigHash uint64 `json:"configHash"`
	// SpecContentHash is the SHA256 of the spec file's content. It is only set for
	// local specs and is omitted from JSON when empty.
	SpecContentHash string `json:"specContentHash,omitempty"`
	// OverlayFileHashes holds the SHA256 of each overlay source file, keyed by the
	// overlay's source path. It is omitted from JSON when empty.
	OverlayFileHashes map[string]string `json:"overlayFileHashes,omitempty"`
	// AffectsCommitCount is how many "Affects: <component>" commits exist in the
	// project repo.
	AffectsCommitCount int `json:"affectsCommitCount"`
	// Distro is the name of the effective distro.
	Distro string `json:"distro"`
	// DistroVersion is the version of the effective distro.
	DistroVersion string `json:"distroVersion"`
}
| 49 | + |
| 50 | +// ComputeIdentity computes the fingerprint for a component from its resolved config |
| 51 | +// and additional context. The fs parameter is used to read spec file and overlay |
| 52 | +// source file contents for hashing. |
| 53 | +func ComputeIdentity( |
| 54 | + fs opctx.FS, |
| 55 | + component projectconfig.ComponentConfig, |
| 56 | + distroRef projectconfig.DistroReference, |
| 57 | + affectsCommitCount int, |
| 58 | +) (*ComponentIdentity, error) { |
| 59 | + inputs := ComponentInputs{ |
| 60 | + AffectsCommitCount: affectsCommitCount, |
| 61 | + Distro: distroRef.Name, |
| 62 | + DistroVersion: distroRef.Version, |
| 63 | + } |
| 64 | + |
| 65 | + // 1. Hash the resolved config struct (excluding fingerprint:"-" fields). |
| 66 | + configHash, err := hashstructure.Hash(component, hashstructure.FormatV2, &hashstructure.HashOptions{ |
| 67 | + TagName: hashstructureTagName, |
| 68 | + }) |
| 69 | + if err != nil { |
| 70 | + return nil, fmt.Errorf("hashing component config:\n%w", err) |
| 71 | + } |
| 72 | + |
| 73 | + inputs.ConfigHash = configHash |
| 74 | + |
| 75 | + // 2. Hash spec file content for local specs. |
| 76 | + if component.Spec.SourceType == projectconfig.SpecSourceTypeLocal || |
| 77 | + component.Spec.SourceType == projectconfig.SpecSourceTypeUnspecified { |
| 78 | + if component.Spec.Path != "" { |
| 79 | + specHash, fileErr := fileutils.ComputeFileHash(fs, fileutils.HashTypeSHA256, component.Spec.Path) |
| 80 | + if fileErr != nil { |
| 81 | + return nil, fmt.Errorf("hashing spec file %#q:\n%w", component.Spec.Path, fileErr) |
| 82 | + } |
| 83 | + |
| 84 | + inputs.SpecContentHash = specHash |
| 85 | + } |
| 86 | + } |
| 87 | + |
| 88 | + // 3. Hash overlay source file contents. |
| 89 | + overlayHashes, err := hashOverlayFiles(fs, component.Overlays) |
| 90 | + if err != nil { |
| 91 | + return nil, fmt.Errorf("hashing overlay files:\n%w", err) |
| 92 | + } |
| 93 | + |
| 94 | + inputs.OverlayFileHashes = overlayHashes |
| 95 | + |
| 96 | + // 4. Combine all inputs into the overall fingerprint. |
| 97 | + return &ComponentIdentity{ |
| 98 | + Fingerprint: combineInputs(inputs), |
| 99 | + Inputs: inputs, |
| 100 | + }, nil |
| 101 | +} |
| 102 | + |
| 103 | +// hashOverlayFiles computes SHA256 hashes for all overlay source files that reference |
| 104 | +// local files. Returns a map of source path to hex hash, or an empty map if no overlay |
| 105 | +// source files exist. |
| 106 | +func hashOverlayFiles( |
| 107 | + fs opctx.FS, |
| 108 | + overlays []projectconfig.ComponentOverlay, |
| 109 | +) (map[string]string, error) { |
| 110 | + hashes := make(map[string]string) |
| 111 | + |
| 112 | + for _, overlay := range overlays { |
| 113 | + if overlay.Source == "" { |
| 114 | + continue |
| 115 | + } |
| 116 | + |
| 117 | + fileHash, err := fileutils.ComputeFileHash(fs, fileutils.HashTypeSHA256, overlay.Source) |
| 118 | + if err != nil { |
| 119 | + return nil, fmt.Errorf("hashing overlay source %#q:\n%w", overlay.Source, err) |
| 120 | + } |
| 121 | + |
| 122 | + hashes[overlay.Source] = fileHash |
| 123 | + } |
| 124 | + |
| 125 | + return hashes, nil |
| 126 | +} |
| 127 | + |
| 128 | +// combineInputs deterministically combines all input hashes into a single SHA256 fingerprint. |
| 129 | +func combineInputs(inputs ComponentInputs) string { |
| 130 | + hasher := sha256.New() |
| 131 | + |
| 132 | + // Write each input in a fixed order with field labels for domain separation. |
| 133 | + writeField(hasher, "config_hash", strconv.FormatUint(inputs.ConfigHash, 10)) |
| 134 | + writeField(hasher, "spec_content_hash", inputs.SpecContentHash) |
| 135 | + writeField(hasher, "affects_commit_count", strconv.Itoa(inputs.AffectsCommitCount)) |
| 136 | + writeField(hasher, "distro", inputs.Distro) |
| 137 | + writeField(hasher, "distro_version", inputs.DistroVersion) |
| 138 | + |
| 139 | + // Overlay file hashes in sorted key order for determinism. |
| 140 | + if len(inputs.OverlayFileHashes) > 0 { |
| 141 | + keys := make([]string, 0, len(inputs.OverlayFileHashes)) |
| 142 | + for key := range inputs.OverlayFileHashes { |
| 143 | + keys = append(keys, key) |
| 144 | + } |
| 145 | + |
| 146 | + sort.Strings(keys) |
| 147 | + |
| 148 | + for _, key := range keys { |
| 149 | + writeField(hasher, "overlay:"+key, inputs.OverlayFileHashes[key]) |
| 150 | + } |
| 151 | + } |
| 152 | + |
| 153 | + return "sha256:" + hex.EncodeToString(hasher.Sum(nil)) |
| 154 | +} |
| 155 | + |
// writeField appends one labeled input to writer, encoded as
//
//	<len(label)>:<label>=<len(value)>:<value>\n
//
// where both lengths are decimal byte counts.
//
// Length-prefixing the label alone is not enough for domain separation: a raw
// value containing "\n<len>:<label>=..." would produce exactly the bytes of a
// separate, genuine field. Prefixing the value with its length as well makes
// the encoding of a field sequence unambiguous, so two different sets of inputs
// can never feed identical bytes to the hash.
//
// NOTE: the encoding is part of the fingerprint definition; changing it changes
// every fingerprint computed from it.
func writeField(writer io.Writer, label string, value string) {
	// The write error is deliberately not checked: the writer used in this file
	// is a hash.Hash, whose Write is documented to never return an error.
	fmt.Fprintf(writer, "%d:%s=%d:%s\n", len(label), label, len(value), value)
}
0 commit comments