Skip to content

Commit f6e7908

Browse files
ashokn1 and claude committed
feat: add per-layer package attribution (opt-in)
Introduces `computeLayerAttribution` in `lib/analyzer/layer-attribution.ts` and wires it through the full pipeline. Enabled with `--layer-attribution`. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 7936ba5 commit f6e7908

28 files changed

+1118
-83
lines changed

lib/analyzer/layer-attribution.ts

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
import { ExtractedLayers, HistoryEntry } from "../extractor/types";
2+
import { LayerAttributionEntry } from "../facts";
3+
import {
4+
getApkDbFileContent,
5+
getApkDbFileContentAction,
6+
} from "../inputs/apk/static";
7+
import {
8+
getAptDbFileContent,
9+
getDpkgFileContentAction,
10+
} from "../inputs/apt/static";
11+
import {
12+
getChiselManifestAction,
13+
getChiselManifestContent,
14+
} from "../inputs/chisel/static";
15+
import {
16+
getRpmDbFileContent,
17+
getRpmDbFileContentAction,
18+
getRpmNdbFileContent,
19+
getRpmNdbFileContentAction,
20+
getRpmSqliteDbFileContent,
21+
getRpmSqliteDbFileContentAction,
22+
} from "../inputs/rpm/static";
23+
import { analyze as apkAnalyze } from "./package-managers/apk";
24+
import { analyze as aptAnalyze } from "./package-managers/apt";
25+
import { analyze as chiselAnalyze } from "./package-managers/chisel";
26+
import { analyze as rpmAnalyze } from "./package-managers/rpm";
27+
import { AnalysisType } from "./types";
28+
29+
/**
 * Outcome of attributing packages to image layers for a single package
 * manager.
 */
export interface LayerAttributionResult {
  /** One entry per layer in which packages were added or removed. */
  entries: Array<LayerAttributionEntry>;
  /**
   * Lookup from a "name@version" package key to the layer in which that key
   * was most recently seen as newly added.
   */
  pkgLayerMap: Map<
    string,
    {
      layerIndex: number;
      diffID: string;
    }
  >;
}
33+
34+
/**
 * Collects the `created_by` value of every history entry that is not flagged
 * as an empty layer, keeping the original history order.
 *
 * @param history image history entries; may be null or undefined
 * @returns one string per non-empty-layer entry (an empty string when the
 *   entry has no `created_by`), or an empty list when no history is given
 */
function buildHistoryInstructions(
  history: HistoryEntry[] | null | undefined,
): string[] {
  const instructions: string[] = [];
  for (const item of history ?? []) {
    if (item.empty_layer) {
      continue;
    }
    instructions.push(item.created_by ?? "");
  }
  return instructions;
}
42+
43+
/** Builds the "name@version" key used to identify a package across layers. */
function pkgKey(name: string, version: string): string {
  return name + "@" + version;
}
46+
47+
/**
 * Reports whether any file entry in the layer has a key named after the given
 * extract action. Only the presence of the key is checked, not its value.
 *
 * Callers use this to tell "the layer has no package DB" (skip the layer)
 * apart from "the layer has a package DB that yields no packages" (track it
 * as an empty set).
 */
function layerHasAction(layer: ExtractedLayers, actionName: string): boolean {
  for (const fileContent of Object.values(layer)) {
    if (actionName in fileContent) {
      return true;
    }
  }
  return false;
}
55+
56+
/**
 * Reads the package DB of one layer for the requested package manager and
 * returns the "name@version" keys of the packages found in it.
 *
 * @param layer extracted content of a single layer
 * @param analysisType package manager whose DB should be read
 * @param targetImage image name forwarded to the package-manager analyzer
 * @returns `null` when the layer carries no DB file for that package manager
 *   (or the analysis type is not handled here); otherwise the set of keys,
 *   which is empty when the DB is present but yields no packages
 */
async function parseLayerPackages(
  layer: ExtractedLayers,
  analysisType: AnalysisType,
  targetImage: string,
): Promise<Set<string> | null> {
  // Shared by every branch: turn analysed packages into "name@version" keys.
  const toKeySet = (
    pkgs: ReadonlyArray<{ Name: string; Version: string }>,
  ): Set<string> => new Set(pkgs.map((pkg) => pkgKey(pkg.Name, pkg.Version)));

  switch (analysisType) {
    case AnalysisType.Apk: {
      if (!layerHasAction(layer, getApkDbFileContentAction.actionName)) {
        return null;
      }
      const apkDb = getApkDbFileContent(layer);
      const apkResult = await apkAnalyze(targetImage, apkDb);
      return toKeySet(apkResult.Analysis);
    }

    case AnalysisType.Apt: {
      if (!layerHasAction(layer, getDpkgFileContentAction.actionName)) {
        return null;
      }
      const aptDb = getAptDbFileContent(layer);
      const aptResult = await aptAnalyze(targetImage, aptDb);
      return toKeySet(aptResult.Analysis);
    }

    case AnalysisType.Rpm: {
      // RPM data may be stored in any of three DB formats; read only the
      // ones this layer actually contains.
      const hasBdb = layerHasAction(
        layer,
        getRpmDbFileContentAction.actionName,
      );
      const hasNdb = layerHasAction(
        layer,
        getRpmNdbFileContentAction.actionName,
      );
      const hasSqlite = layerHasAction(
        layer,
        getRpmSqliteDbFileContentAction.actionName,
      );
      if (!(hasBdb || hasNdb || hasSqlite)) {
        return null;
      }
      const [fromBdb, fromNdb, fromSqlite] = await Promise.all([
        hasBdb ? getRpmDbFileContent(layer) : Promise.resolve([]),
        hasNdb ? getRpmNdbFileContent(layer) : Promise.resolve([]),
        hasSqlite ? getRpmSqliteDbFileContent(layer) : Promise.resolve([]),
      ]);
      const rpmResult = await rpmAnalyze(
        targetImage,
        [...fromBdb, ...fromNdb, ...fromSqlite],
        [],
      );
      return toKeySet(rpmResult.Analysis);
    }

    case AnalysisType.Chisel: {
      if (!layerHasAction(layer, getChiselManifestAction.actionName)) {
        return null;
      }
      const chiselPkgs = getChiselManifestContent(layer);
      const chiselResult = await chiselAnalyze(targetImage, chiselPkgs);
      return toKeySet(chiselResult.Analysis);
    }

    default:
      return null;
  }
}
137+
138+
/**
 * Walks the image layers in order and records, for each layer that carries a
 * package DB of the given package manager, which packages appeared or
 * disappeared compared with the previous layer that carried one.
 *
 * Only the first `min(orderedLayers.length, rootFsLayers.length)` layers are
 * examined. Layers without a package DB are skipped and leave the comparison
 * baseline untouched.
 *
 * @param orderedLayers per-layer extracted content, indexed like rootFsLayers
 * @param analysisType package manager to attribute
 * @param rootFsLayers diff IDs, one per layer
 * @param manifestLayers layer digests; may be shorter than rootFsLayers
 * @param history image history used to look up the instruction per layer
 * @param targetImage image name forwarded to the package-manager analyzer
 * @returns the per-layer entries (only for layers with at least one added or
 *   removed package) and a map from package key to the layer that added it
 */
export async function computeLayerAttribution(
  orderedLayers: ExtractedLayers[],
  analysisType: AnalysisType,
  rootFsLayers: string[],
  manifestLayers: string[],
  history: HistoryEntry[] | null | undefined,
  targetImage: string,
): Promise<LayerAttributionResult> {
  const instructions = buildHistoryInstructions(history);
  const entries: LayerAttributionEntry[] = [];
  const pkgLayerMap = new Map<string, { layerIndex: number; diffID: string }>();
  const layerCount = Math.min(orderedLayers.length, rootFsLayers.length);

  // Package keys of the most recent layer that had a package DB.
  let baseline = new Set<string>();

  for (let layerIndex = 0; layerIndex < layerCount; layerIndex++) {
    const diffID = rootFsLayers[layerIndex];
    // Out-of-range reads yield undefined, which covers manifests or
    // histories that describe fewer layers than rootFsLayers.
    const digest = manifestLayers[layerIndex];
    const instruction = instructions[layerIndex];

    const snapshot = await parseLayerPackages(
      orderedLayers[layerIndex],
      analysisType,
      targetImage,
    );
    if (snapshot === null) {
      // No package DB in this layer: the package state is unchanged, so the
      // baseline is kept as is.
      continue;
    }

    const known = baseline;
    const added = Array.from(snapshot).filter((key) => !known.has(key));
    const removed = Array.from(known).filter((key) => !snapshot.has(key));

    for (const key of added) {
      pkgLayerMap.set(key, { layerIndex, diffID });
    }
    baseline = snapshot;

    if (added.length === 0 && removed.length === 0) {
      continue;
    }

    // Optional fields are only present when they carry a value.
    entries.push({
      layerIndex,
      diffID,
      packages: added,
      ...(digest ? { digest } : {}),
      ...(instruction ? { instruction } : {}),
      ...(removed.length > 0 ? { removedPackages: removed } : {}),
    });
  }

  return { entries, pkgLayerMap };
}
209+
210+
/**
 * Merges attribution entries produced by multiple package managers into a
 * single list sorted by ascending layer index.
 *
 * When several entries share a layer index, their `packages` lists are
 * concatenated in input order, and so are their `removedPackages` lists.
 * Layer metadata (diffID, digest, instruction) is taken from the first entry
 * seen for that layer index. The input entries and their arrays are never
 * mutated.
 *
 * The `removedPackages` key is omitted from a merged entry when no
 * contributing entry supplied one, rather than being set to `undefined`.
 *
 * @param entries attribution entries, in any order
 * @returns new entries, one per distinct layer index
 */
export function mergeLayerAttributionEntries(
  entries: LayerAttributionEntry[],
): LayerAttributionEntry[] {
  const byLayer = new Map<number, LayerAttributionEntry>();

  for (const entry of entries) {
    // Split off removedPackages so the spread below never copies an
    // explicit `removedPackages: undefined` key into the merged entry.
    const { removedPackages, ...rest } = entry;
    const merged = byLayer.get(entry.layerIndex);

    if (!merged) {
      const copy: LayerAttributionEntry = {
        ...rest,
        packages: [...entry.packages],
      };
      if (removedPackages) {
        copy.removedPackages = [...removedPackages];
      }
      byLayer.set(entry.layerIndex, copy);
      continue;
    }

    merged.packages.push(...entry.packages);
    if (removedPackages && removedPackages.length > 0) {
      if (merged.removedPackages) {
        merged.removedPackages.push(...removedPackages);
      } else {
        merged.removedPackages = [...removedPackages];
      }
    }
  }

  return Array.from(byLayer.values()).sort(
    (a, b) => a.layerIndex - b.layerIndex,
  );
}

lib/analyzer/static-analyzer.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import * as Debug from "debug";
22
import { DockerFileAnalysis } from "../dockerfile";
33
import { getErrorMessage } from "../error-utils";
44
import * as archiveExtractor from "../extractor";
5+
import { LayerAttributionEntry } from "../facts";
56
import {
67
getGoModulesContentAction,
78
goModulesToScannedProjects,
@@ -70,6 +71,10 @@ import { pipFilesToScannedProjects } from "./applications/python";
7071
import { getApplicationFiles } from "./applications/runtime-common";
7172
import { AppDepsScanResultWithoutTarget } from "./applications/types";
7273
import { detectJavaRuntime } from "./base-runtimes";
74+
import {
75+
computeLayerAttribution,
76+
mergeLayerAttributionEntries,
77+
} from "./layer-attribution";
7378
import * as osReleaseDetector from "./os-release";
7479
import { analyze as apkAnalyze } from "./package-managers/apk";
7580
import {
@@ -159,6 +164,7 @@ export async function analyze(
159164
imageId,
160165
manifestLayers,
161166
extractedLayers,
167+
orderedLayers,
162168
rootFsLayers,
163169
autoDetectedUserInstructions,
164170
platform,
@@ -236,6 +242,42 @@ export async function analyze(
236242
throw new Error("Failed to detect installed OS packages");
237243
}
238244

245+
let layerPackageAttribution: LayerAttributionEntry[] | undefined;
246+
if (
247+
isTrue(options["layer-attribution"]) &&
248+
rootFsLayers &&
249+
orderedLayers.length > 0
250+
) {
251+
const resultsWithPackages = results.filter((r) => r.Analysis.length > 0);
252+
if (resultsWithPackages.length > 0) {
253+
const allEntries: LayerAttributionEntry[] = [];
254+
for (const result of resultsWithPackages) {
255+
try {
256+
const { entries, pkgLayerMap } = await computeLayerAttribution(
257+
orderedLayers,
258+
result.AnalyzeType,
259+
rootFsLayers,
260+
manifestLayers,
261+
history,
262+
targetImage,
263+
);
264+
allEntries.push(...entries);
265+
for (const pkg of result.Analysis) {
266+
const key = `${pkg.Name}@${pkg.Version}`;
267+
const attr = pkgLayerMap.get(key);
268+
if (attr) {
269+
pkg.layerIndex = attr.layerIndex;
270+
pkg.layerDiffId = attr.diffID;
271+
}
272+
}
273+
} catch (err) {
274+
debug(`Could not compute layer attribution: ${getErrorMessage(err)}`);
275+
}
276+
}
277+
layerPackageAttribution = mergeLayerAttributionEntries(allEntries);
278+
}
279+
}
280+
239281
const binaries = getBinariesHashes(extractedLayers);
240282
const javaRuntime = detectJavaRuntime(extractedLayers);
241283
const baseRuntimes = javaRuntime ? [javaRuntime] : undefined;
@@ -318,6 +360,7 @@ export async function analyze(
318360
baseRuntimes,
319361
imageLayers: manifestLayers,
320362
rootFsLayers,
363+
layerPackageAttribution,
321364
applicationDependenciesScanResults,
322365
manifestFiles,
323366
autoDetectedUserInstructions,

lib/analyzer/types.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { ImageName } from "../extractor/image";
2-
import { BaseRuntime } from "../facts";
2+
import { BaseRuntime, LayerAttributionEntry } from "../facts";
33
import { AutoDetectedUserInstructions, ManifestFile } from "../types";
44
import {
55
AppDepsScanResultWithoutTarget,
@@ -17,6 +17,8 @@ export interface AnalyzedPackage {
1717
};
1818
Purl?: string;
1919
AutoInstalled?: boolean;
20+
layerIndex?: number;
21+
layerDiffId?: string;
2022
}
2123
export interface AnalyzedPackageWithVersion extends AnalyzedPackage {
2224
Version: string;
@@ -79,6 +81,7 @@ export interface StaticAnalysis {
7981
baseRuntimes?: BaseRuntime[];
8082
imageLayers: string[];
8183
rootFsLayers?: string[];
84+
layerPackageAttribution?: LayerAttributionEntry[];
8285
autoDetectedUserInstructions?: AutoDetectedUserInstructions;
8386
applicationDependenciesScanResults: AppDepsScanResultWithoutTarget[];
8487
manifestFiles: ManifestFile[];

lib/dependency-tree/index.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,11 +121,19 @@ export function buildTree(
121121
};
122122

123123
for (const depInfo of tooFrequentDeps) {
124+
const freqLabels: { [key: string]: string } = {};
125+
if (depInfo.layerDiffId !== undefined) {
126+
freqLabels.layerDiffId = depInfo.layerDiffId;
127+
}
128+
if (depInfo.layerIndex !== undefined) {
129+
freqLabels.layerIndex = String(depInfo.layerIndex);
130+
}
124131
const pkg: DepTreeDep = {
125132
name: depFullName(depInfo),
126133
version: depInfo.Version,
127134
sourceVersion: depInfo.SourceVersion,
128135
dependencies: {},
136+
...(Object.keys(freqLabels).length > 0 ? { labels: freqLabels } : {}),
129137
};
130138

131139
// The existence of the "meta" package breaks upgrade
@@ -172,11 +180,20 @@ function buildTreeRecursive(
172180
return null;
173181
}
174182

183+
const labels: { [key: string]: string } = {};
184+
if (depInfo.layerDiffId !== undefined) {
185+
labels.layerDiffId = depInfo.layerDiffId;
186+
}
187+
if (depInfo.layerIndex !== undefined) {
188+
labels.layerIndex = String(depInfo.layerIndex);
189+
}
190+
175191
const tree: DepTreeDep = {
176192
name: fullName,
177193
version: depInfo.Version,
178194
purl: depInfo.Purl,
179195
dependencies: {},
196+
...(Object.keys(labels).length > 0 ? { labels } : {}),
180197
};
181198
if (depInfo._visited) {
182199
return tree;

lib/extractor/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ export async function extractImageContent(
147147
manifestLayers: extractor.getManifestLayers(archiveContent.manifest),
148148
imageCreationTime: archiveContent.imageConfig.created,
149149
extractedLayers: layersWithLatestFileModifications(archiveContent.layers),
150+
orderedLayers: archiveContent.layers,
150151
rootFsLayers: getRootFsLayersFromConfig(archiveContent.imageConfig),
151152
autoDetectedUserInstructions: getDetectedLayersInfoFromConfig(
152153
archiveContent.imageConfig,

0 commit comments

Comments
 (0)