Skip to content

Commit 9a76538

Browse files
committed
chore: refactored docker and kaniko archive extraction
1 parent 7936ba5 commit 9a76538

File tree

9 files changed

+347
-365
lines changed

9 files changed

+347
-365
lines changed
Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,13 @@
1-
import { normalize as normalizePath } from "path";
2-
import { HashAlgorithm } from "../../types";
1+
import {
2+
createGetImageIdFromManifest,
3+
dockerArchiveConfig,
4+
getManifestLayers,
5+
} from "../generic-archive-extractor";
36

4-
import { DockerArchiveManifest } from "../types";
57
export { extractArchive } from "./layer";
68

7-
export function getManifestLayers(manifest: DockerArchiveManifest) {
8-
return manifest.Layers.map((layer) => normalizePath(layer));
9-
}
9+
export { getManifestLayers };
1010

11-
export function getImageIdFromManifest(
12-
manifest: DockerArchiveManifest,
13-
): string {
14-
try {
15-
const imageId = manifest.Config.split(".")[0];
16-
if (imageId.includes(":")) {
17-
// imageId includes the algorithm prefix
18-
return imageId;
19-
}
20-
return `${HashAlgorithm.Sha256}:${imageId}`;
21-
} catch (err) {
22-
throw new Error("Failed to extract image ID from archive manifest");
23-
}
24-
}
11+
/**
 * Resolves the image ID from a docker-archive manifest, using the shared
 * implementation configured with the Docker archive settings.
 */
export const getImageIdFromManifest =
  createGetImageIdFromManifest(dockerArchiveConfig);
Lines changed: 4 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -1,142 +1,6 @@
1-
import * as Debug from "debug";
2-
import { createReadStream } from "fs";
3-
import * as gunzip from "gunzip-maybe";
4-
import { basename, normalize as normalizePath } from "path";
5-
import { Readable } from "stream";
6-
import { extract, Extract } from "tar-stream";
7-
import { InvalidArchiveError } from "..";
8-
import { getErrorMessage } from "../../error-utils";
9-
import { streamToJson } from "../../stream-utils";
10-
import { PluginOptions } from "../../types";
11-
import { extractImageLayer } from "../layer";
121
import {
13-
DockerArchiveManifest,
14-
ExtractAction,
15-
ExtractedLayers,
16-
ExtractedLayersAndManifest,
17-
ImageConfig,
18-
} from "../types";
2+
createExtractArchive,
3+
dockerArchiveConfig,
4+
} from "../generic-archive-extractor";
195

20-
const debug = Debug("snyk");
21-
22-
/**
23-
* Retrieve the products of files content from the specified docker-archive.
24-
* @param dockerArchiveFilesystemPath Path to image file saved in docker-archive format.
25-
* @param extractActions Array of pattern-callbacks pairs.
26-
* @param options PluginOptions
27-
* @returns Array of extracted files products sorted by the reverse order of the layers from last to first.
28-
*/
29-
export async function extractArchive(
30-
dockerArchiveFilesystemPath: string,
31-
extractActions: ExtractAction[],
32-
_options: Partial<PluginOptions>,
33-
): Promise<ExtractedLayersAndManifest> {
34-
return new Promise((resolve, reject) => {
35-
const tarExtractor: Extract = extract();
36-
const layers: Record<string, ExtractedLayers> = {};
37-
let manifest: DockerArchiveManifest;
38-
let imageConfig: ImageConfig;
39-
40-
tarExtractor.on("entry", async (header, stream, next) => {
41-
if (header.type === "file") {
42-
const normalizedName = normalizePath(header.name);
43-
if (isTarFile(normalizedName)) {
44-
try {
45-
layers[normalizedName] = await extractImageLayer(
46-
stream,
47-
extractActions,
48-
);
49-
} catch (error) {
50-
debug(
51-
`Error extracting layer content from: '${getErrorMessage(
52-
error,
53-
)}'`,
54-
);
55-
reject(new Error("Error reading tar archive"));
56-
}
57-
} else if (isManifestFile(normalizedName)) {
58-
const manifestArray = await getManifestFile<DockerArchiveManifest[]>(
59-
stream,
60-
);
61-
manifest = manifestArray[0];
62-
} else if (isImageConfigFile(normalizedName)) {
63-
imageConfig = await getManifestFile<ImageConfig>(stream);
64-
}
65-
}
66-
67-
stream.resume(); // auto drain the stream
68-
next(); // ready for next entry
69-
});
70-
71-
tarExtractor.on("finish", () => {
72-
try {
73-
resolve(
74-
getLayersContentAndArchiveManifest(manifest, imageConfig, layers),
75-
);
76-
} catch (error) {
77-
debug(
78-
`Error getting layers and manifest content from docker archive: ${getErrorMessage(
79-
error,
80-
)}`,
81-
);
82-
reject(new InvalidArchiveError("Invalid Docker archive"));
83-
}
84-
});
85-
86-
tarExtractor.on("error", (error) => reject(error));
87-
88-
createReadStream(dockerArchiveFilesystemPath)
89-
.pipe(gunzip())
90-
.pipe(tarExtractor);
91-
});
92-
}
93-
94-
function getLayersContentAndArchiveManifest(
95-
manifest: DockerArchiveManifest,
96-
imageConfig: ImageConfig,
97-
layers: Record<string, ExtractedLayers>,
98-
): ExtractedLayersAndManifest {
99-
// skip (ignore) non-existent layers
100-
// get the layers content without the name
101-
// reverse layers order from last to first
102-
const layersWithNormalizedNames = manifest.Layers.map((layersName) =>
103-
normalizePath(layersName),
104-
);
105-
const filteredLayers = layersWithNormalizedNames
106-
.filter((layersName) => layers[layersName])
107-
.map((layerName) => layers[layerName])
108-
.reverse();
109-
110-
if (filteredLayers.length === 0) {
111-
throw new Error("We found no layers in the provided image");
112-
}
113-
114-
return {
115-
layers: filteredLayers,
116-
manifest,
117-
imageConfig,
118-
};
119-
}
120-
121-
/**
122-
* Note: consumes the stream.
123-
*/
124-
async function getManifestFile<T>(stream: Readable): Promise<T> {
125-
return streamToJson<T>(stream);
126-
}
127-
128-
function isManifestFile(name: string): boolean {
129-
return name === "manifest.json";
130-
}
131-
132-
function isImageConfigFile(name: string): boolean {
133-
const configRegex = new RegExp("[A-Fa-f0-9]{64}\\.json");
134-
return configRegex.test(name);
135-
}
136-
137-
function isTarFile(name: string): boolean {
138-
// For both "docker save" and "skopeo copy" style archives the
139-
// layers are represented as tar archives whose names end in .tar.
140-
// For Docker this is "layer.tar", for Skopeo - "<sha256ofLayer>.tar".
141-
return basename(name).endsWith(".tar");
142-
}
6+
// Docker-archive extractor, built from the shared generic implementation
// configured with the Docker archive settings.
export const extractArchive = createExtractArchive(dockerArchiveConfig);
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
import * as Debug from "debug";
2+
import { createReadStream } from "fs";
3+
import * as gunzip from "gunzip-maybe";
4+
import { basename, normalize as normalizePath } from "path";
5+
import { Readable } from "stream";
6+
import { extract, Extract } from "tar-stream";
7+
import { streamToJson } from "../stream-utils";
8+
9+
/**
 * Error used to signal that an archive could not be interpreted as a valid
 * image archive.
 */
export class InvalidArchiveError extends Error {
  /**
   * @param message Human-readable description of why the archive is invalid.
   */
  constructor(message: string) {
    // Hand the message to Error so it is part of the captured stack trace
    // rather than being patched on after the fact.
    super(message);
    // Keeps `instanceof InvalidArchiveError` working when compiled to ES5,
    // where subclassing built-ins loses the prototype chain.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = "InvalidArchiveError";
  }
}
16+
import { HashAlgorithm, PluginOptions } from "../types";
17+
import { extractImageLayer } from "./layer";
18+
import {
19+
ExtractAction,
20+
ExtractedLayers,
21+
ExtractedLayersAndManifest,
22+
ImageConfig,
23+
TarArchiveManifest,
24+
} from "./types";
25+
26+
const debug = Debug("snyk");
27+
28+
/**
 * Describes the points on which one tar-based image archive format differs
 * from another, so a single extractor implementation can serve several formats.
 */
export interface ArchiveConfig {
  /** Returns true when the normalized tar entry name denotes an image layer. */
  isLayerFile: (name: string) => boolean;
  /** Returns true when the normalized tar entry name denotes the image config. */
  isImageConfigFile: (name: string) => boolean;
  /** Format name interpolated into log and error messages, e.g. "Docker". */
  formatLabel: string;
  /** Archive kind interpolated into the "Error reading … archive" message. */
  layerErrorType: string;
  /** Derives the (possibly un-prefixed) image ID from the manifest `Config` value. */
  extractImageId: (configValue: string) => string;
}
35+
36+
// Built once instead of on every tar entry. The pattern has no `g` flag, so
// `test` is stateless and sharing the instance is safe.
const DOCKER_IMAGE_CONFIG_PATTERN = /[A-Fa-f0-9]{64}\.json/;

/**
 * Settings for docker-archive style images.
 *
 * For both "docker save" and "skopeo copy" style archives the layers are
 * represented as tar archives whose names end in .tar. For Docker this is
 * "layer.tar", for Skopeo - "<sha256ofLayer>.tar".
 */
export const dockerArchiveConfig: ArchiveConfig = {
  isLayerFile: (name) => basename(name).endsWith(".tar"),
  // The image config is a JSON file named after a 64-character hex digest.
  isImageConfigFile: (name) => DOCKER_IMAGE_CONFIG_PATTERN.test(name),
  formatLabel: "Docker",
  layerErrorType: "tar",
  // The manifest `Config` value looks like "<id>.json"; keep the part before
  // the first dot.
  extractImageId: (configValue) => configValue.split(".")[0],
};
44+
45+
// Built once instead of on every tar entry. The pattern has no `g` flag, so
// `test` is stateless and sharing the instance is safe.
const KANIKO_IMAGE_CONFIG_PATTERN = /sha256:[A-Fa-f0-9]{64}/;

/**
 * Settings for Kaniko style archives: layer entries end in ".tar.gz", the
 * image config entry name contains "sha256:<64 hex characters>", and the
 * manifest `Config` value is used as the image ID unchanged.
 */
export const kanikoArchiveConfig: ArchiveConfig = {
  isLayerFile: (name) => basename(name).endsWith(".tar.gz"),
  isImageConfigFile: (name) => KANIKO_IMAGE_CONFIG_PATTERN.test(name),
  formatLabel: "Kaniko",
  layerErrorType: "tar.gz",
  extractImageId: (configValue) => configValue,
};
53+
54+
/**
 * Builds an `extractArchive` function for the archive format described by
 * `config`.
 *
 * The returned function streams the archive at `archiveFilesystemPath`
 * through gunzip-maybe and a tar extractor, and for every file entry:
 *  - runs `extractImageLayer` with `extractActions` when it is a layer,
 *  - parses it as the archive manifest when it is named "manifest.json"
 *    (only the first element of the manifest array is kept),
 *  - parses it as the image config when `config.isImageConfigFile` matches.
 *
 * @param config Format-specific predicates and labels.
 * @returns A function resolving to the extracted layers (last layer first),
 *   the manifest and the image config. Its promise rejects with:
 *   - `Error("Error reading <layerErrorType> archive")` when a layer cannot
 *     be extracted,
 *   - `InvalidArchiveError` when the manifest or image config cannot be
 *     parsed, or when the layers and manifest cannot be assembled,
 *   - the underlying stream error when reading, decompressing or untarring
 *     the archive fails.
 */
export function createExtractArchive(
  config: ArchiveConfig,
): (
  archiveFilesystemPath: string,
  extractActions: ExtractAction[],
  options: Partial<PluginOptions>,
) => Promise<ExtractedLayersAndManifest> {
  return (archiveFilesystemPath, extractActions, _options) =>
    new Promise((resolve, reject) => {
      const tarExtractor: Extract = extract();
      const layers: Record<string, ExtractedLayers> = {};
      let manifest: TarArchiveManifest;
      let imageConfig: ImageConfig;

      tarExtractor.on("entry", async (header, stream, next) => {
        try {
          if (header.type === "file") {
            const normalizedName = normalizePath(header.name);
            if (config.isLayerFile(normalizedName)) {
              try {
                layers[normalizedName] = await extractImageLayer(
                  stream,
                  extractActions,
                );
              } catch (error) {
                debug(
                  `Error extracting layer content from: '${describeArchiveError(
                    error,
                  )}'`,
                );
                reject(
                  new Error(`Error reading ${config.layerErrorType} archive`),
                );
              }
            } else if (isManifestFile(normalizedName)) {
              const manifestArray =
                await getManifestFile<TarArchiveManifest[]>(stream);
              manifest = manifestArray[0];
            } else if (config.isImageConfigFile(normalizedName)) {
              imageConfig = await getManifestFile<ImageConfig>(stream);
            }
          }
        } catch (error) {
          // A manifest/config that cannot be parsed would otherwise reject
          // this async handler unobserved and leave the outer promise
          // pending forever, because `next()` would never be called.
          debug(
            `Error reading ${config.formatLabel} archive entry: ${describeArchiveError(
              error,
            )}`,
          );
          reject(
            new InvalidArchiveError(`Invalid ${config.formatLabel} archive`),
          );
        } finally {
          stream.resume(); // auto drain the stream
          next(); // ready for next entry
        }
      });

      tarExtractor.on("finish", () => {
        try {
          resolve(assembleLayersAndManifest(manifest, imageConfig, layers));
        } catch (error) {
          debug(
            `Error getting layers and manifest content from ${
              config.formatLabel
            } archive: ${describeArchiveError(error)}`,
          );
          reject(
            new InvalidArchiveError(`Invalid ${config.formatLabel} archive`),
          );
        }
      });

      // `pipe` does not forward errors downstream, so every stage of the
      // chain needs its own listener.
      const onStreamError = (error: Error): void => reject(error);
      tarExtractor.on("error", onStreamError);

      createReadStream(archiveFilesystemPath)
        .on("error", onStreamError)
        .pipe(gunzip())
        .on("error", onStreamError)
        .pipe(tarExtractor);
    });
}

/** Produces a printable message from whatever value was thrown. */
function describeArchiveError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}
121+
122+
/**
 * Combines the extracted layers with the archive manifest and image config.
 *
 * Layers listed in the manifest but not present among the extracted layers
 * are skipped; the remaining layers are returned without their names, in
 * reverse manifest order (from last to first).
 *
 * @param manifest Archive manifest whose `Layers` lists the layer entry names.
 * @param imageConfig Parsed image config, passed through unchanged.
 * @param layers Extracted layer contents keyed by normalized entry name.
 * @returns The ordered layers together with the manifest and image config.
 * @throws Error when the manifest is missing or has no layer list, or when
 *   none of the listed layers was extracted.
 */
function assembleLayersAndManifest(
  manifest: TarArchiveManifest,
  imageConfig: ImageConfig,
  layers: Record<string, ExtractedLayers>,
): ExtractedLayersAndManifest {
  // The manifest is only set if a manifest.json entry was seen; fail with a
  // readable message rather than a TypeError on `manifest.Layers`.
  if (!manifest || !Array.isArray(manifest.Layers)) {
    throw new Error("Archive manifest is missing or has no layer list");
  }

  const orderedLayers: ExtractedLayers[] = [];
  for (const layerName of manifest.Layers) {
    const key = normalizePath(layerName);
    // Own-property check so names such as "toString" cannot match members
    // inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(layers, key) && layers[key]) {
      orderedLayers.push(layers[key]);
    }
  }
  orderedLayers.reverse();

  if (orderedLayers.length === 0) {
    throw new Error("We found no layers in the provided image");
  }

  return {
    layers: orderedLayers,
    manifest,
    imageConfig,
  };
}
145+
146+
/**
 * Parses the content of a tar entry stream as JSON.
 * Note: consumes the stream.
 *
 * @param stream Readable stream of the entry content.
 * @returns The parsed value, typed as `T` without validation.
 */
async function getManifestFile<T>(stream: Readable): Promise<T> {
  return streamToJson<T>(stream);
}
149+
150+
/**
 * Tells whether a normalized tar entry name is the archive manifest, i.e. a
 * file named exactly "manifest.json" at the archive root.
 */
function isManifestFile(name: string): boolean {
  return name === "manifest.json";
}
153+
154+
/**
 * Builds a function that derives the image ID from an archive manifest.
 *
 * The ID is obtained by applying `config.extractImageId` to the manifest's
 * `Config` value. If it already carries an algorithm prefix (contains ":")
 * it is returned as-is; otherwise it is prefixed with the SHA-256 algorithm
 * name.
 *
 * @param config Format-specific settings supplying `extractImageId`.
 * @returns A function mapping a manifest to its image ID.
 * @throws Error("Failed to extract image ID from archive manifest") from the
 *   returned function when the ID cannot be derived or is empty.
 */
export function createGetImageIdFromManifest(
  config: ArchiveConfig,
): (manifest: TarArchiveManifest) => string {
  return (manifest) => {
    try {
      const imageId = config.extractImageId(manifest.Config);
      if (!imageId) {
        // An empty ID would otherwise be returned as a bare "sha256:" prefix.
        throw new Error("Empty image ID");
      }
      if (imageId.includes(":")) {
        // imageId includes the algorithm prefix
        return imageId;
      }
      return `${HashAlgorithm.Sha256}:${imageId}`;
    } catch (err) {
      throw new Error("Failed to extract image ID from archive manifest");
    }
  };
}
169+
170+
/**
 * Lists the layer entry names of an archive manifest with each path
 * normalized, so they can be compared against normalized tar entry names.
 *
 * @param manifest Archive manifest whose `Layers` lists the layer entry names.
 * @returns The normalized layer names, in manifest order.
 */
export function getManifestLayers(manifest: TarArchiveManifest): string[] {
  return manifest.Layers.map((layer) => normalizePath(layer));
}

lib/extractor/index.ts

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,12 @@ import {
2121
ImageConfig,
2222
OciArchiveManifest,
2323
} from "./types";
24+
import { isWhitedOutFile } from "./layer";
25+
import { InvalidArchiveError } from "./generic-archive-extractor";
2426

2527
const debug = Debug("snyk");
2628

27-
export class InvalidArchiveError extends Error {
28-
constructor(message) {
29-
super();
30-
this.name = "InvalidArchiveError";
31-
this.message = message;
32-
}
33-
}
29+
export { InvalidArchiveError } from "./generic-archive-extractor";
3430
class ArchiveExtractor {
3531
private extractor: Extractor;
3632
private fileSystemPath: string;
@@ -263,9 +259,7 @@ function layersWithLatestFileModifications(
263259
return extractedLayers;
264260
}
265261

266-
export function isWhitedOutFile(filename: string) {
267-
return filename.match(/.wh./gm);
268-
}
262+
export { isWhitedOutFile } from "./layer";
269263

270264
function isBufferType(type: FileContent): type is Buffer {
271265
return (type as Buffer).buffer !== undefined;

0 commit comments

Comments
 (0)