Skip to content

Commit ce687fa

Browse files
committed
chore: refactored docker and kaniko archive extraction
1 parent f9e49ac commit ce687fa

File tree

9 files changed

+347
-351
lines changed

9 files changed

+347
-351
lines changed
Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,13 @@
1-
import { normalize as normalizePath } from "path";
2-
import { HashAlgorithm } from "../../types";
1+
import {
2+
createGetImageIdFromManifest,
3+
dockerArchiveConfig,
4+
getManifestLayers,
5+
} from "../generic-archive-extractor";
3 6

4-
import { DockerArchiveManifest } from "../types";
5 7
export { extractArchive } from "./layer";
6 8

7-
export function getManifestLayers(manifest: DockerArchiveManifest) {
8-
return manifest.Layers.map((layer) => normalizePath(layer));
9-
}
9+
export { getManifestLayers };
10 10

11-
export function getImageIdFromManifest(
12-
manifest: DockerArchiveManifest,
13-
): string {
14-
try {
15-
const imageId = manifest.Config.split(".")[0];
16-
if (imageId.includes(":")) {
17-
// imageId includes the algorithm prefix
18-
return imageId;
19-
}
20-
return `${HashAlgorithm.Sha256}:${imageId}`;
21-
} catch (err) {
22-
throw new Error("Failed to extract image ID from archive manifest");
23-
}
24-
}
11+
export const getImageIdFromManifest = createGetImageIdFromManifest(
12+
dockerArchiveConfig,
13+
);
Lines changed: 4 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -1,135 +1,6 @@
1-
import * as Debug from "debug";
2-
import { createReadStream } from "fs";
3-
import * as gunzip from "gunzip-maybe";
4-
import { basename, normalize as normalizePath } from "path";
5-
import { Readable } from "stream";
6-
import { extract, Extract } from "tar-stream";
7-
import { InvalidArchiveError } from "..";
8-
import { streamToJson } from "../../stream-utils";
9-
import { PluginOptions } from "../../types";
10-
import { extractImageLayer } from "../layer";
11 1
import {
12-
DockerArchiveManifest,
13-
ExtractAction,
14-
ExtractedLayers,
15-
ExtractedLayersAndManifest,
16-
ImageConfig,
17-
} from "../types";
2+
createExtractArchive,
3+
dockerArchiveConfig,
4+
} from "../generic-archive-extractor";
18 5

19-
const debug = Debug("snyk");
20-
21-
/**
22-
* Retrieve the products of files content from the specified docker-archive.
23-
* @param dockerArchiveFilesystemPath Path to image file saved in docker-archive format.
24-
* @param extractActions Array of pattern-callbacks pairs.
25-
* @param options PluginOptions
26-
* @returns Array of extracted files products sorted by the reverse order of the layers from last to first.
27-
*/
28-
export async function extractArchive(
29-
dockerArchiveFilesystemPath: string,
30-
extractActions: ExtractAction[],
31-
_options: Partial<PluginOptions>,
32-
): Promise<ExtractedLayersAndManifest> {
33-
return new Promise((resolve, reject) => {
34-
const tarExtractor: Extract = extract();
35-
const layers: Record<string, ExtractedLayers> = {};
36-
let manifest: DockerArchiveManifest;
37-
let imageConfig: ImageConfig;
38-
39-
tarExtractor.on("entry", async (header, stream, next) => {
40-
if (header.type === "file") {
41-
const normalizedName = normalizePath(header.name);
42-
if (isTarFile(normalizedName)) {
43-
try {
44-
layers[normalizedName] = await extractImageLayer(
45-
stream,
46-
extractActions,
47-
);
48-
} catch (error) {
49-
debug(`Error extracting layer content from: '${error.message}'`);
50-
reject(new Error("Error reading tar archive"));
51-
}
52-
} else if (isManifestFile(normalizedName)) {
53-
const manifestArray = await getManifestFile<DockerArchiveManifest[]>(
54-
stream,
55-
);
56-
manifest = manifestArray[0];
57-
} else if (isImageConfigFile(normalizedName)) {
58-
imageConfig = await getManifestFile<ImageConfig>(stream);
59-
}
60-
}
61-
62-
stream.resume(); // auto drain the stream
63-
next(); // ready for next entry
64-
});
65-
66-
tarExtractor.on("finish", () => {
67-
try {
68-
resolve(
69-
getLayersContentAndArchiveManifest(manifest, imageConfig, layers),
70-
);
71-
} catch (error) {
72-
debug(
73-
`Error getting layers and manifest content from docker archive: ${error.message}`,
74-
);
75-
reject(new InvalidArchiveError("Invalid Docker archive"));
76-
}
77-
});
78-
79-
tarExtractor.on("error", (error) => reject(error));
80-
81-
createReadStream(dockerArchiveFilesystemPath)
82-
.pipe(gunzip())
83-
.pipe(tarExtractor);
84-
});
85-
}
86-
87-
function getLayersContentAndArchiveManifest(
88-
manifest: DockerArchiveManifest,
89-
imageConfig: ImageConfig,
90-
layers: Record<string, ExtractedLayers>,
91-
): ExtractedLayersAndManifest {
92-
// skip (ignore) non-existent layers
93-
// get the layers content without the name
94-
// reverse layers order from last to first
95-
const layersWithNormalizedNames = manifest.Layers.map((layersName) =>
96-
normalizePath(layersName),
97-
);
98-
const filteredLayers = layersWithNormalizedNames
99-
.filter((layersName) => layers[layersName])
100-
.map((layerName) => layers[layerName])
101-
.reverse();
102-
103-
if (filteredLayers.length === 0) {
104-
throw new Error("We found no layers in the provided image");
105-
}
106-
107-
return {
108-
layers: filteredLayers,
109-
manifest,
110-
imageConfig,
111-
};
112-
}
113-
114-
/**
115-
* Note: consumes the stream.
116-
*/
117-
async function getManifestFile<T>(stream: Readable): Promise<T> {
118-
return streamToJson<T>(stream);
119-
}
120-
121-
function isManifestFile(name: string): boolean {
122-
return name === "manifest.json";
123-
}
124-
125-
function isImageConfigFile(name: string): boolean {
126-
const configRegex = new RegExp("[A-Fa-f0-9]{64}\\.json");
127-
return configRegex.test(name);
128-
}
129-
130-
function isTarFile(name: string): boolean {
131-
// For both "docker save" and "skopeo copy" style archives the
132-
// layers are represented as tar archives whose names end in .tar.
133-
// For Docker this is "layer.tar", for Skopeo - "<sha256ofLayer>.tar".
134-
return basename(name).endsWith(".tar");
135-
}
6+
export const extractArchive = createExtractArchive(dockerArchiveConfig);
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
import * as Debug from "debug";
2+
import { createReadStream } from "fs";
3+
import * as gunzip from "gunzip-maybe";
4+
import { basename, normalize as normalizePath } from "path";
5+
import { Readable } from "stream";
6+
import { extract, Extract } from "tar-stream";
7+
import { streamToJson } from "../stream-utils";
8+
9+
export class InvalidArchiveError extends Error {
10+
constructor(message: string) {
11+
super();
12+
this.name = "InvalidArchiveError";
13+
this.message = message;
14+
}
15+
}
16+
import { HashAlgorithm, PluginOptions } from "../types";
17+
import { extractImageLayer } from "./layer";
18+
import {
19+
ExtractAction,
20+
ExtractedLayers,
21+
ExtractedLayersAndManifest,
22+
ImageConfig,
23+
TarArchiveManifest,
24+
} from "./types";
25+
26+
const debug = Debug("snyk");
27+
28+
export interface ArchiveConfig {
29+
isLayerFile: (name: string) => boolean;
30+
isImageConfigFile: (name: string) => boolean;
31+
formatLabel: string;
32+
layerErrorType: string;
33+
extractImageId: (configValue: string) => string;
34+
}
35+
36+
export const dockerArchiveConfig: ArchiveConfig = {
37+
isLayerFile: (name) => basename(name).endsWith(".tar"),
38+
isImageConfigFile: (name) =>
39+
new RegExp("[A-Fa-f0-9]{64}\\.json").test(name),
40+
formatLabel: "Docker",
41+
layerErrorType: "tar",
42+
extractImageId: (configValue) => configValue.split(".")[0],
43+
};
44+
45+
export const kanikoArchiveConfig: ArchiveConfig = {
46+
isLayerFile: (name) => basename(name).endsWith(".tar.gz"),
47+
isImageConfigFile: (name) =>
48+
new RegExp("sha256:[A-Fa-f0-9]{64}").test(name),
49+
formatLabel: "Kaniko",
50+
layerErrorType: "tar.gz",
51+
extractImageId: (configValue) => configValue,
52+
};
53+
54+
export function createExtractArchive(
55+
config: ArchiveConfig,
56+
): (
57+
archiveFilesystemPath: string,
58+
extractActions: ExtractAction[],
59+
options: Partial<PluginOptions>,
60+
) => Promise<ExtractedLayersAndManifest> {
61+
return (archiveFilesystemPath, extractActions, _options) =>
62+
new Promise((resolve, reject) => {
63+
const tarExtractor: Extract = extract();
64+
const layers: Record<string, ExtractedLayers> = {};
65+
let manifest: TarArchiveManifest;
66+
let imageConfig: ImageConfig;
67+
68+
tarExtractor.on("entry", async (header, stream, next) => {
69+
if (header.type === "file") {
70+
const normalizedName = normalizePath(header.name);
71+
if (config.isLayerFile(normalizedName)) {
72+
try {
73+
layers[normalizedName] = await extractImageLayer(
74+
stream,
75+
extractActions,
76+
);
77+
} catch (error) {
78+
debug(`Error extracting layer content from: '${error.message}'`);
79+
reject(
80+
new Error(
81+
`Error reading ${config.layerErrorType} archive`,
82+
),
83+
);
84+
}
85+
} else if (isManifestFile(normalizedName)) {
86+
const manifestArray =
87+
await getManifestFile<TarArchiveManifest[]>(stream);
88+
manifest = manifestArray[0];
89+
} else if (config.isImageConfigFile(normalizedName)) {
90+
imageConfig = await getManifestFile<ImageConfig>(stream);
91+
}
92+
}
93+
94+
stream.resume();
95+
next();
96+
});
97+
98+
tarExtractor.on("finish", () => {
99+
try {
100+
resolve(
101+
assembleLayersAndManifest(manifest, imageConfig, layers),
102+
);
103+
} catch (error) {
104+
debug(
105+
`Error getting layers and manifest content from ${config.formatLabel} archive: ${error.message}`,
106+
);
107+
reject(
108+
new InvalidArchiveError(`Invalid ${config.formatLabel} archive`),
109+
);
110+
}
111+
});
112+
113+
tarExtractor.on("error", (error) => reject(error));
114+
115+
createReadStream(archiveFilesystemPath)
116+
.on("error", (error) => reject(error))
117+
.pipe(gunzip())
118+
.pipe(tarExtractor);
119+
});
120+
}
121+
122+
function assembleLayersAndManifest(
123+
manifest: TarArchiveManifest,
124+
imageConfig: ImageConfig,
125+
layers: Record<string, ExtractedLayers>,
126+
): ExtractedLayersAndManifest {
127+
const layersWithNormalizedNames = manifest.Layers.map((layerName) =>
128+
normalizePath(layerName),
129+
);
130+
const filteredLayers = layersWithNormalizedNames
131+
.filter((layerName) => layers[layerName])
132+
.map((layerName) => layers[layerName])
133+
.reverse();
134+
135+
if (filteredLayers.length === 0) {
136+
throw new Error("We found no layers in the provided image");
137+
}
138+
139+
return {
140+
layers: filteredLayers,
141+
manifest,
142+
imageConfig,
143+
};
144+
}
145+
146+
async function getManifestFile<T>(stream: Readable): Promise<T> {
147+
return streamToJson<T>(stream);
148+
}
149+
150+
function isManifestFile(name: string): boolean {
151+
return name === "manifest.json";
152+
}
153+
154+
export function createGetImageIdFromManifest(
155+
config: ArchiveConfig,
156+
): (manifest: TarArchiveManifest) => string {
157+
return (manifest) => {
158+
try {
159+
const imageId = config.extractImageId(manifest.Config);
160+
if (imageId.includes(":")) {
161+
return imageId;
162+
}
163+
return `${HashAlgorithm.Sha256}:${imageId}`;
164+
} catch (err) {
165+
throw new Error("Failed to extract image ID from archive manifest");
166+
}
167+
};
168+
}
169+
170+
export function getManifestLayers(manifest: TarArchiveManifest): string[] {
171+
return manifest.Layers.map((layer) => normalizePath(layer));
172+
}

lib/extractor/index.ts

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,12 @@ import {
20 20
ImageConfig,
21 21
OciArchiveManifest,
22 22
} from "./types";
23+
import { isWhitedOutFile } from "./layer";
24+
import { InvalidArchiveError } from "./generic-archive-extractor";
23 25

24 26
const debug = Debug("snyk");
25 27

26-
export class InvalidArchiveError extends Error {
27-
constructor(message) {
28-
super();
29-
this.name = "InvalidArchiveError";
30-
this.message = message;
31-
}
32-
}
28+
export { InvalidArchiveError } from "./generic-archive-extractor";
33 29
class ArchiveExtractor {
34 30
private extractor: Extractor;
35 31
private fileSystemPath: string;
@@ -260,9 +256,7 @@ function layersWithLatestFileModifications(
260 256
return extractedLayers;
261 257
}
262 258

263-
export function isWhitedOutFile(filename: string) {
264-
return filename.match(/.wh./gm);
265-
}
259+
export { isWhitedOutFile } from "./layer";
266 260

267 261
function isBufferType(type: FileContent): type is Buffer {
268 262
return (type as Buffer).buffer !== undefined;

0 commit comments

Comments
 (0)