Skip to content

Commit 16a0fce

Browse files
authored
Merge pull request #3456 from github/koesie10/unique-database-names
Make database storage paths more unique
2 parents 1ab3dea + e003175 commit 16a0fce

5 files changed

Lines changed: 191 additions & 57 deletions

File tree

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
type FilenameOptions = {
2+
removeDots?: boolean;
3+
};
4+
5+
/**
6+
* This will create a filename from an arbitrary string by removing
7+
* all characters which are not allowed in filenames and making them
8+
* more filesystem-friendly be replacing undesirable characters with
9+
* hyphens. The result will always be lowercase ASCII.
10+
*
11+
* @param str The string to create a filename from
12+
* @param removeDots Whether to remove dots from the filename [default: false]
13+
* @returns The filename
14+
*/
15+
export function createFilenameFromString(
16+
str: string,
17+
{ removeDots }: FilenameOptions = {},
18+
) {
19+
let fileName = str;
20+
21+
// Lowercase everything
22+
fileName = fileName.toLowerCase();
23+
24+
// Replace all spaces, underscores, slashes, and backslashes with hyphens
25+
fileName = fileName.replaceAll(/[\s_/\\]+/g, "-");
26+
27+
// Replace all characters which are not allowed by empty strings
28+
fileName = fileName.replaceAll(/[^a-z0-9.-]/g, "");
29+
30+
// Remove any leading or trailing hyphens or dots
31+
fileName = fileName.replaceAll(/^[.-]+|[.-]+$/g, "");
32+
33+
// Replace dots by hyphens if dots are not allowed
34+
if (removeDots) {
35+
fileName = fileName.replaceAll(/\./g, "-");
36+
}
37+
38+
// Remove any duplicate hyphens
39+
fileName = fileName.replaceAll(/-{2,}/g, "-");
40+
// Remove any duplicate dots
41+
fileName = fileName.replaceAll(/\.{2,}/g, ".");
42+
43+
return fileName;
44+
}

extensions/ql-vscode/src/databases/database-fetcher.ts

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ import {
1010
pathExists,
1111
createWriteStream,
1212
remove,
13+
readdir,
1314
} from "fs-extra";
1415
import { basename, join } from "path";
1516
import type { Octokit } from "@octokit/rest";
17+
import { nanoid } from "nanoid";
1618

1719
import type { DatabaseManager, DatabaseItem } from "./local-databases";
1820
import { tmpDir } from "../tmp-dir";
@@ -36,6 +38,7 @@ import { AppOctokit } from "../common/octokit";
3638
import type { DatabaseOrigin } from "./local-databases/database-origin";
3739
import { createTimeoutSignal } from "../common/fetch-stream";
3840
import type { App } from "../common/app";
41+
import { createFilenameFromString } from "../common/filenames";
3942
import { findDirWithFile } from "../common/files";
4043
import { convertGithubNwoToDatabaseUrl } from "./github-databases/api";
4144

@@ -364,7 +367,11 @@ async function databaseArchiveFetcher(
364367
throw new Error("No storage path specified.");
365368
}
366369
await ensureDir(storagePath);
367-
const unzipPath = await getStorageFolder(storagePath, databaseUrl);
370+
const unzipPath = await getStorageFolder(
371+
storagePath,
372+
databaseUrl,
373+
nameOverride,
374+
);
368375

369376
if (isFile(databaseUrl)) {
370377
await readAndUnzip(databaseUrl, unzipPath, cli, progress);
@@ -408,31 +415,60 @@ async function databaseArchiveFetcher(
408415
}
409416
}
410417

411-
async function getStorageFolder(storagePath: string, urlStr: string) {
412-
// we need to generate a folder name for the unzipped archive,
413-
// this needs to be human readable since we may use this name as the initial
414-
// name for the database
415-
const url = Uri.parse(urlStr);
416-
// MacOS has a max filename length of 255
417-
// and remove a few extra chars in case we need to add a counter at the end.
418-
let lastName = basename(url.path).substring(0, 250);
419-
if (lastName.endsWith(".zip")) {
420-
lastName = lastName.substring(0, lastName.length - 4);
418+
// The number of tries to use when generating a unique filename before
419+
// giving up and using a nanoid.
420+
const DUPLICATE_FILENAMES_TRIES = 10_000;
421+
422+
async function getStorageFolder(
423+
storagePath: string,
424+
urlStr: string,
425+
nameOverrride?: string,
426+
) {
427+
let lastName: string;
428+
429+
if (nameOverrride) {
430+
lastName = createFilenameFromString(nameOverrride);
431+
} else {
432+
// we need to generate a folder name for the unzipped archive,
433+
// this needs to be human readable since we may use this name as the initial
434+
// name for the database
435+
const url = Uri.parse(urlStr);
436+
// MacOS has a max filename length of 255
437+
// and remove a few extra chars in case we need to add a counter at the end.
438+
lastName = basename(url.path).substring(0, 250);
439+
if (lastName.endsWith(".zip")) {
440+
lastName = lastName.substring(0, lastName.length - 4);
441+
}
421442
}
422443

423444
const realpath = await fs_realpath(storagePath);
424-
let folderName = join(realpath, lastName);
445+
let folderName = lastName;
446+
447+
// get all existing files instead of calling pathExists on every
448+
// single combination of realpath and folderName
449+
const existingFiles = await readdir(realpath);
425450

426451
// avoid overwriting existing folders
427452
let counter = 0;
428-
while (await pathExists(folderName)) {
453+
while (existingFiles.includes(basename(folderName))) {
429454
counter++;
430-
folderName = join(realpath, `${lastName}-${counter}`);
431-
if (counter > 100) {
432-
throw new Error("Could not find a unique name for downloaded database.");
455+
456+
if (counter <= DUPLICATE_FILENAMES_TRIES) {
457+
// First try to use a counter to make the name unique.
458+
folderName = `${lastName}-${counter}`;
459+
} else if (counter <= DUPLICATE_FILENAMES_TRIES + 5) {
460+
// If there are more than 10,000 similarly named databases,
461+
// give up on using a counter and use a random string instead.
462+
folderName = `${lastName}-${nanoid()}`;
463+
} else {
464+
// This should almost never happen, but just in case, we don't want to
465+
// get stuck in an infinite loop.
466+
throw new Error(
467+
"Could not find a unique name for downloaded database. Please remove some databases and try again.",
468+
);
433469
}
434470
}
435-
return folderName;
471+
return join(realpath, folderName);
436472
}
437473

438474
function validateUrl(databaseUrl: string) {

extensions/ql-vscode/src/model-editor/extension-pack-name.ts

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { createFilenameFromString } from "../common/filenames";
2+
13
const packNamePartRegex = /[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/;
24
const packNameRegex = new RegExp(
35
`^(?<scope>${packNamePartRegex.source})/(?<name>${packNamePartRegex.source})$`,
@@ -23,7 +25,11 @@ export function autoNameExtensionPack(
2325
}
2426

2527
const parts = packName.split("/");
26-
const sanitizedParts = parts.map((part) => sanitizeExtensionPackName(part));
28+
const sanitizedParts = parts.map((part) =>
29+
createFilenameFromString(part, {
30+
removeDots: true,
31+
}),
32+
);
2733

2834
// If the scope is empty (e.g. if the given name is "-/b"), then we need to still set a scope
2935
if (sanitizedParts[0].length === 0) {
@@ -37,25 +43,6 @@ export function autoNameExtensionPack(
3743
};
3844
}
3945

40-
function sanitizeExtensionPackName(name: string) {
41-
// Lowercase everything
42-
name = name.toLowerCase();
43-
44-
// Replace all spaces, dots, and underscores with hyphens
45-
name = name.replaceAll(/[\s._]+/g, "-");
46-
47-
// Replace all characters which are not allowed by empty strings
48-
name = name.replaceAll(/[^a-z0-9-]/g, "");
49-
50-
// Remove any leading or trailing hyphens
51-
name = name.replaceAll(/^-|-$/g, "");
52-
53-
// Remove any duplicate hyphens
54-
name = name.replaceAll(/-{2,}/g, "-");
55-
56-
return name;
57-
}
58-
5946
export function parsePackName(packName: string): ExtensionPackName | undefined {
6047
const matches = packNameRegex.exec(packName);
6148
if (!matches?.groups) {

extensions/ql-vscode/src/model-editor/yaml.ts

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import type {
2020
ModelExtension,
2121
ModelExtensionFile,
2222
} from "./model-extension-file";
23+
import { createFilenameFromString } from "../common/filenames";
2324
import type { QueryLanguage } from "../common/query-language";
2425

2526
import modelExtensionFileSchema from "./model-extension-file.schema.json";
@@ -275,26 +276,7 @@ export function createFilenameForLibrary(
275276
prefix = "models/",
276277
suffix = ".model",
277278
) {
278-
let libraryName = library;
279-
280-
// Lowercase everything
281-
libraryName = libraryName.toLowerCase();
282-
283-
// Replace all spaces and underscores with hyphens
284-
libraryName = libraryName.replaceAll(/[\s_]+/g, "-");
285-
286-
// Replace all characters which are not allowed by empty strings
287-
libraryName = libraryName.replaceAll(/[^a-z0-9.-]/g, "");
288-
289-
// Remove any leading or trailing hyphens or dots
290-
libraryName = libraryName.replaceAll(/^[.-]+|[.-]+$/g, "");
291-
292-
// Remove any duplicate hyphens
293-
libraryName = libraryName.replaceAll(/-{2,}/g, "-");
294-
// Remove any duplicate dots
295-
libraryName = libraryName.replaceAll(/\.{2,}/g, ".");
296-
297-
return `${prefix}${libraryName}${suffix}.yml`;
279+
return `${prefix}${createFilenameFromString(library)}${suffix}.yml`;
298280
}
299281

300282
export function createFilenameForPackage(
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import { createFilenameFromString } from "../../../src/common/filenames";
2+
3+
describe("createFilenameFromString", () => {
4+
const testCases: Array<{
5+
input: string;
6+
filename: string;
7+
filenameWithoutDots?: string;
8+
}> = [
9+
{
10+
input: "sql2o",
11+
filename: "sql2o",
12+
},
13+
{
14+
input: "spring-boot",
15+
filename: "spring-boot",
16+
},
17+
{
18+
input: "spring--boot",
19+
filename: "spring-boot",
20+
},
21+
{
22+
input: "rt",
23+
filename: "rt",
24+
},
25+
{
26+
input: "System.Runtime",
27+
filename: "system.runtime",
28+
filenameWithoutDots: "system-runtime",
29+
},
30+
{
31+
input: "System..Runtime",
32+
filename: "system.runtime",
33+
filenameWithoutDots: "system-runtime",
34+
},
35+
{
36+
input: "google/brotli",
37+
filename: "google-brotli",
38+
},
39+
{
40+
input: "github/vscode-codeql",
41+
filename: "github-vscode-codeql",
42+
},
43+
{
44+
input: "github/vscode---codeql--",
45+
filename: "github-vscode-codeql",
46+
},
47+
{
48+
input: "github...vscode--c..odeql",
49+
filename: "github.vscode-c.odeql",
50+
filenameWithoutDots: "github-vscode-c-odeql",
51+
},
52+
{
53+
input: "github\\vscode-codeql",
54+
filename: "github-vscode-codeql",
55+
},
56+
{
57+
input: "uNetworking/uWebSockets.js",
58+
filename: "unetworking-uwebsockets.js",
59+
filenameWithoutDots: "unetworking-uwebsockets-js",
60+
},
61+
{
62+
input: "github/.vscode-codeql",
63+
filename: "github-.vscode-codeql",
64+
filenameWithoutDots: "github-vscode-codeql",
65+
},
66+
];
67+
68+
test.each(testCases)(
69+
"returns $filename if string is $input",
70+
({ input, filename }) => {
71+
expect(createFilenameFromString(input)).toEqual(filename);
72+
},
73+
);
74+
75+
test.each(testCases)(
76+
"returns $filename if string is $input and dots are not allowed",
77+
({ input, filename, filenameWithoutDots }) => {
78+
expect(
79+
createFilenameFromString(input, {
80+
removeDots: true,
81+
}),
82+
).toEqual(filenameWithoutDots ?? filename);
83+
},
84+
);
85+
});

0 commit comments

Comments
 (0)