Skip to content

Commit 84de8ad

Browse files
committed
Add creation of auto-model request V2
1 parent 57bcfbb commit 84de8ad

9 files changed

Lines changed: 749 additions & 37 deletions

File tree

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import { gzip, gunzip, InputType as ZlibInputType, ZlibOptions } from "zlib";
2+
3+
/**
 * Compresses data with gzip, exposing the callback-based `zlib.gzip` as a promise.
 *
 * @param buffer Data to compress
 * @param options zlib options forwarded to `zlib.gzip` (defaults to an empty object)
 * @returns A promise that resolves with the compressed bytes, or rejects with
 * the error reported by zlib
 */
export function gzipEncode(
  buffer: ZlibInputType,
  options: ZlibOptions = {},
): Promise<Buffer> {
  return new Promise<Buffer>((onCompressed, onFailure) => {
    const settle = (failure: Error | null, compressed: Buffer): void => {
      if (!failure) {
        onCompressed(compressed);
      } else {
        onFailure(failure);
      }
    };

    gzip(buffer, options, settle);
  });
}
23+
24+
/**
 * Decompresses gzip data, exposing the callback-based `zlib.gunzip` as a promise.
 *
 * @param buffer Gzip-compressed data to decompress
 * @param options zlib options forwarded to `zlib.gunzip` (defaults to an empty object)
 * @returns A promise that resolves with the decompressed bytes, or rejects with
 * the error reported by zlib
 */
export function gzipDecode(
  buffer: ZlibInputType,
  options: ZlibOptions = {},
): Promise<Buffer> {
  return new Promise<Buffer>((onDecompressed, onFailure) => {
    const settle = (failure: Error | null, decompressed: Buffer): void => {
      if (!failure) {
        onDecompressed(decompressed);
      } else {
        onFailure(failure);
      }
    };

    gunzip(buffer, options, settle);
  });
}

extensions/ql-vscode/src/config.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,7 @@ export function showQueriesPanel(): boolean {
704704

705705
const DATA_EXTENSIONS = new Setting("dataExtensions", ROOT_SETTING);
706706
const LLM_GENERATION = new Setting("llmGeneration", DATA_EXTENSIONS);
707+
const LLM_GENERATION_V2 = new Setting("llmGenerationV2", DATA_EXTENSIONS);
707708
const FRAMEWORK_MODE = new Setting("frameworkMode", DATA_EXTENSIONS);
708709
const DISABLE_AUTO_NAME_EXTENSION_PACK = new Setting(
709710
"disableAutoNameExtensionPack",
@@ -718,6 +719,10 @@ export function showLlmGeneration(): boolean {
718719
return !!LLM_GENERATION.getValue<boolean>();
719720
}
720721

722+
/**
 * Reports whether the `llmGenerationV2` setting is enabled.
 *
 * @returns `true` when the setting holds a truthy value, `false` otherwise
 * (including when it is unset)
 */
export function useLlmGenerationV2(): boolean {
  return Boolean(LLM_GENERATION_V2.getValue<boolean>());
}
725+
721726
/**
 * Reports whether the `frameworkMode` setting is enabled.
 *
 * @returns `true` when the setting holds a truthy value, `false` otherwise
 * (including when it is unset)
 */
export function enableFrameworkMode(): boolean {
  return Boolean(FRAMEWORK_MODE.getValue<boolean>());
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import { Credentials } from "../common/authentication";
2+
import { OctokitResponse } from "@octokit/types";
3+
4+
/**
 * Values of the `mode` field sent in a {@link ModelRequest}.
 */
export enum AutomodelMode {
  /** No mode specified. */
  Unspecified = "AUTOMODEL_MODE_UNSPECIFIED",
  /** Framework mode. */
  Framework = "AUTOMODEL_MODE_FRAMEWORK",
  /** Application mode. */
  Application = "AUTOMODEL_MODE_APPLICATION",
}
9+
10+
/**
 * Request body sent to the auto-model endpoint.
 */
export interface ModelRequest {
  /** Which mode the candidates were extracted in. */
  mode: AutomodelMode;
  /** Base64-encoded GZIP-compressed SARIF log */
  candidates: string;
}
15+
16+
/**
 * Response body returned by the auto-model endpoint.
 */
export interface ModelResponse {
  /** Generated models as a string (format not visible here; confirm against the API). */
  models: string;
}
19+
20+
/**
 * Sends an auto-model request to the code-scanning auto-model endpoint and
 * returns the response body.
 *
 * @param credentials Credentials used to obtain the Octokit client
 * @param request Request payload, sent as the body of the POST
 * @returns The `data` of the Octokit response
 */
export async function autoModelV2(
  credentials: Credentials,
  request: ModelRequest,
): Promise<ModelResponse> {
  const client = await credentials.getOctokit();

  const { data }: OctokitResponse<ModelResponse> = await client.request(
    "POST /repos/github/codeql/code-scanning/codeql/auto-model",
    { data: request },
  );

  return data;
}
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
import { CodeQLCliServer, SourceInfo } from "../codeql-cli/cli";
2+
import { QueryRunner } from "../query-server";
3+
import { DatabaseItem } from "../databases/local-databases";
4+
import { ProgressCallback } from "../common/vscode/progress";
5+
import * as Sarif from "sarif";
6+
import { qlpackOfDatabase, resolveQueries } from "../local-queries";
7+
import { extLogger } from "../common/logging/vscode";
8+
import { Mode } from "./shared/mode";
9+
import { QlPacksForLanguage } from "../databases/qlpack";
10+
import { createLockFileForStandardQuery } from "../local-queries/standard-queries";
11+
import { CancellationToken, CancellationTokenSource } from "vscode";
12+
import { getOnDiskWorkspaceFolders } from "../common/vscode/workspace-folders";
13+
import { showAndLogExceptionWithTelemetry, TeeLogger } from "../common/logging";
14+
import { QueryResultType } from "../query-server/new-messages";
15+
import { telemetryListener } from "../common/vscode/telemetry";
16+
import { redactableError } from "../common/errors";
17+
import { interpretResultsSarif } from "../query-results";
18+
import { join } from "path";
19+
import { assertNever } from "../common/helpers-pure";
20+
21+
/**
 * Inputs accepted by `runAutoModelQueries`.
 */
type Options = {
  /** CLI server used to resolve packs, queries and source location prefix. */
  cliServer: CodeQLCliServer;
  /** Runner used to create and evaluate the query run. */
  queryRunner: QueryRunner;
  /** Database the queries are run against. */
  databaseItem: DatabaseItem;
  /** Directory handed to the query run and used for interpreted SARIF output. */
  queryStorageDir: string;

  /** Callback that receives progress updates. */
  progress: ProgressCallback;
};
29+
30+
/**
 * Result of running the auto-model queries.
 */
export type AutoModelQueriesResult = {
  /** SARIF log produced by the "candidates" query. */
  candidates: Sarif.Log;
};
33+
34+
/**
 * Inputs accepted by `runAutoModelQuery` for running a single query.
 */
type AutoModelQueryOptions = {
  /** CLI server used to resolve the query, its metadata and its results. */
  cliServer: CodeQLCliServer;
  /** Runner used to create and evaluate the query run. */
  queryRunner: QueryRunner;
  /** Database the query is run against. */
  databaseItem: DatabaseItem;
  /** Query packs in which the query is looked up. */
  qlpack: QlPacksForLanguage;
  /** Source archive information passed to SARIF interpretation, if available. */
  sourceInfo: SourceInfo | undefined;
  /** Extension packs passed to the query run. */
  extensionPacks: string[];
  /** Directory handed to the query run and used for interpreted SARIF output. */
  queryStorageDir: string;

  /** Callback that receives progress updates from the evaluation. */
  progress: ProgressCallback;
  /** Token passed to the evaluation for cancellation. */
  token: CancellationToken;
};
46+
47+
/**
 * Maps a mode to the query tag used to select auto-model queries for it.
 *
 * @param mode Mode to translate
 * @returns `"application-mode"` or `"framework-mode"`
 */
function modeTag(mode: Mode): string {
  if (mode === Mode.Application) {
    return "application-mode";
  }
  if (mode === Mode.Framework) {
    return "framework-mode";
  }

  // Exhaustiveness check: every known mode is handled above.
  return assertNever(mode);
}
57+
58+
/**
 * Resolves, runs and interprets a single auto-model extraction query.
 *
 * The query is looked up in `qlpack` by its tags, evaluated against the
 * database, and its results are interpreted as SARIF (with snippets).
 *
 * @param mode Mode whose tag is used to select the query
 * @param queryTag Space-separated tag(s) identifying the query, e.g. `candidates`
 * @param options CLI server, query runner, database and related inputs
 * @returns The interpreted SARIF log, or `undefined` if the query evaluation
 * did not succeed (the failure is reported to the user and telemetry)
 * @throws Error if no query, or more than one query, matches the tags
 */
async function runAutoModelQuery(
  mode: Mode,
  queryTag: string,
  {
    cliServer,
    queryRunner,
    databaseItem,
    qlpack,
    sourceInfo,
    extensionPacks,
    queryStorageDir,
    progress,
    token,
  }: AutoModelQueryOptions,
): Promise<Sarif.Log | undefined> {
  // First, resolve the query that we want to run.
  // All queries are tagged like this:
  // internal extract automodel <mode> <queryTag>
  // Example: internal extract automodel framework-mode candidates
  // The filter below requires the "automodel", mode and query tags to all be present.
  const queries = await resolveQueries(
    cliServer,
    qlpack,
    `Extract automodel ${queryTag}`,
    {
      kind: "problem",
      "tags contain all": ["automodel", modeTag(mode), ...queryTag.split(" ")],
    },
  );
  if (queries.length > 1) {
    throw new Error(
      `Found multiple auto model queries for ${mode} ${queryTag}. Can't continue`,
    );
  }
  if (queries.length === 0) {
    throw new Error(
      `Did not find any auto model queries for ${mode} ${queryTag}. Can't continue`,
    );
  }

  const queryPath = queries[0];
  const { cleanup: cleanupLockFile } = await createLockFileForStandardQuery(
    cliServer,
    queryPath,
  );

  // Everything that happens while the lock file exists is wrapped in
  // try/finally so the lock file is removed even when metadata resolution or
  // the evaluation throws.
  const evaluateQuery = async () => {
    try {
      // Get metadata for the query. This is required to interpret the results. We already know the kind is problem
      // (because of the constraint in resolveQueries), so we don't need any more checks on the metadata.
      const metadata = await cliServer.resolveMetadata(queryPath);

      const queryRun = queryRunner.createQueryRun(
        databaseItem.databaseUri.fsPath,
        {
          queryPath,
          quickEvalPosition: undefined,
          quickEvalCountOnly: false,
        },
        false,
        getOnDiskWorkspaceFolders(),
        extensionPacks,
        queryStorageDir,
        undefined,
        undefined,
      );

      const completedQuery = await queryRun.evaluate(
        progress,
        token,
        new TeeLogger(queryRunner.logger, queryRun.outputDir.logPath),
      );

      return { metadata, queryRun, completedQuery };
    } finally {
      await cleanupLockFile?.();
    }
  };

  const { metadata, queryRun, completedQuery } = await evaluateQuery();

  if (completedQuery.resultType !== QueryResultType.SUCCESS) {
    void showAndLogExceptionWithTelemetry(
      extLogger,
      telemetryListener,
      redactableError`Auto-model query ${queryTag} failed: ${
        completedQuery.message ?? "No message"
      }`,
    );
    return;
  }

  const interpretedResultsPath = join(
    queryStorageDir,
    `interpreted-results-${queryTag.replaceAll(" ", "-")}-${queryRun.id}.sarif`,
  );

  // eslint-disable-next-line @typescript-eslint/no-unused-vars -- We only need the actual SARIF data, not the extra fields added by SarifInterpretationData
  const { t, sortState, ...sarif } = await interpretResultsSarif(
    cliServer,
    metadata,
    {
      resultsPath: completedQuery.outputDir.bqrsPath,
      interpretedResultsPath,
    },
    sourceInfo,
    ["--sarif-add-snippets"],
  );

  return sarif;
}
160+
161+
/**
 * Runs the auto-model "candidates" query for the given mode against a database.
 *
 * @param mode Mode used to select the query
 * @param options CLI server, query runner, database, storage directory and
 * progress callback
 * @returns The SARIF log of candidates, or `undefined` if the query did not
 * produce a result
 */
export async function runAutoModelQueries(
  mode: Mode,
  { cliServer, queryRunner, databaseItem, queryStorageDir, progress }: Options,
): Promise<AutoModelQueriesResult | undefined> {
  // Progress for this part is reported on a fixed scale of 1500 steps.
  const maxStep = 1500;
  const progressMessage = "Finding candidates and examples";

  const qlpack = await qlpackOfDatabase(cliServer, databaseItem);

  // CodeQL needs to have access to the database to be able to retrieve the
  // snippets from it. The source location prefix is used to determine the
  // base path of the database.
  const sourceLocationPrefix = await databaseItem.getSourceLocationPrefix(
    cliServer,
  );
  const sourceArchiveUri = databaseItem.sourceArchive;
  const sourceInfo =
    sourceArchiveUri === undefined
      ? undefined
      : {
          sourceArchive: sourceArchiveUri.fsPath,
          sourceLocationPrefix,
        };

  const additionalPacks = getOnDiskWorkspaceFolders();
  const extensionPacks = Object.keys(
    await cliServer.resolveQlpacks(additionalPacks, true),
  );

  progress({
    step: 0,
    maxStep,
    message: progressMessage,
  });

  // The query evaluation requires a token. Nothing in this function cancels
  // the source, but it is still disposed once the query is done.
  const cancellationTokenSource = new CancellationTokenSource();
  try {
    const candidates = await runAutoModelQuery(mode, "candidates", {
      cliServer,
      queryRunner,
      databaseItem,
      qlpack,
      sourceInfo,
      extensionPacks,
      queryStorageDir,
      // Re-scale the query's own progress updates onto this function's maxStep.
      progress: (update) => {
        progress({
          step: update.step,
          maxStep,
          message: progressMessage,
        });
      },
      token: cancellationTokenSource.token,
    });

    if (!candidates) {
      return undefined;
    }

    return {
      candidates,
    };
  } finally {
    cancellationTokenSource.dispose();
  }
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import { AutomodelMode, ModelRequest } from "./auto-model-api-v2";
2+
import { Mode } from "./shared/mode";
3+
import { AutoModelQueriesResult } from "./auto-model-codeml-queries";
4+
import { assertNever } from "../common/helpers-pure";
5+
import * as Sarif from "sarif";
6+
import { gzipEncode } from "../common/zlib";
7+
8+
/**
 * Serializes a SARIF log into the wire format expected by the server:
 * JSON text, GZIP-compressed, then base64-encoded.
 *
 * @param log SARIF log to encode
 * @returns base64-encoded GZIP-compressed SARIF log
 */
export async function encodeSarif(log: Sarif.Log): Promise<string> {
  const jsonBytes = Buffer.from(JSON.stringify(log), "utf-8");
  const gzipped = await gzipEncode(jsonBytes);

  return gzipped.toString("base64");
}
19+
20+
/**
 * Builds the request body for the auto-model V2 endpoint from the results of
 * the auto-model queries.
 *
 * @param mode Mode the queries were run in; translated to the API's mode value
 * @param result Query results whose candidates are encoded into the request
 * @returns The request with the translated mode and the encoded candidates
 */
export async function createAutoModelV2Request(
  mode: Mode,
  result: AutoModelQueriesResult,
): Promise<ModelRequest> {
  // Translate the mode first, so an unexpected mode fails before any encoding work.
  const toRequestMode = (): AutomodelMode => {
    if (mode === Mode.Application) {
      return AutomodelMode.Application;
    }
    if (mode === Mode.Framework) {
      return AutomodelMode.Framework;
    }
    return assertNever(mode);
  };

  const requestMode = toRequestMode();
  const candidates = await encodeSarif(result.candidates);

  return {
    mode: requestMode,
    candidates,
  };
}

0 commit comments

Comments
 (0)