Skip to content

Commit c2ed98e

Browse files
authored
Merge pull request #2633 from github/koesie10/automodel-v2
Add LLM functionality using auto-model V2
2 parents 3f89675 + bebe130 commit c2ed98e

File tree

11 files changed

+722
-53
lines changed

11 files changed

+722
-53
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import { promisify } from "util";
2+
import { gzip, gunzip } from "zlib";
3+
4+
/**
5+
* Promisified version of zlib.gzip
6+
*/
7+
// `promisify` turns the callback-taking `gzip` from "zlib" into a function that returns a promise, so callers can `await` it.
export const gzipEncode = promisify(gzip);
8+
9+
/**
10+
* Promisified version of zlib.gunzip
11+
*/
12+
// `promisify` turns the callback-taking `gunzip` from "zlib" into a function that returns a promise, so callers can `await` it.
export const gzipDecode = promisify(gunzip);

extensions/ql-vscode/src/config.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,7 @@ export function showQueriesPanel(): boolean {
704704

705705
const DATA_EXTENSIONS = new Setting("dataExtensions", ROOT_SETTING);
706706
const LLM_GENERATION = new Setting("llmGeneration", DATA_EXTENSIONS);
707+
const LLM_GENERATION_V2 = new Setting("llmGenerationV2", DATA_EXTENSIONS);
707708
const FRAMEWORK_MODE = new Setting("frameworkMode", DATA_EXTENSIONS);
708709
const DISABLE_AUTO_NAME_EXTENSION_PACK = new Setting(
709710
"disableAutoNameExtensionPack",
@@ -718,6 +719,10 @@ export function showLlmGeneration(): boolean {
718719
return !!LLM_GENERATION.getValue<boolean>();
719720
}
720721

722+
/**
 * Reports whether the `llmGenerationV2` setting is switched on.
 *
 * @returns `true` only when the stored setting value is truthy; a missing or
 * falsy value yields `false`.
 */
export function useLlmGenerationV2(): boolean {
  const configured = LLM_GENERATION_V2.getValue<boolean>();
  return Boolean(configured);
}
725+
721726
/**
 * Reports whether the `frameworkMode` setting is switched on.
 *
 * @returns `true` only when the stored setting value is truthy; a missing or
 * falsy value yields `false`.
 */
export function enableFrameworkMode(): boolean {
  const configured = FRAMEWORK_MODE.getValue<boolean>();
  return Boolean(configured);
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import { Credentials } from "../common/authentication";
2+
import { OctokitResponse } from "@octokit/types";
3+
4+
export enum AutomodelMode {
5+
Unspecified = "AUTOMODEL_MODE_UNSPECIFIED",
6+
Framework = "AUTOMODEL_MODE_FRAMEWORK",
7+
Application = "AUTOMODEL_MODE_APPLICATION",
8+
}
9+
10+
export interface ModelRequest {
11+
mode: AutomodelMode;
12+
// Base64-encoded GZIP-compressed SARIF log
13+
candidates: string;
14+
}
15+
16+
export interface ModelResponse {
17+
models: string;
18+
}
19+
20+
/**
 * Posts an auto-model request to the code scanning auto-model endpoint and
 * returns the body of the response.
 *
 * @param credentials Provides the Octokit client used to make the request.
 * @param request The mode and encoded candidates, sent as the request data.
 * @returns The `data` field of the Octokit response.
 */
export async function autoModelV2(
  credentials: Credentials,
  request: ModelRequest,
): Promise<ModelResponse> {
  const client = await credentials.getOctokit();

  const { data }: OctokitResponse<ModelResponse> = await client.request(
    "POST /repos/github/codeql/code-scanning/codeql/auto-model",
    { data: request },
  );

  return data;
}
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
import { CodeQLCliServer, SourceInfo } from "../codeql-cli/cli";
2+
import { QueryRunner } from "../query-server";
3+
import { DatabaseItem } from "../databases/local-databases";
4+
import { ProgressCallback } from "../common/vscode/progress";
5+
import * as Sarif from "sarif";
6+
import { qlpackOfDatabase, resolveQueries } from "../local-queries";
7+
import { extLogger } from "../common/logging/vscode";
8+
import { Mode } from "./shared/mode";
9+
import { QlPacksForLanguage } from "../databases/qlpack";
10+
import { createLockFileForStandardQuery } from "../local-queries/standard-queries";
11+
import { CancellationToken, CancellationTokenSource } from "vscode";
12+
import { getOnDiskWorkspaceFolders } from "../common/vscode/workspace-folders";
13+
import { showAndLogExceptionWithTelemetry, TeeLogger } from "../common/logging";
14+
import { QueryResultType } from "../query-server/new-messages";
15+
import { telemetryListener } from "../common/vscode/telemetry";
16+
import { redactableError } from "../common/errors";
17+
import { interpretResultsSarif } from "../query-results";
18+
import { join } from "path";
19+
import { assertNever } from "../common/helpers-pure";
20+
21+
type AutoModelQueryOptions = {
22+
queryTag: string;
23+
mode: Mode;
24+
cliServer: CodeQLCliServer;
25+
queryRunner: QueryRunner;
26+
databaseItem: DatabaseItem;
27+
qlpack: QlPacksForLanguage;
28+
sourceInfo: SourceInfo | undefined;
29+
extensionPacks: string[];
30+
queryStorageDir: string;
31+
32+
progress: ProgressCallback;
33+
token: CancellationToken;
34+
};
35+
36+
/**
 * Maps a modeling mode to the query tag that identifies queries for that mode.
 *
 * @param mode The modeling mode.
 * @returns `"application-mode"` or `"framework-mode"`.
 */
function modeTag(mode: Mode): string {
  if (mode === Mode.Application) {
    return "application-mode";
  }
  if (mode === Mode.Framework) {
    return "framework-mode";
  }
  // Every known mode is handled above; this keeps the mapping exhaustive at
  // compile time if a new mode is ever added.
  return assertNever(mode);
}
46+
47+
/**
 * Resolves the single auto-model query matching `mode` and `queryTag`, runs it
 * against the database and interprets the results as SARIF.
 *
 * @returns The interpreted SARIF log, or `undefined` when the query did not
 * complete successfully (the failure is reported before returning).
 * @throws Error when zero or more than one matching query is found.
 */
async function runAutoModelQuery({
  queryTag,
  mode,
  cliServer,
  queryRunner,
  databaseItem,
  qlpack,
  sourceInfo,
  extensionPacks,
  queryStorageDir,
  progress,
  token,
}: AutoModelQueryOptions): Promise<Sarif.Log | undefined> {
  // First, resolve the query that we want to run.
  // All queries are tagged like this:
  // internal extract automodel <mode> <queryTag>
  // Example: internal extract automodel framework-mode candidates
  const queries = await resolveQueries(
    cliServer,
    qlpack,
    `Extract automodel ${queryTag}`,
    {
      kind: "problem",
      "tags contain all": ["automodel", modeTag(mode), ...queryTag.split(" ")],
    },
  );
  if (queries.length > 1) {
    throw new Error(
      `Found multiple auto model queries for ${mode} ${queryTag}. Can't continue`,
    );
  }
  if (queries.length === 0) {
    throw new Error(
      `Did not find any auto model queries for ${mode} ${queryTag}. Can't continue`,
    );
  }

  const queryPath = queries[0];
  const { cleanup: cleanupLockFile } = await createLockFileForStandardQuery(
    cliServer,
    queryPath,
  );

  // Everything between creating the lock file and finishing the evaluation can
  // throw; the `finally` guarantees the lock file is cleaned up regardless.
  const evaluateQuery = async () => {
    try {
      // Get metadata for the query. This is required to interpret the results. We already know the kind is problem
      // (because of the constraint in resolveQueries), so we don't need any more checks on the metadata.
      const metadata = await cliServer.resolveMetadata(queryPath);

      const queryRun = queryRunner.createQueryRun(
        databaseItem.databaseUri.fsPath,
        {
          queryPath,
          quickEvalPosition: undefined,
          quickEvalCountOnly: false,
        },
        false,
        getOnDiskWorkspaceFolders(),
        extensionPacks,
        queryStorageDir,
        undefined,
        undefined,
      );

      const completedQuery = await queryRun.evaluate(
        progress,
        token,
        new TeeLogger(queryRunner.logger, queryRun.outputDir.logPath),
      );

      return { metadata, queryRun, completedQuery };
    } finally {
      await cleanupLockFile?.();
    }
  };

  const { metadata, queryRun, completedQuery } = await evaluateQuery();

  if (completedQuery.resultType !== QueryResultType.SUCCESS) {
    void showAndLogExceptionWithTelemetry(
      extLogger,
      telemetryListener,
      redactableError`Auto-model query ${queryTag} failed: ${
        completedQuery.message ?? "No message"
      }`,
    );
    return;
  }

  const interpretedResultsPath = join(
    queryStorageDir,
    `interpreted-results-${queryTag.replaceAll(" ", "-")}-${queryRun.id}.sarif`,
  );

  // eslint-disable-next-line @typescript-eslint/no-unused-vars -- We only need the actual SARIF data, not the extra fields added by SarifInterpretationData
  const { t, sortState, ...sarif } = await interpretResultsSarif(
    cliServer,
    metadata,
    {
      resultsPath: completedQuery.outputDir.bqrsPath,
      interpretedResultsPath,
    },
    sourceInfo,
    ["--sarif-add-snippets"],
  );

  return sarif;
}
147+
148+
type AutoModelQueriesOptions = {
149+
mode: Mode;
150+
cliServer: CodeQLCliServer;
151+
queryRunner: QueryRunner;
152+
databaseItem: DatabaseItem;
153+
queryStorageDir: string;
154+
155+
progress: ProgressCallback;
156+
};
157+
158+
export type AutoModelQueriesResult = {
159+
candidates: Sarif.Log;
160+
};
161+
162+
/**
 * Runs the auto-model "candidates" query for the given database and returns
 * its interpreted SARIF results.
 *
 * Progress from the query is forwarded to `progress` with a fixed `maxStep`
 * and message.
 *
 * @returns The candidates SARIF log, or `undefined` when the query produced no
 * result.
 */
export async function runAutoModelQueries({
  mode,
  cliServer,
  queryRunner,
  databaseItem,
  queryStorageDir,
  progress,
}: AutoModelQueriesOptions): Promise<AutoModelQueriesResult | undefined> {
  // maxStep for this part is 1500
  const maxStep = 1500;

  const qlpack = await qlpackOfDatabase(cliServer, databaseItem);

  // CodeQL needs to have access to the database to be able to retrieve the
  // snippets from it. The source location prefix is used to determine the
  // base path of the database.
  const sourceLocationPrefix = await databaseItem.getSourceLocationPrefix(
    cliServer,
  );
  const sourceArchiveUri = databaseItem.sourceArchive;
  const sourceInfo =
    sourceArchiveUri === undefined
      ? undefined
      : {
          sourceArchive: sourceArchiveUri.fsPath,
          sourceLocationPrefix,
        };

  const additionalPacks = getOnDiskWorkspaceFolders();
  const extensionPacks = Object.keys(
    await cliServer.resolveQlpacks(additionalPacks, true),
  );

  progress({
    step: 0,
    maxStep,
    message: "Finding candidates and examples",
  });

  // Created right before use and disposed in `finally`, so the token source is
  // released whether the query succeeds, fails or throws.
  const cancellationTokenSource = new CancellationTokenSource();
  try {
    const candidates = await runAutoModelQuery({
      mode,
      queryTag: "candidates",
      cliServer,
      queryRunner,
      databaseItem,
      qlpack,
      sourceInfo,
      extensionPacks,
      queryStorageDir,
      progress: (update) => {
        progress({
          step: update.step,
          maxStep,
          message: "Finding candidates and examples",
        });
      },
      token: cancellationTokenSource.token,
    });

    if (!candidates) {
      return undefined;
    }

    return {
      candidates,
    };
  } finally {
    cancellationTokenSource.dispose();
  }
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import { AutomodelMode, ModelRequest } from "./auto-model-api-v2";
2+
import { Mode } from "./shared/mode";
3+
import { AutoModelQueriesResult } from "./auto-model-codeml-queries";
4+
import { assertNever } from "../common/helpers-pure";
5+
import * as Sarif from "sarif";
6+
import { gzipEncode } from "../common/zlib";
7+
8+
/**
 * Serializes a SARIF log into the wire format expected by the server: the log
 * as JSON, encoded as UTF-8, GZIP-compressed, then base64-encoded.
 *
 * @param log SARIF log to encode
 * @returns base64-encoded GZIP-compressed SARIF log
 */
export async function encodeSarif(log: Sarif.Log): Promise<string> {
  const serialized = Buffer.from(JSON.stringify(log), "utf-8");
  const gzipped = await gzipEncode(serialized);
  return gzipped.toString("base64");
}
19+
20+
/**
 * Builds the request payload for the auto-model V2 API from the results of the
 * auto-model queries.
 *
 * @param mode The modeling mode, translated to its API counterpart.
 * @param result Query results; its `candidates` SARIF log is encoded with
 * `encodeSarif`.
 * @returns The request containing the API mode and the encoded candidates.
 */
export async function createAutoModelV2Request(
  mode: Mode,
  result: AutoModelQueriesResult,
): Promise<ModelRequest> {
  // Translate the mode first so an unsupported mode fails before any encoding
  // work is started.
  const toRequestMode = (): AutomodelMode => {
    switch (mode) {
      case Mode.Application:
        return AutomodelMode.Application;
      case Mode.Framework:
        return AutomodelMode.Framework;
      default:
        return assertNever(mode);
    }
  };

  const requestMode = toRequestMode();
  const candidates = await encodeSarif(result.candidates);

  return { mode: requestMode, candidates };
}

0 commit comments

Comments
 (0)