Skip to content

Commit f6c492d

Browse files
committed
Use filtering queries to do batched AI querying.
1 parent 1289ab5 commit f6c492d

File tree

5 files changed

+321
-9
lines changed

5 files changed

+321
-9
lines changed

extensions/ql-vscode/src/data-extensions-editor/auto-model-codeml-queries.ts

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ import { redactableError } from "../common/errors";
1717
import { interpretResultsSarif } from "../query-results";
1818
import { join } from "path";
1919
import { assertNever } from "../common/helpers-pure";
20+
import { dir } from "tmp-promise";
21+
import { writeFile, outputFile } from "fs-extra";
22+
import { dump as dumpYaml } from "js-yaml";
23+
import { MethodSignature } from "./external-api-usage";
2024

2125
type AutoModelQueryOptions = {
2226
queryTag: string;
@@ -26,6 +30,7 @@ type AutoModelQueryOptions = {
2630
databaseItem: DatabaseItem;
2731
qlpack: QlPacksForLanguage;
2832
sourceInfo: SourceInfo | undefined;
33+
additionalPacks: string[];
2934
extensionPacks: string[];
3035
queryStorageDir: string;
3136

@@ -52,6 +57,7 @@ async function runAutoModelQuery({
5257
databaseItem,
5358
qlpack,
5459
sourceInfo,
60+
additionalPacks,
5561
extensionPacks,
5662
queryStorageDir,
5763
progress,
@@ -99,7 +105,7 @@ async function runAutoModelQuery({
99105
quickEvalCountOnly: false,
100106
},
101107
false,
102-
getOnDiskWorkspaceFolders(),
108+
additionalPacks,
103109
extensionPacks,
104110
queryStorageDir,
105111
undefined,
@@ -147,6 +153,7 @@ async function runAutoModelQuery({
147153

148154
type AutoModelQueriesOptions = {
149155
mode: Mode;
156+
candidateMethods: MethodSignature[];
150157
cliServer: CodeQLCliServer;
151158
queryRunner: QueryRunner;
152159
databaseItem: DatabaseItem;
@@ -161,6 +168,7 @@ export type AutoModelQueriesResult = {
161168

162169
export async function runAutoModelQueries({
163170
mode,
171+
candidateMethods,
164172
cliServer,
165173
queryRunner,
166174
databaseItem,
@@ -189,7 +197,13 @@ export async function runAutoModelQueries({
189197
sourceLocationPrefix,
190198
};
191199

192-
const additionalPacks = getOnDiskWorkspaceFolders();
200+
// Generate a pack containing the candidate filters
201+
const filterPackDir = await generateCandidateFilterPack(
202+
databaseItem.language,
203+
candidateMethods,
204+
);
205+
206+
const additionalPacks = [...getOnDiskWorkspaceFolders(), filterPackDir];
193207
const extensionPacks = Object.keys(
194208
await cliServer.resolveQlpacks(additionalPacks, true),
195209
);
@@ -208,6 +222,7 @@ export async function runAutoModelQueries({
208222
databaseItem,
209223
qlpack,
210224
sourceInfo,
225+
additionalPacks,
211226
extensionPacks,
212227
queryStorageDir,
213228
progress: (update) => {
@@ -228,3 +243,59 @@ export async function runAutoModelQueries({
228243
candidates,
229244
};
230245
}
246+
247+
/**
 * Creates a temporary extension pack that restricts the automodel queries to
 * the given candidate methods.
 *
 * The pack consists of two files: a `codeql-pack.yml` manifest and a
 * `filter.yml` data extension. The data extension adds one row per candidate
 * method to the extensible predicate `automodelCandidateFilter`.
 *
 * @param language The language used to build the names of the targeted packs.
 * @param candidateMethods The methods the automodel queries should be limited to.
 * @returns The path of the temporary directory that holds the generated pack.
 */
export async function generateCandidateFilterPack(
  language: string,
  candidateMethods: MethodSignature[],
): Promise<string> {
  // The pack lives in a temporary directory so the workspace stays untouched.
  const { path: packDir } = await dir({ unsafeCleanup: true });

  // Write the pack manifest first.
  const manifestYaml = dumpYaml({
    name: "codeql/automodel-filter",
    version: "0.0.0",
    library: true,
    extensionTargets: {
      [`codeql/${language}-all`]: "*",
    },
    dataExtensions: ["filter.yml"],
  });
  await outputFile(join(packDir, "codeql-pack.yml"), manifestYaml, "utf8");

  // Each row matches the columns of the predicate definition:
  //   extensible predicate automodelCandidateFilter(string package, string type, string name, string signature)
  const filterRows = candidateMethods.map(
    ({ packageName, typeName, methodName, methodParameters }) => [
      packageName,
      typeName,
      methodName,
      methodParameters,
    ],
  );

  // Write the data extension that carries the filter rows.
  const filterYaml = dumpYaml({
    extensions: [
      {
        addsTo: {
          pack: `codeql/${language}-queries`,
          extensible: "automodelCandidateFilter",
        },
        data: filterRows,
      },
    ],
  });
  await writeFile(join(packDir, "filter.yml"), filterYaml, "utf8");

  return packDir;
}

extensions/ql-vscode/src/data-extensions-editor/auto-model-v2.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,63 @@ import { AutoModelQueriesResult } from "./auto-model-codeml-queries";
44
import { assertNever } from "../common/helpers-pure";
55
import * as Sarif from "sarif";
66
import { gzipEncode } from "../common/zlib";
7+
import { ExternalApiUsage, MethodSignature } from "./external-api-usage";
8+
import { ModeledMethod } from "./modeled-method";
9+
import { groupMethods, sortGroupNames, sortMethods } from "./shared/sorting";
10+
11+
// Soft upper bound on how many candidates are sent to the model in one batch.
// The model may still return fewer candidates than this.
const candidateLimit = 20;

/**
 * Selects the methods that the model should be run on.
 *
 * Methods are taken in the same order as they are listed in the UI. Methods
 * that already have a model, or that are marked as supported, are left out,
 * and at most `candidateLimit` methods are returned.
 *
 * @param mode Whether it is application or framework mode.
 * @param externalApiUsages All external API usages.
 * @param modeledMethods The currently modeled methods, keyed by method signature.
 * @returns The methods that are candidates for modeling.
 */
export function getCandidates(
  mode: Mode,
  externalApiUsages: ExternalApiUsage[],
  modeledMethods: Record<string, ModeledMethod>,
): MethodSignature[] {
  // Order the usages exactly like the UI, so the first ones listed are sent first.
  const groups = groupMethods(externalApiUsages, mode);
  const orderedUsages = sortGroupNames(groups).flatMap((groupName) =>
    sortMethods(groups[groupName]),
  );

  const isCandidate = (usage: ExternalApiUsage): boolean => {
    // A missing entry counts as "not modeled"; any type other than "none" is modeled.
    const existingModel = modeledMethods[usage.signature];
    const alreadyModeled =
      existingModel != null && existingModel.type !== "none";

    // Supported methods are modeled outside of the model file, so they are excluded too.
    return !alreadyModeled && !usage.supported;
  };

  // Keep only the first `candidateLimit` eligible methods.
  return orderedUsages.filter(isCandidate).slice(0, candidateLimit);
}
764

865
/**
966
* Encode a SARIF log to the format expected by the server: JSON, GZIP-compressed, base64-encoded

extensions/ql-vscode/src/data-extensions-editor/data-extensions-editor-view.ts

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,10 @@ import { join } from "path";
5656
import { pickExtensionPack } from "./extension-pack-picker";
5757
import { getLanguageDisplayName } from "../common/query-language";
5858
import { runAutoModelQueries } from "./auto-model-codeml-queries";
59-
import { createAutoModelV2Request } from "./auto-model-v2";
59+
import { createAutoModelV2Request, getCandidates } from "./auto-model-v2";
6060
import { load as loadYaml } from "js-yaml";
6161
import { loadDataExtensionYaml } from "./yaml";
62+
import { extLogger } from "../common/logging/vscode";
6263

6364
export class DataExtensionsEditorView extends AbstractWebview<
6465
ToDataExtensionsEditorMessage,
@@ -380,8 +381,22 @@ export class DataExtensionsEditorView extends AbstractWebview<
380381
let predictedModeledMethods: Record<string, ModeledMethod>;
381382

382383
if (useLlmGenerationV2()) {
384+
// Fetch the candidates to send to the model
385+
const candidateMethods = getCandidates(
386+
this.mode,
387+
externalApiUsages,
388+
modeledMethods,
389+
);
390+
391+
// If there are no candidates, there is nothing to model and we just return
392+
if (candidateMethods.length === 0) {
393+
void extLogger.log("No candidates to model. Stopping.");
394+
return;
395+
}
396+
383397
const usages = await runAutoModelQueries({
384398
mode: this.mode,
399+
candidateMethods,
385400
cliServer: this.cliServer,
386401
queryRunner: this.queryRunner,
387402
queryStorageDir: this.queryStorageDir,
@@ -421,12 +436,33 @@ export class DataExtensionsEditorView extends AbstractWebview<
421436
filename: "auto-model.yml",
422437
});
423438

424-
const modeledMethods = loadDataExtensionYaml(models);
425-
if (!modeledMethods) {
439+
const loadedMethods = loadDataExtensionYaml(models);
440+
if (!loadedMethods) {
426441
return;
427442
}
428443

429-
predictedModeledMethods = modeledMethods;
444+
// Any candidate that was not part of the response is a negative result
445+
// meaning that the candidate is not a sink for the kinds that the LLM is checking for.
446+
// For now we model this as a sink neutral method, however this is subject
447+
// to discussion.
448+
for (const candidate of candidateMethods) {
449+
if (!(candidate.signature in loadedMethods)) {
450+
loadedMethods[candidate.signature] = {
451+
type: "neutral",
452+
kind: "sink",
453+
input: "",
454+
output: "",
455+
provenance: "ai-generated",
456+
signature: candidate.signature,
457+
packageName: candidate.packageName,
458+
typeName: candidate.typeName,
459+
methodName: candidate.methodName,
460+
methodParameters: candidate.methodParameters,
461+
};
462+
}
463+
}
464+
465+
predictedModeledMethods = loadedMethods;
430466
} else {
431467
const usages = await getAutoModelUsages({
432468
cliServer: this.cliServer,

extensions/ql-vscode/test/unit-tests/data-extensions-editor/auto-model-v2.test.ts

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import {
22
createAutoModelV2Request,
33
encodeSarif,
4+
getCandidates,
45
} from "../../../src/data-extensions-editor/auto-model-v2";
56
import { Mode } from "../../../src/data-extensions-editor/shared/mode";
67
import { AutomodelMode } from "../../../src/data-extensions-editor/auto-model-api-v2";
78
import { AutoModelQueriesResult } from "../../../src/data-extensions-editor/auto-model-codeml-queries";
89
import * as sarif from "sarif";
910
import { gzipDecode } from "../../../src/common/zlib";
11+
import { ExternalApiUsage } from "../../../src/data-extensions-editor/external-api-usage";
12+
import { ModeledMethod } from "../../../src/data-extensions-editor/modeled-method";
1013

1114
describe("createAutoModelV2Request", () => {
1215
const createSarifLog = (queryId: string): sarif.Log => {
@@ -80,3 +83,106 @@ describe("createAutoModelV2Request", () => {
8083
expect(parsed).toEqual(result.candidates);
8184
});
8285
});
86+
87+
describe("getCandidates", () => {
  // Builds an external API usage for `org.my.A#x()`. Each test overrides only
  // the fields it cares about.
  const makeUsage = (
    overrides: Partial<ExternalApiUsage> = {},
  ): ExternalApiUsage => ({
    library: "my.jar",
    signature: "org.my.A#x()",
    packageName: "org.my",
    typeName: "A",
    methodName: "x",
    methodParameters: "()",
    supported: false,
    supportedType: "none",
    usages: [],
    ...overrides,
  });

  it("doesnt return methods that are already modelled", () => {
    // The only usage already has a manual neutral model.
    const modeledMethods: Record<string, ModeledMethod> = {
      "org.my.A#x()": {
        type: "neutral",
        kind: "",
        input: "",
        output: "",
        provenance: "manual",
        signature: "org.my.A#x()",
        packageName: "org.my",
        typeName: "A",
        methodName: "x",
        methodParameters: "()",
      },
    };

    const candidates = getCandidates(
      Mode.Application,
      [makeUsage()],
      modeledMethods,
    );

    expect(candidates).toHaveLength(0);
  });

  it("doesnt return methods that are supported from other sources", () => {
    const candidates = getCandidates(
      Mode.Application,
      [makeUsage({ supported: true })],
      {},
    );

    expect(candidates).toHaveLength(0);
  });

  it("return methods that neither modeled nor supported from other sources", () => {
    const candidates = getCandidates(Mode.Application, [makeUsage()], {});

    expect(candidates).toHaveLength(1);
  });

  it("respects the limit", () => {
    // Thirty distinct unmodeled methods, which is more than the candidate limit.
    const externalApiUsages = Array.from({ length: 30 }, (_, i) =>
      makeUsage({
        signature: `org.my.A#x${i}()`,
        methodName: `x${i}`,
      }),
    );

    const candidates = getCandidates(Mode.Application, externalApiUsages, {});

    expect(candidates).toHaveLength(20);
  });
});

0 commit comments

Comments
 (0)