Skip to content

Commit 234760e

Browse files
authored
Merge pull request #2670 from github/starcke/apply-slice-filter
Use filtering queries to do batched AI querying
2 parents 61f8f5f + 9bd2286 commit 234760e

File tree

5 files changed

+325
-9
lines changed

5 files changed

+325
-9
lines changed

extensions/ql-vscode/src/data-extensions-editor/auto-model-codeml-queries.ts

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ import { redactableError } from "../common/errors";
1717
import { interpretResultsSarif } from "../query-results";
1818
import { join } from "path";
1919
import { assertNever } from "../common/helpers-pure";
20+
import { dir } from "tmp-promise";
21+
import { writeFile, outputFile } from "fs-extra";
22+
import { dump as dumpYaml } from "js-yaml";
23+
import { MethodSignature } from "./external-api-usage";
2024

2125
type AutoModelQueryOptions = {
2226
queryTag: string;
@@ -26,6 +30,7 @@ type AutoModelQueryOptions = {
2630
databaseItem: DatabaseItem;
2731
qlpack: QlPacksForLanguage;
2832
sourceInfo: SourceInfo | undefined;
33+
additionalPacks: string[];
2934
extensionPacks: string[];
3035
queryStorageDir: string;
3136

@@ -52,6 +57,7 @@ async function runAutoModelQuery({
5257
databaseItem,
5358
qlpack,
5459
sourceInfo,
60+
additionalPacks,
5561
extensionPacks,
5662
queryStorageDir,
5763
progress,
@@ -99,7 +105,7 @@ async function runAutoModelQuery({
99105
quickEvalCountOnly: false,
100106
},
101107
false,
102-
getOnDiskWorkspaceFolders(),
108+
additionalPacks,
103109
extensionPacks,
104110
queryStorageDir,
105111
undefined,
@@ -147,6 +153,7 @@ async function runAutoModelQuery({
147153

148154
type AutoModelQueriesOptions = {
149155
mode: Mode;
156+
candidateMethods: MethodSignature[];
150157
cliServer: CodeQLCliServer;
151158
queryRunner: QueryRunner;
152159
databaseItem: DatabaseItem;
@@ -161,6 +168,7 @@ export type AutoModelQueriesResult = {
161168

162169
export async function runAutoModelQueries({
163170
mode,
171+
candidateMethods,
164172
cliServer,
165173
queryRunner,
166174
databaseItem,
@@ -189,7 +197,13 @@ export async function runAutoModelQueries({
189197
sourceLocationPrefix,
190198
};
191199

192-
const additionalPacks = getOnDiskWorkspaceFolders();
200+
// Generate a pack containing the candidate filters
201+
const filterPackDir = await generateCandidateFilterPack(
202+
databaseItem.language,
203+
candidateMethods,
204+
);
205+
206+
const additionalPacks = [...getOnDiskWorkspaceFolders(), filterPackDir];
193207
const extensionPacks = Object.keys(
194208
await cliServer.resolveQlpacks(additionalPacks, true),
195209
);
@@ -208,6 +222,7 @@ export async function runAutoModelQueries({
208222
databaseItem,
209223
qlpack,
210224
sourceInfo,
225+
additionalPacks,
211226
extensionPacks,
212227
queryStorageDir,
213228
progress: (update) => {
@@ -228,3 +243,59 @@ export async function runAutoModelQueries({
228243
candidates,
229244
};
230245
}
246+
247+
/**
 * Creates a temporary extension pack that restricts the automodel queries to
 * the given candidate methods.
 *
 * The pack targets `codeql/<language>-queries` and ships a single data
 * extension file, `filter.yml`, which adds one row per candidate method to the
 * extensible predicate `automodelCandidateFilter`.
 *
 * @param language The language used to build the name of the targeted query pack.
 * @param candidateMethods The methods the automodel queries should be restricted to.
 * @returns The path of the temporary directory that contains the generated pack.
 */
export async function generateCandidateFilterPack(
  language: string,
  candidateMethods: MethodSignature[],
): Promise<string> {
  // The pack lives in a temporary directory so that the workspace is not polluted.
  const { path: packDir } = await dir({ unsafeCleanup: true });

  const targetPack = `codeql/${language}-queries`;

  // Write the pack definition first; `outputFile` creates missing parent directories.
  const packDefinition = {
    name: "codeql/automodel-filter",
    version: "0.0.0",
    library: true,
    extensionTargets: { [targetPack]: "*" },
    dataExtensions: ["filter.yml"],
  };
  await outputFile(
    join(packDir, "codeql-pack.yml"),
    dumpYaml(packDefinition),
    "utf8",
  );

  // One row per candidate, in the column order of the predicate definition:
  //   extensible predicate automodelCandidateFilter(string package, string type, string name, string signature)
  const rows = candidateMethods.map(
    ({ packageName, typeName, methodName, methodParameters }) => [
      packageName,
      typeName,
      methodName,
      methodParameters,
    ],
  );

  const filterDefinition = {
    extensions: [
      {
        addsTo: { pack: targetPack, extensible: "automodelCandidateFilter" },
        data: rows,
      },
    ],
  };
  await writeFile(
    join(packDir, "filter.yml"),
    dumpYaml(filterDefinition),
    "utf8",
  );

  return packDir;
}

extensions/ql-vscode/src/data-extensions-editor/auto-model-v2.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,63 @@ import { AutoModelQueriesResult } from "./auto-model-codeml-queries";
44
import { assertNever } from "../common/helpers-pure";
55
import * as Sarif from "sarif";
66
import { gzipEncode } from "../common/zlib";
7+
import { ExternalApiUsage, MethodSignature } from "./external-api-usage";
8+
import { ModeledMethod } from "./modeled-method";
9+
import { groupMethods, sortGroupNames, sortMethods } from "./shared/sorting";
10+
11+
// Soft limit on the number of candidates to send to the model.
12+
// Note that the model may return fewer than this number of candidates.
13+
const candidateLimit = 20;
14+
/**
 * Select the methods that the model should be run on.
 *
 * Usages are visited in the same order as the UI lists them. A usage is skipped
 * when it already has a model (any type other than "none") or when it is marked
 * as supported. Selection stops as soon as `candidateLimit` methods have been
 * collected.
 *
 * @param mode Whether it is application or framework mode.
 * @param externalApiUsages All external API usages.
 * @param modeledMethods The currently modeled methods, keyed by method signature.
 * @returns The methods that are candidates for modeling, at most `candidateLimit` of them.
 */
export function getCandidates(
  mode: Mode,
  externalApiUsages: ExternalApiUsage[],
  modeledMethods: Record<string, ModeledMethod>,
): MethodSignature[] {
  // Reproduce the UI ordering: sorted groups, then sorted methods within each group.
  const methodsByGroup = groupMethods(externalApiUsages, mode);
  const orderedUsages = sortGroupNames(methodsByGroup)
    .map((groupName) => sortMethods(methodsByGroup[groupName]))
    .flat();

  const selected: MethodSignature[] = [];

  for (const usage of orderedUsages) {
    // Stop once the maximum number of candidates has been reached.
    if (selected.length >= candidateLimit) {
      break;
    }

    // A missing entry is treated the same as an entry of type "none".
    const existingModel = modeledMethods[usage.signature];
    const alreadyModeled = existingModel != null && existingModel.type !== "none";

    // Supported methods are modeled outside of the model file, so they are
    // not candidates either.
    if (!alreadyModeled && !usage.supported) {
      selected.push(usage);
    }
  }

  return selected;
}
764

865
/**
966
* Encode a SARIF log to the format expected by the server: JSON, GZIP-compressed, base64-encoded

extensions/ql-vscode/src/data-extensions-editor/data-extensions-editor-view.ts

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,10 @@ import { join } from "path";
5656
import { pickExtensionPack } from "./extension-pack-picker";
5757
import { getLanguageDisplayName } from "../common/query-language";
5858
import { runAutoModelQueries } from "./auto-model-codeml-queries";
59-
import { createAutoModelV2Request } from "./auto-model-v2";
59+
import { createAutoModelV2Request, getCandidates } from "./auto-model-v2";
6060
import { load as loadYaml } from "js-yaml";
6161
import { loadDataExtensionYaml } from "./yaml";
62+
import { extLogger } from "../common/logging/vscode";
6263

6364
export class DataExtensionsEditorView extends AbstractWebview<
6465
ToDataExtensionsEditorMessage,
@@ -377,8 +378,22 @@ export class DataExtensionsEditorView extends AbstractWebview<
377378
let predictedModeledMethods: Record<string, ModeledMethod>;
378379

379380
if (useLlmGenerationV2()) {
381+
// Fetch the candidates to send to the model
382+
const candidateMethods = getCandidates(
383+
this.mode,
384+
externalApiUsages,
385+
modeledMethods,
386+
);
387+
388+
// If there are no candidates, there is nothing to model and we just return
389+
if (candidateMethods.length === 0) {
390+
void extLogger.log("No candidates to model. Stopping.");
391+
return;
392+
}
393+
380394
const usages = await runAutoModelQueries({
381395
mode: this.mode,
396+
candidateMethods,
382397
cliServer: this.cliServer,
383398
queryRunner: this.queryRunner,
384399
queryStorageDir: this.queryStorageDir,
@@ -418,12 +433,33 @@ export class DataExtensionsEditorView extends AbstractWebview<
418433
filename: "auto-model.yml",
419434
});
420435

421-
const modeledMethods = loadDataExtensionYaml(models);
422-
if (!modeledMethods) {
436+
const loadedMethods = loadDataExtensionYaml(models);
437+
if (!loadedMethods) {
423438
return;
424439
}
425440

426-
predictedModeledMethods = modeledMethods;
441+
// Any candidate that was not part of the response is a negative result
442+
// meaning that the candidate is not a sink for the kinds that the LLM is checking for.
443+
// For now we model this as a sink neutral method, however this is subject
444+
// to discussion.
445+
for (const candidate of candidateMethods) {
446+
if (!(candidate.signature in loadedMethods)) {
447+
loadedMethods[candidate.signature] = {
448+
type: "neutral",
449+
kind: "sink",
450+
input: "",
451+
output: "",
452+
provenance: "ai-generated",
453+
signature: candidate.signature,
454+
packageName: candidate.packageName,
455+
typeName: candidate.typeName,
456+
methodName: candidate.methodName,
457+
methodParameters: candidate.methodParameters,
458+
};
459+
}
460+
}
461+
462+
predictedModeledMethods = loadedMethods;
427463
} else {
428464
const usages = await getAutoModelUsages({
429465
cliServer: this.cliServer,

extensions/ql-vscode/test/unit-tests/data-extensions-editor/auto-model-v2.test.ts

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import {
22
createAutoModelV2Request,
33
encodeSarif,
4+
getCandidates,
45
} from "../../../src/data-extensions-editor/auto-model-v2";
56
import { Mode } from "../../../src/data-extensions-editor/shared/mode";
67
import { AutomodelMode } from "../../../src/data-extensions-editor/auto-model-api-v2";
78
import { AutoModelQueriesResult } from "../../../src/data-extensions-editor/auto-model-codeml-queries";
89
import * as sarif from "sarif";
910
import { gzipDecode } from "../../../src/common/zlib";
11+
import { ExternalApiUsage } from "../../../src/data-extensions-editor/external-api-usage";
12+
import { ModeledMethod } from "../../../src/data-extensions-editor/modeled-method";
1013

1114
describe("createAutoModelV2Request", () => {
1215
const createSarifLog = (queryId: string): sarif.Log => {
@@ -80,3 +83,110 @@ describe("createAutoModelV2Request", () => {
8083
expect(parsed).toEqual(result.candidates);
8184
});
8285
});
86+
87+
describe("getCandidates", () => {
  // Builds a usage of `org.my.A#<methodName>()` from my.jar with no recorded usages.
  const createUsage = (
    methodName: string,
    supported: boolean,
  ): ExternalApiUsage => ({
    library: "my.jar",
    signature: `org.my.A#${methodName}()`,
    packageName: "org.my",
    typeName: "A",
    methodName,
    methodParameters: "()",
    supported,
    supportedType: "none",
    usages: [],
  });

  it("doesn't return methods that are already modelled", () => {
    const usage = createUsage("x", false);
    // A manual neutral model for the very same signature as the usage.
    const modeledMethods: Record<string, ModeledMethod> = {
      "org.my.A#x()": {
        type: "neutral",
        kind: "",
        input: "",
        output: "",
        provenance: "manual",
        signature: "org.my.A#x()",
        packageName: "org.my",
        typeName: "A",
        methodName: "x",
        methodParameters: "()",
      },
    };

    const candidates = getCandidates(Mode.Application, [usage], modeledMethods);

    expect(candidates).toHaveLength(0);
  });

  it("doesn't return methods that are supported from other sources", () => {
    const candidates = getCandidates(
      Mode.Application,
      [createUsage("x", true)],
      {},
    );

    expect(candidates).toHaveLength(0);
  });

  it("returns methods that are neither modeled nor supported from other sources", () => {
    const candidates = getCandidates(
      Mode.Application,
      [createUsage("x", false)],
      {},
    );

    expect(candidates).toHaveLength(1);
  });

  it("respects the limit", () => {
    // 30 distinct unmodeled, unsupported methods: x0 .. x29.
    const externalApiUsages = Array.from({ length: 30 }, (_, i) =>
      createUsage(`x${i}`, false),
    );

    const candidates = getCandidates(Mode.Application, externalApiUsages, {});

    expect(candidates).toHaveLength(20);
  });
});

0 commit comments

Comments
 (0)