Skip to content

Commit 2f61cfe

Browse files
authored
Merge pull request #2457 from github/koesie10/auto-model-usages-sarif
Retrieve external API usage snippets using SARIF
2 parents 5387546 + dd268af commit 2f61cfe

File tree

14 files changed

+325
-72
lines changed

14 files changed

+325
-72
lines changed

extensions/ql-vscode/src/codeql-cli/cli.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,13 +1073,15 @@ export class CodeQLCliServer implements Disposable {
10731073
resultsPath: string,
10741074
interpretedResultsPath: string,
10751075
sourceInfo?: SourceInfo,
1076+
args?: string[],
10761077
): Promise<sarif.Log> {
10771078
const additionalArgs = [
10781079
// TODO: This flag means that we don't group interpreted results
10791080
// by primary location. We may want to revisit whether we call
10801081
// interpretation with and without this flag, or do some
10811082
// grouping client-side.
10821083
"--no-group-results",
1084+
...(args ?? []),
10831085
];
10841086

10851087
await this.runInterpretCommand(
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import { CancellationTokenSource } from "vscode";
2+
import { join } from "path";
3+
import { runQuery } from "./external-api-usage-query";
4+
import { CodeQLCliServer } from "../codeql-cli/cli";
5+
import { QueryRunner } from "../query-server";
6+
import { DatabaseItem } from "../databases/local-databases";
7+
import { interpretResultsSarif } from "../query-results";
8+
import { ProgressCallback } from "../common/vscode/progress";
9+
10+
/**
 * Everything `getAutoModelUsages` needs to run the external API usage query
 * and interpret its results.
 */
interface Options {
  /** CLI server used to resolve the source location prefix and to interpret the results as SARIF. */
  cliServer: CodeQLCliServer;
  /** Query runner that is handed to `runQuery`. */
  queryRunner: QueryRunner;
  /** Database the query runs against and whose source archive is passed to the interpretation step. */
  databaseItem: DatabaseItem;
  /** Directory handed to `runQuery`; the interpreted SARIF file is also written here. */
  queryStorageDir: string;

  /** Callback that receives progress updates while the usages are retrieved. */
  progress: ProgressCallback;
}
18+
19+
/**
 * Source code snippets of external API usages, keyed by the signature of the
 * method being used. Each value holds one snippet per usage found.
 */
export type UsageSnippetsBySignature = Record<string, string[]>;
20+
21+
/**
 * Runs the external API usage query against the database, interprets the
 * results as SARIF with snippets enabled, and groups the resulting source
 * snippets by method signature.
 *
 * Progress is reported through `progress` on a scale of 0 to 1500 steps.
 *
 * @returns The snippets for each signature. Results without a message text or
 * without a context-region snippet are left out.
 * @throws Error("Query failed") when the query does not produce a result.
 * @throws Error("No results") when the SARIF log has no run with results.
 */
export async function getAutoModelUsages({
  cliServer,
  queryRunner,
  databaseItem,
  queryStorageDir,
  progress,
}: Options): Promise<UsageSnippetsBySignature> {
  const maxStep = 1500;

  const cancellationTokenSource = new CancellationTokenSource();

  try {
    // This will re-run the query that was already run when opening the data extensions editor. This
    // might be unnecessary, but this makes it really easy to get the path to the BQRS file which we
    // need to interpret the results.
    const queryResult = await runQuery({
      cliServer,
      queryRunner,
      queryStorageDir,
      databaseItem,
      // Re-scale the progress of the query onto this function's own step range.
      progress: (update) =>
        progress({
          maxStep,
          step: update.step,
          message: update.message,
        }),
      token: cancellationTokenSource.token,
    });
    if (!queryResult) {
      throw new Error("Query failed");
    }

    progress({
      maxStep,
      step: 1100,
      message: "Retrieving source location prefix",
    });

    // CodeQL needs to have access to the database to be able to retrieve the
    // snippets from it. The source location prefix is used to determine the
    // base path of the database.
    const sourceLocationPrefix = await databaseItem.getSourceLocationPrefix(
      cliServer,
    );
    const sourceArchiveUri = databaseItem.sourceArchive;
    const sourceInfo =
      sourceArchiveUri === undefined
        ? undefined
        : {
            sourceArchive: sourceArchiveUri.fsPath,
            sourceLocationPrefix,
          };

    progress({
      maxStep,
      step: 1200,
      message: "Interpreting results",
    });

    // Convert the results to SARIF so that CodeQL will retrieve the snippets
    // from the database. This means we don't need to do that in the extension
    // and everything is handled by the CodeQL CLI.
    const sarif = await interpretResultsSarif(
      cliServer,
      {
        // To interpret the results we need to provide metadata about the query. We could do this using
        // `resolveMetadata` but that would be an extra call to the CodeQL CLI server and would require
        // us to know the path to the query on the filesystem. Since we know what the metadata should
        // look like and the only metadata that the CodeQL CLI requires is an ID and the kind, we can
        // simply use constants here.
        kind: "problem",
        id: "usage",
      },
      {
        resultsPath: queryResult.outputDir.bqrsPath,
        interpretedResultsPath: join(
          queryStorageDir,
          "interpreted-results.sarif",
        ),
      },
      sourceInfo,
      ["--sarif-add-snippets"],
    );

    progress({
      maxStep,
      step: 1400,
      message: "Parsing results",
    });

    const snippets: UsageSnippetsBySignature = {};

    const results = sarif.runs[0]?.results;
    if (!results) {
      throw new Error("No results");
    }

    // This will group the snippets by the method signature.
    for (const result of results) {
      const signature = result.message.text;

      const snippet =
        result.locations?.[0]?.physicalLocation?.contextRegion?.snippet?.text;

      if (!signature || !snippet) {
        continue;
      }

      // Only look at own properties: the `in` operator would also match keys
      // inherited from Object.prototype (e.g. "constructor"), which would skip
      // the initialization and make the push below fail.
      if (!Object.prototype.hasOwnProperty.call(snippets, signature)) {
        snippets[signature] = [];
      }

      snippets[signature].push(snippet);
    }

    return snippets;
  } finally {
    // The token source is only needed for this invocation; release it on
    // both the success and the failure path.
    cancellationTokenSource.dispose();
  }
}

extensions/ql-vscode/src/data-extensions-editor/auto-model.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@ import {
66
Method,
77
ModelRequest,
88
} from "./auto-model-api";
9+
import type { UsageSnippetsBySignature } from "./auto-model-usages-query";
910

1011
export function createAutoModelRequest(
1112
language: string,
1213
externalApiUsages: ExternalApiUsage[],
1314
modeledMethods: Record<string, ModeledMethod>,
15+
usages: UsageSnippetsBySignature,
1416
): ModelRequest {
1517
const request: ModelRequest = {
1618
language,
@@ -29,6 +31,10 @@ export function createAutoModelRequest(
2931
type: "none",
3032
};
3133

34+
const usagesForMethod =
35+
usages[externalApiUsage.signature] ??
36+
externalApiUsage.usages.map((usage) => usage.label);
37+
3238
const numberOfArguments =
3339
externalApiUsage.methodParameters === "()"
3440
? 0
@@ -48,9 +54,7 @@ export function createAutoModelRequest(
4854
modeledMethod.type === "none"
4955
? undefined
5056
: toMethodClassification(modeledMethod),
51-
usages: externalApiUsage.usages
52-
.slice(0, 10)
53-
.map((usage) => usage.label),
57+
usages: usagesForMethod.slice(0, 10),
5458
input: `Argument[${argumentIndex}]`,
5559
};
5660

extensions/ql-vscode/src/data-extensions-editor/bqrs.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ export function decodeBqrsToExternalApiUsages(
77
const methodsByApiName = new Map<string, ExternalApiUsage>();
88

99
chunk?.tuples.forEach((tuple) => {
10-
const signature = tuple[0] as string;
11-
const supported = tuple[1] as boolean;
12-
const usage = tuple[2] as Call;
10+
const usage = tuple[0] as Call;
11+
const signature = tuple[1] as string;
12+
const supported = (tuple[2] as string) === "true";
1313

1414
const [packageWithType, methodDeclaration] = signature.split("#");
1515

extensions/ql-vscode/src/data-extensions-editor/data-extensions-editor-view.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import {
4545
parsePredictedClassifications,
4646
} from "./auto-model";
4747
import { showLlmGeneration } from "../config";
48+
import { getAutoModelUsages } from "./auto-model-usages-query";
4849

4950
function getQlSubmoduleFolder(): WorkspaceFolder | undefined {
5051
const workspaceFolder = workspace.workspaceFolders?.find(
@@ -385,23 +386,66 @@ export class DataExtensionsEditorView extends AbstractWebview<
385386
externalApiUsages: ExternalApiUsage[],
386387
modeledMethods: Record<string, ModeledMethod>,
387388
): Promise<void> {
389+
const maxStep = 3000;
390+
391+
await this.showProgress({
392+
step: 0,
393+
maxStep,
394+
message: "Retrieving usages",
395+
});
396+
397+
const usages = await getAutoModelUsages({
398+
cliServer: this.cliServer,
399+
queryRunner: this.queryRunner,
400+
queryStorageDir: this.queryStorageDir,
401+
databaseItem: this.databaseItem,
402+
progress: (update) => this.showProgress(update, maxStep),
403+
});
404+
405+
await this.showProgress({
406+
step: 1800,
407+
maxStep,
408+
message: "Creating request",
409+
});
410+
388411
const request = createAutoModelRequest(
389412
this.databaseItem.language,
390413
externalApiUsages,
391414
modeledMethods,
415+
usages,
392416
);
393417

418+
await this.showProgress({
419+
step: 2000,
420+
maxStep,
421+
message: "Sending request",
422+
});
423+
394424
const response = await autoModel(this.app.credentials, request);
395425

426+
await this.showProgress({
427+
step: 2500,
428+
maxStep,
429+
message: "Parsing response",
430+
});
431+
396432
const predictedModeledMethods = parsePredictedClassifications(
397433
response.predicted,
398434
);
399435

436+
await this.showProgress({
437+
step: 2800,
438+
maxStep,
439+
message: "Applying results",
440+
});
441+
400442
await this.postMessage({
401443
t: "addModeledMethods",
402444
modeledMethods: predictedModeledMethods,
403445
overrideNone: true,
404446
});
447+
448+
await this.clearProgress();
405449
}
406450

407451
/*

extensions/ql-vscode/src/data-extensions-editor/queries/csharp.ts

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,28 @@ export const fetchExternalApisQuery: Query = {
55
* @name Usage of APIs coming from external libraries
66
* @description A list of 3rd party APIs used in the codebase.
77
* @tags telemetry
8+
* @kind problem
89
* @id cs/telemetry/fetch-external-apis
910
*/
1011
11-
import csharp
12-
import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
13-
import ExternalApi
14-
15-
private Call aUsage(ExternalApi api) {
16-
result.getTarget().getUnboundDeclaration() = api
17-
}
18-
19-
private boolean isSupported(ExternalApi api) {
20-
api.isSupported() and result = true
21-
or
22-
not api.isSupported() and
23-
result = false
24-
}
25-
26-
from ExternalApi api, string apiName, boolean supported, Call usage
27-
where
28-
apiName = api.getApiName() and
29-
supported = isSupported(api) and
30-
usage = aUsage(api)
31-
select apiName, supported, usage
12+
import csharp
13+
import ExternalApi
14+
15+
private Call aUsage(ExternalApi api) { result.getTarget().getUnboundDeclaration() = api }
16+
17+
private boolean isSupported(ExternalApi api) {
18+
api.isSupported() and result = true
19+
or
20+
not api.isSupported() and
21+
result = false
22+
}
23+
24+
from ExternalApi api, string apiName, boolean supported, Call usage
25+
where
26+
apiName = api.getApiName() and
27+
supported = isSupported(api) and
28+
usage = aUsage(api)
29+
select usage, apiName, supported.toString(), "supported"
3230
`,
3331
dependencies: {
3432
"ExternalApi.qll": `/** Provides classes and predicates related to handling APIs from external libraries. */

extensions/ql-vscode/src/data-extensions-editor/queries/java.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ export const fetchExternalApisQuery: Query = {
55
* @name Usage of APIs coming from external libraries
66
* @description A list of 3rd party APIs used in the codebase. Excludes test and generated code.
77
* @tags telemetry
8+
* @kind problem
89
* @id java/telemetry/fetch-external-apis
910
*/
1011
1112
import java
12-
import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
1313
import ExternalApi
1414
1515
private Call aUsage(ExternalApi api) {
@@ -28,7 +28,7 @@ where
2828
apiName = api.getApiName() and
2929
supported = isSupported(api) and
3030
usage = aUsage(api)
31-
select apiName, supported, usage
31+
select usage, apiName, supported.toString(), "supported"
3232
`,
3333
dependencies: {
3434
"ExternalApi.qll": `/** Provides classes and predicates related to handling APIs from external libraries. */

extensions/ql-vscode/src/data-extensions-editor/queries/query.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
11
export type Query = {
2+
/**
3+
* The main query.
4+
*
5+
* It should select all usages of external APIs, and return the following result pattern:
6+
* - usage: the usage of the external API. This is an entity.
7+
* - apiName: the name of the external API. This is a string.
8+
* - supported: whether the external API is supported by the extension. This should be a string representation of a boolean to satisfy the result pattern for a problem query.
9+
* - "supported": a string literal. This is required to make the query a valid problem query.
10+
*/
211
mainQuery: string;
312
dependencies?: {
413
[filename: string]: string;

extensions/ql-vscode/src/pure/bqrs-cli-types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ export type BqrsKind =
115115
| "Entity";
116116

117117
interface BqrsColumn {
118-
name: string;
118+
name?: string;
119119
kind: BqrsKind;
120120
}
121121
export interface DecodedBqrsChunk {

extensions/ql-vscode/src/query-results.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ export async function interpretResultsSarif(
139139
metadata: QueryMetadata | undefined,
140140
resultsPaths: ResultsPaths,
141141
sourceInfo?: cli.SourceInfo,
142+
args?: string[],
142143
): Promise<SarifInterpretationData> {
143144
const { resultsPath, interpretedResultsPath } = resultsPaths;
144145
let res;
@@ -150,6 +151,7 @@ export async function interpretResultsSarif(
150151
resultsPath,
151152
interpretedResultsPath,
152153
sourceInfo,
154+
args,
153155
);
154156
}
155157
return { ...res, t: "SarifInterpretationData" };

0 commit comments

Comments
 (0)