Skip to content

Commit c0187a5

Browse files
committed
Limit SARIF code snippet size
This adds new filtering of very large SARIF code snippets (defined as more than 8192 characters). If less than 1% of such a snippet is highlighted, the code snippet is left out of the analysed results and is therefore not shown in the UI. This avoids very large SARIF files that can cause the extension host to crash when the analysis results are sent to the UI. I don't think any of these snippets would ever be useful to show, so it should be fine to just not include them.
1 parent c9d6bfd commit c0187a5

2 files changed

Lines changed: 40 additions & 6 deletions

File tree

extensions/ql-vscode/src/pure/sarif-utils.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import * as Sarif from "sarif";
2-
import { HighlightedRegion } from "../remote-queries/shared/analysis-result";
2+
import type { HighlightedRegion } from "../remote-queries/shared/analysis-result";
33
import { ResolvableLocationValue } from "./bqrs-cli-types";
44

55
export interface SarifLink {

extensions/ql-vscode/src/remote-queries/sarif-processing.ts

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import * as sarif from "sarif";
22
import {
3+
parseHighlightedLine,
34
parseSarifPlainTextMessage,
45
parseSarifRegion,
56
} from "../pure/sarif-utils";
@@ -15,6 +16,11 @@ import {
1516
HighlightedRegion,
1617
} from "./shared/analysis-result";
1718

19+
// A line of more than 8k characters is probably generated.
20+
const CODE_SNIPPET_LARGE_LINE_SIZE_LIMIT = 8192;
21+
// If less than 1% of the line is highlighted, we consider it a small snippet.
22+
const CODE_SNIPPET_HIGHLIGHTED_REGION_MINIMUM_PERCENTAGE = 0.01;
23+
1824
const defaultSeverity = "Warning";
1925

2026
export function extractAnalysisAlerts(
@@ -163,17 +169,45 @@ export function tryGetRule(
163169
}
164170

165171
function getCodeSnippet(
172+
contextRegion?: sarif.Region,
166173
region?: sarif.Region,
167-
alternateRegion?: sarif.Region,
168174
): CodeSnippet | undefined {
169-
region = region ?? alternateRegion;
175+
const actualRegion = contextRegion ?? region;
170176

171-
if (!region) {
177+
if (!actualRegion) {
172178
return undefined;
173179
}
174180

175-
const text = region.snippet?.text || "";
176-
const { startLine, endLine } = parseSarifRegion(region);
181+
const text = actualRegion.snippet?.text || "";
182+
const { startLine, endLine } = parseSarifRegion(actualRegion);
183+
184+
if (
185+
contextRegion &&
186+
region &&
187+
text.length > CODE_SNIPPET_LARGE_LINE_SIZE_LIMIT
188+
) {
189+
const code = text.split("\n");
190+
191+
const highlightedRegion = parseSarifRegion(region);
192+
193+
const highlightedLines = code.map((line, index) => {
194+
return parseHighlightedLine(line, startLine + index, highlightedRegion);
195+
});
196+
197+
const highlightedCharactersCount = highlightedLines
198+
.map((line) => line.highlightedSection.length)
199+
.reduce((a, b) => a + b, 0);
200+
201+
const highlightedPercentage = highlightedCharactersCount / text.length;
202+
203+
if (
204+
highlightedPercentage < CODE_SNIPPET_HIGHLIGHTED_REGION_MINIMUM_PERCENTAGE
205+
) {
206+
// If not enough is highlighted and the snippet is large, it's probably generated or bundled code and
207+
// we don't want to show it.
208+
return undefined;
209+
}
210+
}
177211

178212
return {
179213
startLine,

0 commit comments

Comments
 (0)