Skip to content

Commit 752c7b2

Browse files
alexetjcreedcmu
authored and committed
Move sarif parsing code to a location that can be shared.
1 parent d6b7889 commit 752c7b2

File tree

2 files changed

+126
-120
lines changed

2 files changed

+126
-120
lines changed
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import * as Sarif from "sarif"
2+
import * as path from "path"
3+
import { LocationStyle, ResolvableLocationValue } from "semmle-bqrs";
4+
5+
/**
 * A link embedded in a SARIF plain text message, i.e. the parsed form of
 * `[text](dest)`.
 */
export interface SarifLink {
6+
// Numeric link target taken from the `(N)` part of the link.
dest: number
7+
// Link text with SARIF escape sequences already decoded.
text: string
8+
}
9+
10+
11+
// Result of parsing a SARIF location: either a resolvable location extended
// with a display-friendly file name, or a `NoLocation` marker whose `hint`
// says which part of the SARIF location was missing.
type ParsedSarifLocation =
12+
| ResolvableLocationValue
13+
// Resolvable locations have a `file` field, but it will sometimes include
14+
// a source location prefix, which contains build-specific information the user
15+
// doesn't really need to see. We ensure that `userVisibleFile` will not contain
16+
// that, and is appropriate for display in the UI.
17+
& { userVisibleFile: string }
18+
| { t: 'NoLocation', hint: string };
19+
20+
// One piece of a parsed SARIF plain text message: literal text (a string) or an embedded link.
export type SarifMessageComponent = string | SarifLink
21+
22+
/**
 * Reverses the backslash escaping used in SARIF plain text messages:
 * `\[` becomes `[`, `\]` becomes `]` and `\\` becomes `\`.
 *
 * Every escape sequence is decoded in a single left-to-right pass. This
 * guarantees that all occurrences are handled (not just the first escaped
 * backslash) and that a backslash produced by decoding `\\` is never mistaken
 * for the start of another escape sequence.
 *
 * @param message Escaped text taken from a SARIF plain text message.
 * @returns The text with all escape sequences decoded; any backslash that is
 *   not followed by `[`, `]` or `\` is left untouched.
 */
export function unescapeSarifText(message: string): string {
  // A backslash followed by one of "[", "]" or "\" collapses to that character.
  return message.replace(/\\([\[\]\\])/g, "$1");
}
28+
29+
/**
 * Splits a SARIF plain text message into its components: runs of literal text
 * and embedded links of the form `[linkText](4)`.
 *
 * Text segments and link segments alternate in the result, which always starts
 * and ends with a text segment (possibly the empty string). Escape sequences
 * in both the text and the link text are decoded with `unescapeSarifText`.
 *
 * @param message The raw SARIF plain text message.
 * @returns The message components in the order they appear in the message.
 */
export function parseSarifPlainTextMessage(message: string): SarifMessageComponent[] {
  // Matches "[linkText](4)", where "[" and "]" may appear escaped. The
  // lookbehind asserts that the opening "[" is not itself escaped (it must be
  // preceded by an even number of backslashes). The link text allows escaped
  // "[", "]" and "\"; the target must be numeric.
  // Technically the target could be any URI, but we don't output that yet.
  // Escaping outside of links is not mentioned in the SARIF spec, but we
  // always output SARIF this way.
  const linkRegex = /(?<=(?<!\\)(\\\\)*)\[(?<linkText>([^\\\]\[]|\\\\|\\\]|\\\[)*)\]\((?<linkTarget>[0-9]+)\)/g;

  const components: SarifMessageComponent[] = [];
  // Offset of the first character not yet emitted as part of a component.
  let consumed = 0;

  for (let match = linkRegex.exec(message); match !== null; match = linkRegex.exec(message)) {
    const groups = match.groups!;
    // Literal text between the previous link (or the start) and this link.
    components.push(unescapeSarifText(message.substring(consumed, match.index)));
    components.push({
      dest: Number(groups["linkTarget"]),
      text: unescapeSarifText(groups["linkText"]),
    });
    consumed = match.index + match[0].length;
  }

  // Whatever follows the last link (the whole message if there were none).
  components.push(unescapeSarifText(message.substring(consumed)));
  return components;
}
49+
50+
51+
/**
 * Computes a path normalized to reflect conventional normalization
 * of windows paths into zip archive paths.
 *
 * @param sourceLocationPrefix The source location prefix of a database. May be
 *   unix style `/foo/bar/baz` or windows-style `C:\foo\bar\baz`.
 * @param sarifRelativeUui A uri relative to `sourceLocationPrefix`. It is
 *   percent-decoded before being appended to the prefix.
 * @returns A string intended for the `.file` field of a `FivePartLocation`:
 *   backslashes in the prefix are replaced by forward slashes before the two
 *   parts are combined with `path.join`, and drive letters `C:` may appear.
 */
export function getPathRelativeToSourceLocationPrefix(sourceLocationPrefix: string, sarifRelativeUui: string): string {
  // Windows-style prefixes use "\" as separator; switch to "/" before joining.
  const normalizedSourceLocationPrefix = sourceLocationPrefix.replace(/\\/g, '/');
  // The relative part is a URI, so e.g. "%20" has to become a literal space.
  return path.join(normalizedSourceLocationPrefix, decodeURIComponent(sarifRelativeUui));
}
64+
65+
/**
 * Converts a SARIF location into the location representation used by the
 * results view.
 *
 * @param loc The SARIF location to convert.
 * @param sourceLocationPrefix Source location prefix of the database; used to
 *   resolve artifact uris that are not `file:` uris.
 * @returns A `NoLocation` value (with a hint naming the missing piece) when the
 *   location has no physical location, artifact location or uri; a whole-file
 *   location when there is no region; otherwise a five-part location.
 */
export function parseSarifLocation(loc: Sarif.Location, sourceLocationPrefix: string): ParsedSarifLocation {
  const physicalLocation = loc.physicalLocation;
  if (physicalLocation === undefined) {
    return { t: 'NoLocation', hint: 'no physical location' };
  }

  const artifactLocation = physicalLocation.artifactLocation;
  if (artifactLocation === undefined) {
    return { t: 'NoLocation', hint: 'no artifact location' };
  }

  // Despite the name this is not necessarily an absolute uri: it is either a
  // `file:` uri or a uri relative to the source location prefix.
  const uri = artifactLocation.uri;
  if (uri === undefined) {
    return { t: 'NoLocation', hint: 'artifact location has no uri' };
  }

  const fileUriRegex = /^file:/;
  let effectiveLocation: string;
  let userVisibleFile: string;
  if (fileUriRegex.test(uri)) {
    // `file:` uri: strip the scheme and decode; the same value is shown to the user.
    effectiveLocation = decodeURIComponent(uri.replace(fileUriRegex, ''));
    userVisibleFile = effectiveLocation;
  } else {
    // Relative uri: resolve against the prefix, but show the user the uri as given.
    effectiveLocation = getPathRelativeToSourceLocationPrefix(sourceLocationPrefix, uri);
    userVisibleFile = uri;
  }

  const region = physicalLocation.region;
  if (region === undefined) {
    // If the region property is absent, the physicalLocation object refers to the entire file.
    // Source: https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Toc16012638.
    // TODO: Do we get here if we provide a non-filesystem URL?
    return {
      t: LocationStyle.WholeFile,
      file: effectiveLocation,
      userVisibleFile,
    };
  }

  // We assume that the SARIF we're given always has startLine.
  // This is not mandated by the SARIF spec, but should be true of
  // SARIF output by our own tools.
  const lineStart = region.startLine!;

  // These defaults are from SARIF 2.1.0 spec, section 3.30.2, "Text Regions"
  // https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Ref493492556
  const lineEnd = region.endLine !== undefined ? region.endLine : lineStart;
  const colStart = region.startColumn !== undefined ? region.startColumn : 1;

  // We also assume that our tools always supply the `endColumn` field, which is
  // fortunate, since the SARIF spec says that it defaults to the end of the line,
  // whose length we don't know at this point in the code.
  //
  // It is off by one with respect to the way vscode counts columns in selections.
  const colEnd = region.endColumn! - 1;

  return {
    t: LocationStyle.FivePart,
    file: effectiveLocation,
    userVisibleFile,
    lineStart,
    colStart,
    lineEnd,
    colEnd,
  };
}

extensions/ql-vscode/src/view/alert-table.tsx

Lines changed: 2 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -2,75 +2,18 @@ import * as path from 'path';
22
import * as React from 'react';
33
import * as Sarif from 'sarif';
44
import * as Keys from '../result-keys';
5-
import { LocationStyle, ResolvableLocationValue } from 'semmle-bqrs';
5+
import { LocationStyle } from 'semmle-bqrs';
66
import * as octicons from './octicons';
77
import { className, renderLocation, ResultTableProps, zebraStripe, selectableZebraStripe, jumpToLocation } from './result-table-utils';
88
import { PathTableResultSet, onNavigation, NavigationEvent } from './results';
9+
import { parseSarifPlainTextMessage, parseSarifLocation } from '../sarif-utils';
910

1011
export type PathTableProps = ResultTableProps & { resultSet: PathTableResultSet };
1112
export interface PathTableState {
1213
expanded: { [k: string]: boolean };
1314
selectedPathNode: undefined | Keys.PathNode;
1415
}
1516

16-
interface SarifLink {
17-
dest: number
18-
text: string
19-
}
20-
21-
type ParsedSarifLocation =
22-
| ResolvableLocationValue
23-
// Resolvable locations have a `file` field, but it will sometimes include
24-
// a source location prefix, which contains build-specific information the user
25-
// doesn't really need to see. We ensure that `userVisibleFile` will not contain
26-
// that, and is appropriate for display in the UI.
27-
& { userVisibleFile: string }
28-
| { t: 'NoLocation', hint: string };
29-
30-
type SarifMessageComponent = string | SarifLink
31-
32-
/**
33-
* Unescape "[", "]" and "\\" like in sarif plain text messages
34-
*/
35-
function unescapeSarifText(message: string): string {
36-
return message.replace(/\\\[/g, "[").replace(/\\\]/g, "]").replace(/\\\\/, "\\");
37-
}
38-
39-
function parseSarifPlainTextMessage(message: string): SarifMessageComponent[] {
40-
let results: SarifMessageComponent[] = [];
41-
42-
// We want something like "[linkText](4)", except that "[" and "]" may be escaped. The lookbehind asserts
43-
// that the initial [ is not escaped. Then we parse a link text with "[" and "]" escaped. Then we parse the numerical target.
44-
// Technically we could have any uri in the target but we don't output that yet.
45-
// The possibility of escaping outside the link is not mentioned in the sarif spec but we always output sartif this way.
46-
const linkRegex = /(?<=(?<!\\)(\\\\)*)\[(?<linkText>([^\\\]\[]|\\\\|\\\]|\\\[)*)\]\((?<linkTarget>[0-9]+)\)/g;
47-
let result: RegExpExecArray | null;
48-
let curIndex = 0;
49-
while ((result = linkRegex.exec(message)) !== null) {
50-
results.push(unescapeSarifText(message.substring(curIndex, result.index)));
51-
const linkText = result.groups!["linkText"];
52-
const linkTarget = +result.groups!["linkTarget"];
53-
results.push({ dest: linkTarget, text: unescapeSarifText(linkText) });
54-
curIndex = result.index + result[0].length;
55-
}
56-
results.push(unescapeSarifText(message.substring(curIndex, message.length)));
57-
return results;
58-
}
59-
60-
/**
61-
* Computes a path normalized to reflect conventional normalization
62-
* of windows paths into zip archive paths.
63-
* @param sourceLocationPrefix The source location prefix of a database. May be
64-
* unix style `/foo/bar/baz` or windows-style `C:\foo\bar\baz`.
65-
* @param sarifRelativeUri A uri relative to sourceLocationPrefix.
66-
* @returns A string that is valid for the `.file` field of a `FivePartLocation`:
67-
* directory separators are normalized, but drive letters `C:` may appear.
68-
*/
69-
export function getPathRelativeToSourceLocationPrefix(sourceLocationPrefix: string, sarifRelativeUui: string) {
70-
const normalizedSourceLocationPrefix = sourceLocationPrefix.replace(/\\/g, '/');
71-
return path.join(normalizedSourceLocationPrefix, decodeURIComponent(sarifRelativeUui));
72-
}
73-
7417
export class PathTable extends React.Component<PathTableProps, PathTableState> {
7518
constructor(props: PathTableProps) {
7619
super(props);
@@ -323,64 +266,3 @@ export class PathTable extends React.Component<PathTableProps, PathTableState> {
323266
onNavigation.removeListener(this.handleNavigationEvent);
324267
}
325268
}
326-
327-
function parseSarifLocation(loc: Sarif.Location, sourceLocationPrefix: string): ParsedSarifLocation {
328-
const physicalLocation = loc.physicalLocation;
329-
if (physicalLocation === undefined)
330-
return { t: 'NoLocation', hint: 'no physical location' };
331-
if (physicalLocation.artifactLocation === undefined)
332-
return { t: 'NoLocation', hint: 'no artifact location' };
333-
if (physicalLocation.artifactLocation.uri === undefined)
334-
return { t: 'NoLocation', hint: 'artifact location has no uri' };
335-
336-
// This is not necessarily really an absolute uri; it could either be a
337-
// file uri or a relative uri.
338-
const uri = physicalLocation.artifactLocation.uri;
339-
340-
const fileUriRegex = /^file:/;
341-
const effectiveLocation = uri.match(fileUriRegex) ?
342-
decodeURIComponent(uri.replace(fileUriRegex, '')) :
343-
getPathRelativeToSourceLocationPrefix(sourceLocationPrefix, uri);
344-
const userVisibleFile = uri.match(fileUriRegex) ?
345-
decodeURIComponent(uri.replace(fileUriRegex, '')) :
346-
uri;
347-
348-
if (physicalLocation.region === undefined) {
349-
// If the region property is absent, the physicalLocation object refers to the entire file.
350-
// Source: https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Toc16012638.
351-
// TODO: Do we get here if we provide a non-filesystem URL?
352-
return {
353-
t: LocationStyle.WholeFile,
354-
file: effectiveLocation,
355-
userVisibleFile,
356-
};
357-
} else {
358-
const region = physicalLocation.region;
359-
// We assume that the SARIF we're given always has startLine
360-
// This is not mandated by the SARIF spec, but should be true of
361-
// SARIF output by our own tools.
362-
const lineStart = region.startLine!;
363-
364-
// These defaults are from SARIF 2.1.0 spec, section 3.30.2, "Text Regions"
365-
// https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Ref493492556
366-
const lineEnd = region.endLine === undefined ? lineStart : region.endLine;
367-
const colStart = region.startColumn === undefined ? 1 : region.startColumn;
368-
369-
// We also assume that our tools will always supply `endColumn` field, which is
370-
// fortunate, since the SARIF spec says that it defaults to the end of the line, whose
371-
// length we don't know at this point in the code.
372-
//
373-
// It is off by one with respect to the way vscode counts columns in selections.
374-
const colEnd = region.endColumn! - 1;
375-
376-
return {
377-
t: LocationStyle.FivePart,
378-
file: effectiveLocation,
379-
userVisibleFile,
380-
lineStart,
381-
colStart,
382-
lineEnd,
383-
colEnd,
384-
};
385-
}
386-
}

0 commit comments

Comments
 (0)