Skip to content

Commit f1533dd

Browse files
author
Dave Bartolomeo
authored
Merge pull request #2858 from github/dbartol/long-strings
Use streaming when creating log symbols file.
2 parents b1debee + 3c63df2 commit f1533dd

File tree

4 files changed

+263
-155
lines changed

4 files changed

+263
-155
lines changed

extensions/ql-vscode/src/codeql-cli/cli.ts

Lines changed: 2 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import { dirname, join, delimiter } from "path";
66
import * as sarif from "sarif";
77
import { SemVer } from "semver";
88
import { Readable } from "stream";
9-
import { StringDecoder } from "string_decoder";
109
import tk from "tree-kill";
1110
import { promisify } from "util";
1211
import { CancellationToken, Disposable, Uri } from "vscode";
@@ -31,6 +30,7 @@ import { CompilationMessage } from "../query-server/legacy-messages";
3130
import { sarifParser } from "../common/sarif-parser";
3231
import { App } from "../common/app";
3332
import { QueryLanguage } from "../common/query-language";
33+
import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";
3434

3535
/**
3636
* The version of the SARIF format that we are using.
@@ -1649,120 +1649,13 @@ export async function runCodeQlCliCommand(
16491649
}
16501650
}
16511651

1652-
/**
1653-
* Buffer to hold state used when splitting a text stream into lines.
1654-
*/
1655-
class SplitBuffer {
1656-
private readonly decoder = new StringDecoder("utf8");
1657-
private readonly maxSeparatorLength: number;
1658-
private buffer = "";
1659-
private searchIndex = 0;
1660-
1661-
constructor(private readonly separators: readonly string[]) {
1662-
this.maxSeparatorLength = separators
1663-
.map((s) => s.length)
1664-
.reduce((a, b) => Math.max(a, b), 0);
1665-
}
1666-
1667-
/**
1668-
* Append new text data to the buffer.
1669-
* @param chunk The chunk of data to append.
1670-
*/
1671-
public addChunk(chunk: Buffer): void {
1672-
this.buffer += this.decoder.write(chunk);
1673-
}
1674-
1675-
/**
1676-
* Signal that the end of the input stream has been reached.
1677-
*/
1678-
public end(): void {
1679-
this.buffer += this.decoder.end();
1680-
this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned.
1681-
}
1682-
1683-
/**
1684-
* A version of startsWith that isn't overridden by a broken version of ms-python.
1685-
*
1686-
* The definition comes from
1687-
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
1688-
* which is CC0/public domain
1689-
*
1690-
* See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
1691-
*/
1692-
private static startsWith(
1693-
s: string,
1694-
searchString: string,
1695-
position: number,
1696-
): boolean {
1697-
const pos = position > 0 ? position | 0 : 0;
1698-
return s.substring(pos, pos + searchString.length) === searchString;
1699-
}
1700-
1701-
/**
1702-
* Extract the next full line from the buffer, if one is available.
1703-
* @returns The text of the next available full line (without the separator), or `undefined` if no
1704-
* line is available.
1705-
*/
1706-
public getNextLine(): string | undefined {
1707-
while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) {
1708-
for (const separator of this.separators) {
1709-
if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
1710-
const line = this.buffer.slice(0, this.searchIndex);
1711-
this.buffer = this.buffer.slice(this.searchIndex + separator.length);
1712-
this.searchIndex = 0;
1713-
return line;
1714-
}
1715-
}
1716-
this.searchIndex++;
1717-
}
1718-
1719-
return undefined;
1720-
}
1721-
}
1722-
1723-
/**
1724-
* Splits a text stream into lines based on a list of valid line separators.
1725-
* @param stream The text stream to split. This stream will be fully consumed.
1726-
* @param separators The list of strings that act as line separators.
1727-
* @returns A sequence of lines (not including separators).
1728-
*/
1729-
async function* splitStreamAtSeparators(
1730-
stream: Readable,
1731-
separators: string[],
1732-
): AsyncGenerator<string, void, unknown> {
1733-
const buffer = new SplitBuffer(separators);
1734-
for await (const chunk of stream) {
1735-
buffer.addChunk(chunk);
1736-
let line: string | undefined;
1737-
do {
1738-
line = buffer.getNextLine();
1739-
if (line !== undefined) {
1740-
yield line;
1741-
}
1742-
} while (line !== undefined);
1743-
}
1744-
buffer.end();
1745-
let line: string | undefined;
1746-
do {
1747-
line = buffer.getNextLine();
1748-
if (line !== undefined) {
1749-
yield line;
1750-
}
1751-
} while (line !== undefined);
1752-
}
1753-
1754-
/**
1755-
* Standard line endings for splitting human-readable text.
1756-
*/
1757-
const lineEndings = ["\r\n", "\r", "\n"];
1758-
17591652
/**
17601653
* Log a text stream to a `Logger` interface.
17611654
* @param stream The stream to log.
17621655
* @param logger The logger that will consume the stream output.
17631656
*/
17641657
async function logStream(stream: Readable, logger: BaseLogger): Promise<void> {
1765-
for await (const line of splitStreamAtSeparators(stream, lineEndings)) {
1658+
for await (const line of splitStreamAtSeparators(stream, LINE_ENDINGS)) {
17661659
// Await the result of log here in order to ensure the logs are written in the correct order.
17671660
await logger.log(line);
17681661
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import { Readable } from "stream";
2+
import { StringDecoder } from "string_decoder";
3+
4+
/**
5+
* Buffer to hold state used when splitting a text stream into lines.
6+
*/
7+
export class SplitBuffer {
8+
private readonly decoder = new StringDecoder("utf8");
9+
private readonly maxSeparatorLength: number;
10+
private buffer = "";
11+
private searchIndex = 0;
12+
private ended = false;
13+
14+
constructor(private readonly separators: readonly string[]) {
15+
this.maxSeparatorLength = separators
16+
.map((s) => s.length)
17+
.reduce((a, b) => Math.max(a, b), 0);
18+
}
19+
20+
/**
21+
* Append new text data to the buffer.
22+
* @param chunk The chunk of data to append.
23+
*/
24+
public addChunk(chunk: Buffer): void {
25+
this.buffer += this.decoder.write(chunk);
26+
}
27+
28+
/**
29+
* Signal that the end of the input stream has been reached.
30+
*/
31+
public end(): void {
32+
this.buffer += this.decoder.end();
33+
this.ended = true;
34+
}
35+
36+
/**
37+
* A version of startsWith that isn't overridden by a broken version of ms-python.
38+
*
39+
* The definition comes from
40+
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
41+
* which is CC0/public domain
42+
*
43+
* See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
44+
*/
45+
private static startsWith(
46+
s: string,
47+
searchString: string,
48+
position: number,
49+
): boolean {
50+
const pos = position > 0 ? position | 0 : 0;
51+
return s.substring(pos, pos + searchString.length) === searchString;
52+
}
53+
54+
/**
55+
* Extract the next full line from the buffer, if one is available.
56+
* @returns The text of the next available full line (without the separator), or `undefined` if no
57+
* line is available.
58+
*/
59+
public getNextLine(): string | undefined {
60+
// If we haven't received all of the input yet, don't search too close to the end of the buffer,
61+
// or we could match a separator that's split across two chunks. For example, we could see "\r"
62+
// at the end of the buffer and match that, even though we were about to receive a "\n" right
63+
// after it.
64+
const maxSearchIndex = this.ended
65+
? this.buffer.length - 1
66+
: this.buffer.length - this.maxSeparatorLength;
67+
while (this.searchIndex <= maxSearchIndex) {
68+
for (const separator of this.separators) {
69+
if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
70+
const line = this.buffer.slice(0, this.searchIndex);
71+
this.buffer = this.buffer.slice(this.searchIndex + separator.length);
72+
this.searchIndex = 0;
73+
return line;
74+
}
75+
}
76+
this.searchIndex++;
77+
}
78+
79+
if (this.ended && this.buffer.length > 0) {
80+
// If we still have some text left in the buffer, return it as the last line.
81+
const line = this.buffer;
82+
this.buffer = "";
83+
this.searchIndex = 0;
84+
return line;
85+
} else {
86+
return undefined;
87+
}
88+
}
89+
}
90+
91+
/**
92+
* Splits a text stream into lines based on a list of valid line separators.
93+
* @param stream The text stream to split. This stream will be fully consumed.
94+
* @param separators The list of strings that act as line separators.
95+
* @returns A sequence of lines (not including separators).
96+
*/
97+
export async function* splitStreamAtSeparators(
98+
stream: Readable,
99+
separators: string[],
100+
): AsyncGenerator<string, void, unknown> {
101+
const buffer = new SplitBuffer(separators);
102+
for await (const chunk of stream) {
103+
buffer.addChunk(chunk);
104+
let line: string | undefined;
105+
do {
106+
line = buffer.getNextLine();
107+
if (line !== undefined) {
108+
yield line;
109+
}
110+
} while (line !== undefined);
111+
}
112+
buffer.end();
113+
let line: string | undefined;
114+
do {
115+
line = buffer.getNextLine();
116+
if (line !== undefined) {
117+
yield line;
118+
}
119+
} while (line !== undefined);
120+
}
121+
122+
/**
123+
* Standard line endings for splitting human-readable text.
124+
*/
125+
export const LINE_ENDINGS = ["\r\n", "\r", "\n"];

extensions/ql-vscode/src/log-insights/summary-parser.ts

Lines changed: 52 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
import { writeFile, promises } from "fs-extra";
1+
import { createReadStream, writeFile } from "fs-extra";
2+
import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";
23

34
/**
45
* Location information for a single pipeline invocation in the RA.
@@ -64,59 +65,64 @@ export async function generateSummarySymbolsFile(
6465
async function generateSummarySymbols(
6566
summaryPath: string,
6667
): Promise<SummarySymbols> {
67-
const summary = await promises.readFile(summaryPath, {
68+
const stream = createReadStream(summaryPath, {
6869
encoding: "utf-8",
6970
});
70-
const symbols: SummarySymbols = {
71-
predicates: {},
72-
};
71+
try {
72+
const lines = splitStreamAtSeparators(stream, LINE_ENDINGS);
7373

74-
const lines = summary.split(/\r?\n/);
75-
let lineNumber = 0;
76-
while (lineNumber < lines.length) {
77-
const startLineNumber = lineNumber;
78-
lineNumber++;
79-
const startLine = lines[startLineNumber];
80-
const nonRecursiveMatch = startLine.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
81-
let predicateName: string | undefined = undefined;
82-
let iteration = 0;
83-
if (nonRecursiveMatch) {
84-
predicateName = nonRecursiveMatch.groups!.predicateName;
85-
} else {
86-
const recursiveMatch = startLine.match(RECURSIVE_TUPLE_COUNT_REGEXP);
87-
if (recursiveMatch?.groups) {
88-
predicateName = recursiveMatch.groups.predicateName;
89-
iteration = parseInt(recursiveMatch.groups.iteration);
90-
}
91-
}
74+
const symbols: SummarySymbols = {
75+
predicates: {},
76+
};
9277

93-
if (predicateName !== undefined) {
94-
const raStartLine = lineNumber;
95-
let raEndLine: number | undefined = undefined;
96-
while (lineNumber < lines.length && raEndLine === undefined) {
97-
const raLine = lines[lineNumber];
98-
const returnMatch = raLine.match(RETURN_REGEXP);
99-
if (returnMatch) {
100-
raEndLine = lineNumber;
78+
let lineNumber = 0;
79+
let raStartLine = 0;
80+
let iteration = 0;
81+
let predicateName: string | undefined = undefined;
82+
let startLine = 0;
83+
for await (const line of lines) {
84+
if (predicateName === undefined) {
85+
// Looking for the start of the predicate.
86+
const nonRecursiveMatch = line.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
87+
if (nonRecursiveMatch) {
88+
iteration = 0;
89+
predicateName = nonRecursiveMatch.groups!.predicateName;
90+
} else {
91+
const recursiveMatch = line.match(RECURSIVE_TUPLE_COUNT_REGEXP);
92+
if (recursiveMatch?.groups) {
93+
predicateName = recursiveMatch.groups.predicateName;
94+
iteration = parseInt(recursiveMatch.groups.iteration);
95+
}
10196
}
102-
lineNumber++;
103-
}
104-
if (raEndLine !== undefined) {
105-
let symbol = symbols.predicates[predicateName];
106-
if (symbol === undefined) {
107-
symbol = {
108-
iterations: {},
97+
if (predicateName !== undefined) {
98+
startLine = lineNumber;
99+
raStartLine = lineNumber + 1;
100+
}
101+
} else {
102+
const returnMatch = line.match(RETURN_REGEXP);
103+
if (returnMatch) {
104+
let symbol = symbols.predicates[predicateName];
105+
if (symbol === undefined) {
106+
symbol = {
107+
iterations: {},
108+
};
109+
symbols.predicates[predicateName] = symbol;
110+
}
111+
symbol.iterations[iteration] = {
112+
startLine,
113+
raStartLine,
114+
raEndLine: lineNumber,
109115
};
110-
symbols.predicates[predicateName] = symbol;
116+
117+
predicateName = undefined;
111118
}
112-
symbol.iterations[iteration] = {
113-
startLine: lineNumber,
114-
raStartLine,
115-
raEndLine,
116-
};
117119
}
120+
121+
lineNumber++;
118122
}
119-
}
120123

121-
return symbols;
124+
return symbols;
125+
} finally {
126+
stream.close();
127+
}
122128
}

0 commit comments

Comments
 (0)