Merge pull request #2858 from github/dbartol/long-strings

Dave Bartolomeo · web-flow · commit f1533dde2d08 · 2023-09-27T09:55:25.000-04:00
Use streaming when creating log symbols file.
diff --git a/extensions/ql-vscode/src/codeql-cli/cli.ts b/extensions/ql-vscode/src/codeql-cli/cli.ts
@@ -6,7 +6,6 @@ import { dirname, join, delimiter } from "path";
 import * as sarif from "sarif";
 import { SemVer } from "semver";
 import { Readable } from "stream";
-import { StringDecoder } from "string_decoder";
 import tk from "tree-kill";
 import { promisify } from "util";
 import { CancellationToken, Disposable, Uri } from "vscode";
@@ -31,6 +30,7 @@ import { CompilationMessage } from "../query-server/legacy-messages";
 import { sarifParser } from "../common/sarif-parser";
 import { App } from "../common/app";
 import { QueryLanguage } from "../common/query-language";
+import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";
 
 /**
  * The version of the SARIF format that we are using.
@@ -1649,120 +1649,13 @@ export async function runCodeQlCliCommand(
   }
 }
 
-/**
- * Buffer to hold state used when splitting a text stream into lines.
- */
-class SplitBuffer {
-  private readonly decoder = new StringDecoder("utf8");
-  private readonly maxSeparatorLength: number;
-  private buffer = "";
-  private searchIndex = 0;
-
-  constructor(private readonly separators: readonly string[]) {
-    this.maxSeparatorLength = separators
-      .map((s) => s.length)
-      .reduce((a, b) => Math.max(a, b), 0);
-  }
-
-  /**
-   * Append new text data to the buffer.
-   * @param chunk The chunk of data to append.
-   */
-  public addChunk(chunk: Buffer): void {
-    this.buffer += this.decoder.write(chunk);
-  }
-
-  /**
-   * Signal that the end of the input stream has been reached.
-   */
-  public end(): void {
-    this.buffer += this.decoder.end();
-    this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned.
-  }
-
-  /**
-   * A version of startsWith that isn't overriden by a broken version of ms-python.
-   *
-   * The definition comes from
-   * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
-   * which is CC0/public domain
-   *
-   * See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
-   */
-  private static startsWith(
-    s: string,
-    searchString: string,
-    position: number,
-  ): boolean {
-    const pos = position > 0 ? position | 0 : 0;
-    return s.substring(pos, pos + searchString.length) === searchString;
-  }
-
-  /**
-   * Extract the next full line from the buffer, if one is available.
-   * @returns The text of the next available full line (without the separator), or `undefined` if no
-   * line is available.
-   */
-  public getNextLine(): string | undefined {
-    while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) {
-      for (const separator of this.separators) {
-        if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
-          const line = this.buffer.slice(0, this.searchIndex);
-          this.buffer = this.buffer.slice(this.searchIndex + separator.length);
-          this.searchIndex = 0;
-          return line;
-        }
-      }
-      this.searchIndex++;
-    }
-
-    return undefined;
-  }
-}
-
-/**
- * Splits a text stream into lines based on a list of valid line separators.
- * @param stream The text stream to split. This stream will be fully consumed.
- * @param separators The list of strings that act as line separators.
- * @returns A sequence of lines (not including separators).
- */
-async function* splitStreamAtSeparators(
-  stream: Readable,
-  separators: string[],
-): AsyncGenerator<string, void, unknown> {
-  const buffer = new SplitBuffer(separators);
-  for await (const chunk of stream) {
-    buffer.addChunk(chunk);
-    let line: string | undefined;
-    do {
-      line = buffer.getNextLine();
-      if (line !== undefined) {
-        yield line;
-      }
-    } while (line !== undefined);
-  }
-  buffer.end();
-  let line: string | undefined;
-  do {
-    line = buffer.getNextLine();
-    if (line !== undefined) {
-      yield line;
-    }
-  } while (line !== undefined);
-}
-
-/**
- *  Standard line endings for splitting human-readable text.
- */
-const lineEndings = ["\r\n", "\r", "\n"];
-
 /**
  * Log a text stream to a `Logger` interface.
  * @param stream The stream to log.
  * @param logger The logger that will consume the stream output.
  */
 async function logStream(stream: Readable, logger: BaseLogger): Promise<void> {
-  for await (const line of splitStreamAtSeparators(stream, lineEndings)) {
+  for await (const line of splitStreamAtSeparators(stream, LINE_ENDINGS)) {
     // Await the result of log here in order to ensure the logs are written in the correct order.
     await logger.log(line);
   }
diff --git a/extensions/ql-vscode/src/common/split-stream.ts b/extensions/ql-vscode/src/common/split-stream.ts
@@ -0,0 +1,125 @@
+import { Readable } from "stream";
+import { StringDecoder } from "string_decoder";
+
+/**
+ * Buffer to hold state used when splitting a text stream into lines.
+ */
+export class SplitBuffer {
+  private readonly decoder = new StringDecoder("utf8");
+  private readonly maxSeparatorLength: number;
+  private buffer = "";
+  private searchIndex = 0;
+  private ended = false;
+
+  constructor(private readonly separators: readonly string[]) {
+    this.maxSeparatorLength = separators
+      .map((s) => s.length)
+      .reduce((a, b) => Math.max(a, b), 0);
+  }
+
+  /**
+   * Append new text data to the buffer.
+   * @param chunk The chunk of data to append.
+   */
+  public addChunk(chunk: Buffer): void {
+    this.buffer += this.decoder.write(chunk);
+  }
+
+  /**
+   * Signal that the end of the input stream has been reached.
+   */
+  public end(): void {
+    this.buffer += this.decoder.end();
+    this.ended = true;
+  }
+
+  /**
+   * A version of startsWith that isn't overridden by a broken version of ms-python.
+   *
+   * The definition comes from
+   * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
+   * which is CC0/public domain
+   *
+   * See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
+   */
+  private static startsWith(
+    s: string,
+    searchString: string,
+    position: number,
+  ): boolean {
+    const pos = position > 0 ? position | 0 : 0;
+    return s.substring(pos, pos + searchString.length) === searchString;
+  }
+
+  /**
+   * Extract the next full line from the buffer, if one is available.
+   * @returns The text of the next available full line (without the separator), or `undefined` if no
+   * line is available.
+   */
+  public getNextLine(): string | undefined {
+    // If we haven't received all of the input yet, don't search too close to the end of the buffer,
+    // or we could match a separator that's split across two chunks. For example, we could see "\r"
+    // at the end of the buffer and match that, even though we were about to receive a "\n" right
+    // after it.
+    const maxSearchIndex = this.ended
+      ? this.buffer.length - 1
+      : this.buffer.length - this.maxSeparatorLength;
+    while (this.searchIndex <= maxSearchIndex) {
+      for (const separator of this.separators) {
+        if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
+          const line = this.buffer.slice(0, this.searchIndex);
+          this.buffer = this.buffer.slice(this.searchIndex + separator.length);
+          this.searchIndex = 0;
+          return line;
+        }
+      }
+      this.searchIndex++;
+    }
+
+    if (this.ended && this.buffer.length > 0) {
+      // If we still have some text left in the buffer, return it as the last line.
+      const line = this.buffer;
+      this.buffer = "";
+      this.searchIndex = 0;
+      return line;
+    } else {
+      return undefined;
+    }
+  }
+}
+
+/**
+ * Splits a text stream into lines based on a list of valid line separators.
+ * @param stream The text stream to split. This stream will be fully consumed.
+ * @param separators The list of strings that act as line separators.
+ * @returns A sequence of lines (not including separators).
+ */
+export async function* splitStreamAtSeparators(
+  stream: Readable,
+  separators: string[],
+): AsyncGenerator<string, void, unknown> {
+  const buffer = new SplitBuffer(separators);
+  for await (const chunk of stream) {
+    buffer.addChunk(chunk);
+    let line: string | undefined;
+    do {
+      line = buffer.getNextLine();
+      if (line !== undefined) {
+        yield line;
+      }
+    } while (line !== undefined);
+  }
+  buffer.end();
+  let line: string | undefined;
+  do {
+    line = buffer.getNextLine();
+    if (line !== undefined) {
+      yield line;
+    }
+  } while (line !== undefined);
+}
+
+/**
+ * Standard line endings for splitting human-readable text; "\r\n" is listed first so it is matched before a lone "\r".
+ */
+export const LINE_ENDINGS = ["\r\n", "\r", "\n"];
diff --git a/extensions/ql-vscode/src/log-insights/summary-parser.ts b/extensions/ql-vscode/src/log-insights/summary-parser.ts
@@ -1,4 +1,5 @@
-import { writeFile, promises } from "fs-extra";
+import { createReadStream, writeFile } from "fs-extra";
+import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";
 
 /**
  * Location information for a single pipeline invocation in the RA.
@@ -64,59 +65,64 @@ export async function generateSummarySymbolsFile(
 async function generateSummarySymbols(
   summaryPath: string,
 ): Promise<SummarySymbols> {
-  const summary = await promises.readFile(summaryPath, {
+  const stream = createReadStream(summaryPath, {
     encoding: "utf-8",
   });
-  const symbols: SummarySymbols = {
-    predicates: {},
-  };
+  try {
+    const lines = splitStreamAtSeparators(stream, LINE_ENDINGS);
 
-  const lines = summary.split(/\r?\n/);
-  let lineNumber = 0;
-  while (lineNumber < lines.length) {
-    const startLineNumber = lineNumber;
-    lineNumber++;
-    const startLine = lines[startLineNumber];
-    const nonRecursiveMatch = startLine.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
-    let predicateName: string | undefined = undefined;
-    let iteration = 0;
-    if (nonRecursiveMatch) {
-      predicateName = nonRecursiveMatch.groups!.predicateName;
-    } else {
-      const recursiveMatch = startLine.match(RECURSIVE_TUPLE_COUNT_REGEXP);
-      if (recursiveMatch?.groups) {
-        predicateName = recursiveMatch.groups.predicateName;
-        iteration = parseInt(recursiveMatch.groups.iteration);
-      }
-    }
+    const symbols: SummarySymbols = {
+      predicates: {},
+    };
 
-    if (predicateName !== undefined) {
-      const raStartLine = lineNumber;
-      let raEndLine: number | undefined = undefined;
-      while (lineNumber < lines.length && raEndLine === undefined) {
-        const raLine = lines[lineNumber];
-        const returnMatch = raLine.match(RETURN_REGEXP);
-        if (returnMatch) {
-          raEndLine = lineNumber;
+    let lineNumber = 0;
+    let raStartLine = 0;
+    let iteration = 0;
+    let predicateName: string | undefined = undefined;
+    let startLine = 0;
+    for await (const line of lines) {
+      if (predicateName === undefined) {
+        // Looking for the start of the predicate.
+        const nonRecursiveMatch = line.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
+        if (nonRecursiveMatch) {
+          iteration = 0;
+          predicateName = nonRecursiveMatch.groups!.predicateName;
+        } else {
+          const recursiveMatch = line.match(RECURSIVE_TUPLE_COUNT_REGEXP);
+          if (recursiveMatch?.groups) {
+            predicateName = recursiveMatch.groups.predicateName;
+            iteration = parseInt(recursiveMatch.groups.iteration);
+          }
         }
-        lineNumber++;
-      }
-      if (raEndLine !== undefined) {
-        let symbol = symbols.predicates[predicateName];
-        if (symbol === undefined) {
-          symbol = {
-            iterations: {},
+        if (predicateName !== undefined) {
+          startLine = lineNumber;
+          raStartLine = lineNumber + 1;
+        }
+      } else {
+        const returnMatch = line.match(RETURN_REGEXP);
+        if (returnMatch) {
+          let symbol = symbols.predicates[predicateName];
+          if (symbol === undefined) {
+            symbol = {
+              iterations: {},
+            };
+            symbols.predicates[predicateName] = symbol;
+          }
+          symbol.iterations[iteration] = {
+            startLine,
+            raStartLine,
+            raEndLine: lineNumber,
           };
-          symbols.predicates[predicateName] = symbol;
+
+          predicateName = undefined;
         }
-        symbol.iterations[iteration] = {
-          startLine: lineNumber,
-          raStartLine,
-          raEndLine,
-        };
       }
+
+      lineNumber++;
     }
-  }
 
-  return symbols;
+    return symbols;
+  } finally {
+    stream.close();
+  }
 }
diff --git a/extensions/ql-vscode/test/unit-tests/common/split-buffer.test.ts b/extensions/ql-vscode/test/unit-tests/common/split-buffer.test.ts