Unit tests for SplitBuffer

Dave Bartolomeo · Dave Bartolomeo · commit 3c63df2221a1 · 2023-09-26T17:54:47.000-04:00
diff --git a/extensions/ql-vscode/src/common/split-stream.ts b/extensions/ql-vscode/src/common/split-stream.ts
@@ -4,11 +4,12 @@ import { StringDecoder } from "string_decoder";
 /**
  * Buffer to hold state used when splitting a text stream into lines.
  */
-class SplitBuffer {
+export class SplitBuffer {
   private readonly decoder = new StringDecoder("utf8");
   private readonly maxSeparatorLength: number;
   private buffer = "";
   private searchIndex = 0;
+  private ended = false;
 
   constructor(private readonly separators: readonly string[]) {
     this.maxSeparatorLength = separators
@@ -29,7 +30,7 @@ class SplitBuffer {
    */
   public end(): void {
     this.buffer += this.decoder.end();
-    this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned.
+    this.ended = true;
   }
 
   /**
@@ -56,7 +57,14 @@ class SplitBuffer {
    * line is available.
    */
   public getNextLine(): string | undefined {
-    while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) {
+    // If we haven't received all of the input yet, don't search too close to the end of the buffer,
+    // or we could match a separator that's split across two chunks. For example, we could see "\r"
+    // at the end of the buffer and match that, even though we were about to receive a "\n" right
+    // after it.
+    const maxSearchIndex = this.ended
+      ? this.buffer.length - 1
+      : this.buffer.length - this.maxSeparatorLength;
+    while (this.searchIndex <= maxSearchIndex) {
       for (const separator of this.separators) {
         if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
           const line = this.buffer.slice(0, this.searchIndex);
@@ -68,7 +76,15 @@ class SplitBuffer {
       this.searchIndex++;
     }
 
-    return undefined;
+    if (this.ended && this.buffer.length > 0) {
+      // If we still have some text left in the buffer, return it as the last line.
+      const line = this.buffer;
+      this.buffer = "";
+      this.searchIndex = 0;
+      return line;
+    } else {
+      return undefined;
+    }
   }
 }
 
diff --git a/extensions/ql-vscode/test/unit-tests/common/split-buffer.test.ts b/extensions/ql-vscode/test/unit-tests/common/split-buffer.test.ts
@@ -0,0 +1,84 @@
+import { LINE_ENDINGS, SplitBuffer } from "../../../src/common/split-stream";
+
+interface Chunk {
+  chunk: string;
+  lines: string[];
+}
+
+function checkLines(
+  buffer: SplitBuffer,
+  expectedLinesForChunk: string[],
+  chunkIndex: number | "end",
+): void {
+  expectedLinesForChunk.forEach((expectedLine, lineIndex) => {
+    const line = buffer.getNextLine();
+    const location = `[chunk ${chunkIndex}, line ${lineIndex}]: `;
+    expect(location + line).toEqual(location + expectedLine);
+  });
+  expect(buffer.getNextLine()).toBeUndefined();
+}
+
+function testSplitBuffer(chunks: Chunk[], endLines: string[]): void {
+  const buffer = new SplitBuffer(LINE_ENDINGS);
+  chunks.forEach((chunk, chunkIndex) => {
+    buffer.addChunk(Buffer.from(chunk.chunk, "utf-8"));
+    checkLines(buffer, chunk.lines, chunkIndex);
+  });
+  buffer.end();
+  checkLines(buffer, endLines, "end");
+}
+
+describe("split buffer", () => {
+  it("should handle a one-chunk string with no terminator", async () => {
+    // Won't return the line until we call `end()`.
+    testSplitBuffer([{ chunk: "some text", lines: [] }], ["some text"]);
+  });
+
+  it("should handle a one-chunk string with a one-byte terminator", async () => {
+    // Won't return the line until we call `end()` because the actual terminator is shorter than the
+    // longest terminator.
+    testSplitBuffer([{ chunk: "some text\n", lines: [] }], ["some text"]);
+  });
+
+  it("should handle a one-chunk string with a two-byte terminator", async () => {
+    testSplitBuffer([{ chunk: "some text\r\n", lines: ["some text"] }], []);
+  });
+
+  it("should handle a multi-chunk string with terminators at the end of each chunk", async () => {
+    testSplitBuffer(
+      [
+        { chunk: "first line\n", lines: [] }, // Waiting for second potential terminator byte
+        { chunk: "second line\r", lines: ["first line"] }, // Waiting for second potential terminator byte
+        { chunk: "third line\r\n", lines: ["second line", "third line"] }, // No wait, because "\r\n" is already the longest terminator
+      ],
+      [],
+    );
+  });
+
+  it("should handle a multi-chunk string with terminators at random offsets", async () => {
+    testSplitBuffer(
+      [
+        { chunk: "first line\nsecond", lines: ["first line"] },
+        {
+          chunk: " line\rthird line",
+          lines: ["second line"],
+        },
+        { chunk: "\r\n", lines: ["third line"] },
+      ],
+      [],
+    );
+  });
+
+  it("should handle a terminator split between chunks", async () => {
+    testSplitBuffer(
+      [
+        { chunk: "first line\r", lines: [] },
+        {
+          chunk: "\nsecond line",
+          lines: ["first line"],
+        },
+      ],
+      ["second line"],
+    );
+  });
+});