Skip to content

Commit 3c57597

Browse files
author
Dave Bartolomeo
committed
Share code for splitting records from pseudo-JSONL
1 parent e8d5029 commit 3c57597

3 files changed

Lines changed: 42 additions & 28 deletions

File tree

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import * as fs from 'fs-extra';
2+
3+
/**
 * Read a file consisting of multiple JSON objects. Each object is separated from the previous one
 * by a double newline sequence. This is basically a more human-readable form of JSONL.
 *
 * Records that are empty or contain only whitespace (for example, the empty record produced by a
 * file that ends with a blank line, or the single empty record of an empty file) are skipped
 * rather than passed to `JSON.parse`, which would reject them.
 *
 * The current implementation reads the entire text of the document into memory, but in the future
 * it will stream the document to improve the performance with large documents.
 *
 * @param path The path to the file.
 * @param handler Callback to be invoked for each top-level JSON object in order. Each invocation
 *   is awaited before the next record is parsed.
 * @throws If the file cannot be read, if a non-blank record is not valid JSON, or if `handler`
 *   rejects.
 */
export async function readJsonlFile(path: string, handler: (value: any) => Promise<void>): Promise<void> {
  const text = await fs.readFile(path, 'utf-8');

  // Records are delimited by a blank line, written with either LF or CRLF line endings.
  const records = text.split(/\r?\n\r?\n/);

  for (const record of records) {
    // A trailing delimiter, repeated delimiters, or an empty file all yield blank records.
    // JSON.parse throws on empty input, so skip them instead of failing the whole read.
    if (record.trim().length === 0) {
      continue;
    }
    await handler(JSON.parse(record));
  }
}

extensions/ql-vscode/src/pure/log-summary-parser.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { readJsonlFile } from '../log-insights/jsonl-reader';
2+
13
// TODO(angelapwen): Only load in necessary information and
24
// location in bytes for this log to save memory.
35
export interface EvalLogData {
@@ -13,14 +15,10 @@ export interface EvalLogData {
1315
* an array of EvalLogData objects.
1416
*
1517
*/
16-
export function parseViewerData(logSummary: string): EvalLogData[] {
17-
// Remove newline delimiters because summary is in .jsonl format.
18-
const jsonSummaryObjects: string[] = logSummary.split(/\r?\n\r?\n/g);
18+
export async function parseViewerData(jsonSummaryPath: string): Promise<EvalLogData[]> {
1919
const viewerData: EvalLogData[] = [];
2020

21-
for (const obj of jsonSummaryObjects) {
22-
const jsonObj = JSON.parse(obj);
23-
21+
await readJsonlFile(jsonSummaryPath, async jsonObj => {
2422
// Only convert log items that have an RA and millis field
2523
if (jsonObj.ra !== undefined && jsonObj.millis !== undefined) {
2624
const newLogData: EvalLogData = {
@@ -31,6 +29,7 @@ export function parseViewerData(logSummary: string): EvalLogData[] {
3129
};
3230
viewerData.push(newLogData);
3331
}
34-
}
32+
});
33+
3534
return viewerData;
3635
}

extensions/ql-vscode/src/query-history.ts

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import {
1818
window,
1919
workspace,
2020
} from 'vscode';
21-
import * as JsonlParser from 'stream-json/jsonl/Parser';
2221
import { QueryHistoryConfig } from './config';
2322
import {
2423
showAndLogErrorMessage,
@@ -53,6 +52,7 @@ import { EvalLogData, parseViewerData } from './pure/log-summary-parser';
5352
import { PipelineInfo, SummarySymbols } from './log-insights/summary-parser';
5453
import { DiagnosticSeverity } from 'vscode-languageclient';
5554
import { EvaluationLogProblemReporter, EvaluationLogScannerProvider } from './log-insights/log-scanner';
55+
import { readJsonlFile } from './log-insights/jsonl-reader';
5656

5757
/**
5858
* query-history.ts
@@ -983,7 +983,7 @@ export class QueryHistoryManager extends DisposableObject {
983983
}
984984

985985
// Summary log file doesn't exist.
986-
if (finalSingleItem.evalLogLocation && fs.pathExists(finalSingleItem.evalLogLocation)) {
986+
if (finalSingleItem.evalLogLocation && await fs.pathExists(finalSingleItem.evalLogLocation)) {
987987
// If raw log does exist, then the summary log is still being generated.
988988
this.warnInProgressEvalLogSummary();
989989
} else {
@@ -1008,14 +1008,13 @@ export class QueryHistoryManager extends DisposableObject {
10081008
}
10091009

10101010
// TODO(angelapwen): Stream the file in.
1011-
void fs.readFile(finalSingleItem.jsonEvalLogSummaryLocation, async (err, buffer) => {
1012-
if (err) {
1013-
throw new Error(`Could not read evaluator log summary JSON file to generate viewer data at ${finalSingleItem.jsonEvalLogSummaryLocation}.`);
1014-
}
1015-
const evalLogData: EvalLogData[] = parseViewerData(buffer.toString());
1011+
try {
1012+
const evalLogData: EvalLogData[] = await parseViewerData(finalSingleItem.jsonEvalLogSummaryLocation);
10161013
const evalLogTreeBuilder = new EvalLogTreeBuilder(finalSingleItem.getQueryName(), evalLogData);
10171014
this.evalLogViewer.updateRoots(await evalLogTreeBuilder.getRoots());
1018-
});
1015+
} catch (e) {
1016+
throw new Error(`Could not read evaluator log summary JSON file to generate viewer data at ${finalSingleItem.jsonEvalLogSummaryLocation}.`);
1017+
}
10191018
}
10201019

10211020
/**
@@ -1027,12 +1026,12 @@ export class QueryHistoryManager extends DisposableObject {
10271026
query: LocalQueryInfo
10281027
): Promise<void> {
10291028
this.diagnosticCollection.clear();
1030-
if (query.evalLogJsonSummaryLocation) {
1031-
const diagnostics = await this.scanLog(query.evalLogJsonSummaryLocation, query.evalLogSummarySymbolsLocation);
1029+
if (query.jsonEvalLogSummaryLocation) {
1030+
const diagnostics = await this.scanLog(query.jsonEvalLogSummaryLocation, query.evalLogSummarySymbolsLocation);
10321031
const uri = Uri.file(query.evalLogSummaryLocation!);
10331032
this.diagnosticCollection.set(uri, diagnostics);
10341033
} else {
1035-
this.warnNoEvalLog();
1034+
this.warnNoEvalLogs();
10361035
}
10371036
}
10381037

@@ -1244,17 +1243,10 @@ export class QueryHistoryManager extends DisposableObject {
12441243

12451244
const scanners = [...this.scannerProviders.values()].map(p => p.createScanner(problemReporter));
12461245

1247-
const stream = fs.createReadStream(jsonSummaryLocation)
1248-
.pipe(JsonlParser.parser())
1249-
.on('data', ({ value }) => {
1250-
scanners.forEach(scanner => {
1251-
scanner.onEvent(value);
1252-
});
1246+
await readJsonlFile(jsonSummaryLocation, async obj => {
1247+
scanners.forEach(scanner => {
1248+
scanner.onEvent(obj);
12531249
});
1254-
1255-
await new Promise(function(resolve, reject) {
1256-
stream.on('end', resolve);
1257-
stream.on('error', reject);
12581250
});
12591251

12601252
scanners.forEach(scanner => scanner.onDone());

0 commit comments

Comments
 (0)