Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions src/__tests__/utils/output.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import {describe, it, expect, vi, beforeEach, afterEach} from 'vitest';
import fs from 'fs';
import path from 'path';
import os from 'os';
import {serialize, format_from_ext, print} from '../../utils/output';

// CSV serialization: header derivation, quoting/escaping, key union across
// rows, single-object wrapping, and JSON encoding of nested values.
describe('utils/output.serialize csv', ()=>{
    it('serializes array of flat objects as RFC 4180 CSV with header row', ()=>{
        const rows = [
            {url: 'https://a.test/1', title: 'A', price: 1.5},
            {url: 'https://a.test/2', title: 'B', price: 2.0},
        ];
        const out = serialize(rows, 'csv');
        const lines = out.trim().split('\n');
        expect(lines[0]).toBe('url,title,price');
        expect(lines[1]).toBe('https://a.test/1,A,1.5');
        // 2.0 is the JS number 2, so it stringifies without a fraction
        expect(lines[2]).toBe('https://a.test/2,B,2');
    });

    it('quotes and escapes embedded commas, quotes, and newlines', ()=>{
        const rows = [{name: 'Smith, John', note: 'He said "hi"'},
            {name: 'multi\nline', note: 'ok'}];
        const out = serialize(rows, 'csv');
        const lines = out.trim().split(/\n/);
        // header
        expect(lines[0]).toBe('name,note');
        // row 1: both fields need quoting; embedded quote doubled
        expect(lines[1]).toBe('"Smith, John","He said ""hi"""');
        // row 2: the embedded newline must stay inside one quoted field.
        // Checked on the unsplit output because splitting on '\n' would cut
        // the quoted field in half.
        expect(out).toContain('\n"multi\nline",ok');
    });

    it('unions keys across heterogeneous rows', ()=>{
        const rows = [{a: 1, b: 2}, {a: 3, c: 4}];
        const out = serialize(rows, 'csv');
        const lines = out.trim().split('\n');
        expect(lines[0]).toBe('a,b,c');
        // keys a row does not have are emitted as empty fields
        expect(lines[1]).toBe('1,2,');
        expect(lines[2]).toBe('3,,4');
    });

    it('wraps a single object as one CSV row', ()=>{
        const out = serialize({a: 1, b: 'x'}, 'csv');
        expect(out.trim()).toBe('a,b\n1,x');
    });

    it('serializes nested values via JSON', ()=>{
        const rows = [{id: 1, meta: {tag: 'x'}}];
        const out = serialize(rows, 'csv');
        const lines = out.trim().split('\n');
        // the JSON text contains quotes, so the field is quoted and the
        // inner quotes are doubled
        expect(lines[1]).toBe('1,"{""tag"":""x""}"');
    });
});

// Markdown serialization: table rendering, cell escaping, and the fenced
// JSON fallback for data that is not a list of objects.
describe('utils/output.serialize markdown', ()=>{
    it('renders an array of objects as a Markdown table', ()=>{
        const table = serialize([{a: 1, b: 'x'}, {a: 2, b: 'y'}], 'markdown');
        // header, divider, then one line per input row
        const expected_lines = [
            '| a | b |',
            '| --- | --- |',
            '| 1 | x |',
            '| 2 | y |',
        ];
        for (const line of expected_lines)
            expect(table).toContain(line);
    });

    it('escapes pipes and newlines inside cells', ()=>{
        const tricky = {a: 'a|b', b: 'line1\nline2'};
        const table = serialize([tricky], 'markdown');
        // pipe gets a backslash; the newline collapses to a single space
        expect(table).toContain('| a\\|b | line1 line2 |');
    });

    it('falls back to a fenced JSON block for non-tabular data', ()=>{
        const fenced = serialize([1, 2, 3], 'markdown');
        const opens_with_fence = fenced.startsWith('```json');
        expect(opens_with_fence).toBe(true);
    });
});

// HTML serialization: table rendering, string pass-through, and escaping in
// the <pre> fallback used for data that is not a list of objects.
describe('utils/output.serialize html', ()=>{
    it('renders an array of objects as an HTML table', ()=>{
        const rows = [{a: 1, b: '<x>'}];
        const out = serialize(rows, 'html');
        expect(out).toContain('<thead><tr><th>a</th><th>b</th></tr></thead>');
        expect(out).toContain('<td>1</td><td>&lt;x&gt;</td>');
    });

    // String input is returned verbatim by the HTML serializer, so nothing
    // is escaped on this path.
    it('passes string input through unchanged', ()=>{
        const out = serialize('<script>', 'html');
        expect(out).toBe('<script>');
    });

    // An array of primitives is not tabular, so it takes the <pre> fallback,
    // which is where escaping actually applies.
    it('escapes HTML in non-tabular fallback', ()=>{
        const out = serialize(['<b>'], 'html');
        expect(out.startsWith('<pre>')).toBe(true);
        expect(out).toContain('&lt;b&gt;');
        expect(out).not.toContain('<b>');
    });
});

// Extension -> output format mapping, including the explicit rejection of
// spreadsheet extensions.
describe('utils/output.format_from_ext', ()=>{
    it('maps known extensions', ()=>{
        expect(format_from_ext('a.json')).toBe('json');
        // extension matching is case-insensitive
        expect(format_from_ext('a.CSV')).toBe('csv');
        expect(format_from_ext('a.md')).toBe('markdown');
        expect(format_from_ext('a.html')).toBe('html');
    });

    it('returns null for unknown extensions', ()=>{
        expect(format_from_ext('a.txt')).toBeNull();
        expect(format_from_ext('noext')).toBeNull();
    });

    it('rejects .xlsx with a helpful message and exits 1', ()=>{
        // process.exit is replaced with a throwing stub so the test process
        // survives and the call can be observed.
        const exit = vi.spyOn(process, 'exit').mockImplementation(
            ((_code?: number)=>{ throw new Error('exit'); }) as never);
        const err = vi.spyOn(console, 'error').mockImplementation(()=>{});
        try {
            expect(()=>format_from_ext('out.xlsx')).toThrow('exit');
            // the title promises exit code 1, so pin it
            expect(exit).toHaveBeenCalledWith(1);
            const msg = err.mock.calls.map(c=>c.join(' ')).join(' ');
            expect(msg).toMatch(/XLSX output is not supported/);
            expect(msg).toMatch(/--pretty -o file\.json/);
            expect(msg).toMatch(/brightdata\.com\/cp\/scrapers/);
        } finally {
            // restore even when an assertion above fails, so the stubs do
            // not leak into later tests
            exit.mockRestore();
            err.mockRestore();
        }
    });
});

// print(): the format written to the -o file is chosen from the file's
// extension.
describe('utils/output.print writes correct format from extension', ()=>{
    // Every temp path handed out, so afterEach can remove the files.
    const created: string[] = [];
    const make_tmp = (ext: string)=>{
        const unique = `${Date.now()}-${Math.random()}`;
        const file = path.join(os.tmpdir(),
            `bdata-output-test-${unique}${ext}`);
        created.push(file);
        return file;
    };
    // console.error is stubbed with a no-op for the duration of each test.
    beforeEach(()=>{
        vi.spyOn(console, 'error').mockImplementation(()=>{});
    });
    afterEach(()=>{
        vi.restoreAllMocks();
        created.forEach(file=>{
            // best-effort cleanup: the file may never have been written
            try { fs.unlinkSync(file); } catch {}
        });
    });

    it('-o file.csv writes CSV (regression: was silently writing JSON)', ()=>{
        const target = make_tmp('.csv');
        print([{url: 'https://x.test', title: 'T'}], {output: target});
        const written = fs.readFileSync(target, 'utf8');
        const lines = written.split('\n');
        expect(lines[0]).toBe('url,title');
        expect(lines[1]).toBe('https://x.test,T');
    });

    it('-o file.html writes HTML (regression: was silently writing JSON)', ()=>{
        const target = make_tmp('.html');
        print([{a: 1}], {output: target});
        const written = fs.readFileSync(target, 'utf8');
        expect(written).toContain('<table>');
    });

    it('-o file.md writes Markdown (regression: was silently writing JSON)', ()=>{
        const target = make_tmp('.md');
        print([{a: 1}], {output: target});
        const written = fs.readFileSync(target, 'utf8');
        expect(written).toContain('| a |');
    });

    it('-o file.json writes JSON unchanged', ()=>{
        const target = make_tmp('.json');
        print([{a: 1}], {output: target});
        const parsed = JSON.parse(fs.readFileSync(target, 'utf8'));
        expect(parsed).toEqual([{a: 1}]);
    });
});
122 changes: 121 additions & 1 deletion src/utils/output.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,24 @@ const dim = (s: string)=>ansi('2', s);
// Status helpers. Each decorates the message (glyph and/or colour wrapper)
// and writes it with console.error.
const success = (msg: string)=>{
    console.error(green(`✓ ${msg}`));
};
const warn = (msg: string)=>{
    console.error(yellow(`⚠ ${msg}`));
};
const info = (msg: string)=>{
    console.error(dim(msg));
};
const fail = (msg: string)=>{ console.error(red(`✗ ${msg}`));
// Reports a fatal error with console.error, then terminates the process
// with exit code 1.
const fail = (msg: string)=>{
    const line = red(`✗ ${msg}`);
    console.error(line);
    process.exit(1);
};

// Names of the output formats accepted by the serializer.
type Output_format =
    |'markdown'
    |'json'
    |'pretty'
    |'html'
    |'csv'
    |'raw';

// Lower-case file extensions (with the leading dot) that are deliberately
// rejected, mapped to the explanation shown to the user.
const UNSUPPORTED_EXTS: Record<string, string> = {
    '.xlsx': [
        'XLSX output is not supported. Use --pretty -o file.json ',
        'and convert with a tool like xlsx-cli, or download as XLSX ',
        'from the Bright Data web UI (https://brightdata.com/cp/scrapers).',
    ].join(''),
    '.xls': [
        'XLS output is not supported. Use --pretty -o file.json ',
        'and convert with a tool like xlsx-cli, or download from the ',
        'Bright Data web UI (https://brightdata.com/cp/scrapers).',
    ].join(''),
};

const format_from_ext = (file_path: string): Output_format|null=>{
const ext = path.extname(file_path).toLowerCase();
if (UNSUPPORTED_EXTS[ext])
fail(UNSUPPORTED_EXTS[ext]);
if (ext == '.json') return 'json';
if (ext == '.md') return 'markdown';
if (ext == '.html') return 'html';
Expand All @@ -35,11 +46,120 @@ type Print_opts = {
format?: Output_format;
};

// Normalises arbitrary data into a list of row objects for tabular output.
//  - a non-empty array whose elements are all non-array objects is
//    returned as-is (same reference)
//  - a single non-array object is wrapped in a one-element array
//  - anything else (primitives, null, empty or mixed arrays) yields null
const to_rows = (data: unknown): Record<string, unknown>[]|null=>{
    const is_plain = (v: unknown): boolean=>
        Boolean(v) && typeof v == 'object' && !Array.isArray(v);
    if (!Array.isArray(data))
        return is_plain(data) ? [data as Record<string, unknown>] : null;
    if (!data.length || !data.every(is_plain))
        return null;
    return data as Record<string, unknown>[];
};

// Returns the union of own enumerable keys across all rows, each key once,
// in order of first appearance.
const collect_keys = (rows: Record<string, unknown>[]): string[]=>{
    // A Set iterates in insertion order, which gives first-seen ordering.
    const unique = new Set<string>(rows.flatMap(row=>Object.keys(row)));
    return Array.from(unique);
};

// Converts one cell value to its textual form, before any format-specific
// escaping is applied.
//  - null / undefined          -> ''
//  - string                    -> unchanged
//  - number / boolean / bigint -> String(val)
//  - anything else             -> JSON text, or '' when the value has no
//                                 JSON representation (function, symbol)
// Always returns a string, so callers can safely call string methods on the
// result.
const cell_to_string = (val: unknown): string=>{
    if (val === null || val === undefined)
        return '';
    if (typeof val == 'string')
        return val;
    // bigint is handled here because JSON.stringify throws a TypeError on it
    if (typeof val == 'number' || typeof val == 'boolean'
        || typeof val == 'bigint')
    {
        return String(val);
    }
    // JSON.stringify yields undefined (not a string) for functions and
    // symbols; map that to an empty cell instead of leaking undefined.
    const json: string|undefined = JSON.stringify(val);
    return json === undefined ? '' : json;
};

// Renders a value as one CSV field. A field containing a double quote,
// comma, CR or LF is wrapped in double quotes with inner quotes doubled;
// any other field is emitted bare.
const csv_escape = (val: unknown): string=>{
    const text = cell_to_string(val);
    const needs_quoting = /[",\r\n]/.test(text);
    return needs_quoting ? `"${text.replace(/"/g, '""')}"` : text;
};

// Serialises data as CSV text.
//  - a string is returned untouched
//  - an object or non-empty array of objects becomes a header line plus one
//    line per row, LF-separated, with a trailing LF; columns are the union
//    of keys across rows
//  - anything else triggers a warning and is returned as indented JSON
const serialize_csv = (data: unknown): string=>{
    if (typeof data == 'string')
        return data;
    const records = to_rows(data);
    if (records === null)
    {
        warn('CSV requires an object or array of objects; falling back '
            +'to JSON. Use --json to silence this warning.');
        return JSON.stringify(data, null, 2);
    }
    const columns = collect_keys(records);
    const to_line = (cells: unknown[]): string=>
        cells.map(cell=>csv_escape(cell)).join(',');
    const header_line = to_line(columns);
    // a key missing from a row reads as undefined -> empty field
    const data_lines = records.map(rec=>to_line(columns.map(c=>rec[c])));
    return `${header_line}\n${data_lines.join('\n')}\n`;
};

// Renders a value as the text of one Markdown table cell: pipes are
// backslash-escaped and each line break (LF or CRLF) becomes one space.
const md_escape = (val: unknown): string=>{
    const text = cell_to_string(val);
    const pipes_escaped = text.replace(/\|/g, '\\|');
    return pipes_escaped.replace(/\r?\n/g, ' ');
};

// Serialises data as Markdown.
//  - a string is returned untouched
//  - an object or non-empty array of objects becomes a pipe table: header,
//    '---' divider, then one line per row, ending with a newline
//  - anything else is emitted as indented JSON inside a ```json fence
const serialize_markdown = (data: unknown): string=>{
    if (typeof data == 'string')
        return data;
    const rows = to_rows(data);
    if (!rows)
        return '```json\n'+JSON.stringify(data, null, 2)+'\n```\n';
    const keys = collect_keys(rows);
    // Column names come from the data, so they get the same escaping as
    // cells; a raw '|' or newline in a key would otherwise break the table.
    const header = '| '+keys.map(k=>md_escape(k)).join(' | ')+' |';
    const divider = '| '+keys.map(()=>'---').join(' | ')+' |';
    const body = rows.map(r=>
        '| '+keys.map(k=>md_escape(r[k])).join(' | ')+' |').join('\n');
    return [header, divider, body].join('\n')+'\n';
};

// Renders a value as HTML-safe text by replacing &, <, > and " with their
// character entities.
const html_escape = (val: unknown): string=>{
    // '&' must go first so the ampersands introduced by the later
    // substitutions are not escaped a second time.
    const substitutions: [string, string][] = [
        ['&', '&amp;'],
        ['<', '&lt;'],
        ['>', '&gt;'],
        ['"', '&quot;'],
    ];
    let text = cell_to_string(val);
    for (const [raw, entity] of substitutions)
        text = text.split(raw).join(entity);
    return text;
};

// Serialises data as HTML.
//  - a string is returned untouched (no escaping)
//  - an object or non-empty array of objects becomes a <table> with a
//    <thead> of column names and a <tbody> with one <tr> per row
//  - anything else is emitted as escaped, indented JSON inside <pre>
// Table and <pre> output end with a newline.
const serialize_html = (data: unknown): string=>{
    if (typeof data == 'string')
        return data;
    const records = to_rows(data);
    if (!records)
    {
        const dump = JSON.stringify(data, null, 2);
        return `<pre>${html_escape(dump)}</pre>\n`;
    }
    const columns = collect_keys(records);
    const head_cells = columns.map(c=>`<th>${html_escape(c)}</th>`);
    const body_rows = records.map(rec=>{
        const cells = columns.map(c=>`<td>${html_escape(rec[c])}</td>`);
        return `<tr>${cells.join('')}</tr>`;
    });
    return '<table>'
        +`<thead><tr>${head_cells.join('')}</tr></thead>`
        +`<tbody>${body_rows.join('')}</tbody>`
        +'</table>\n';
};

const serialize = (data: unknown, fmt: Output_format): string=>{
if (fmt == 'pretty')
return JSON.stringify(data, null, 2);
if (fmt == 'json')
return JSON.stringify(data);
if (fmt == 'csv')
return serialize_csv(data);
if (fmt == 'markdown')
return serialize_markdown(data);
if (fmt == 'html')
return serialize_html(data);
if (typeof data == 'string')
return data;
return JSON.stringify(data, null, 2);
Expand Down