Skip to content

Commit 6a251f2

Browse files
committed
chore: address PR review feedback on batch sizing and chunk extraction
- Fall back to 25 instead of total keys for batch size
- Add Zod max(250) validation for batchSize flag
- Extract duplicate JSON chunking repair logic to utils
- Revert unintended export from react/src/client/index.ts
- Format modified files with Prettier
1 parent 7328326 commit 6a251f2

File tree

8 files changed

+240
-307
lines changed

8 files changed

+240
-307
lines changed

packages/cli/src/cli/cmd/i18n.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,8 @@ export default new Command()
437437
}
438438

439439
bucketOra.start(
440-
`[${sourceLocale} -> ${targetLocale}] [${Object.keys(processableData).length
440+
`[${sourceLocale} -> ${targetLocale}] [${
441+
Object.keys(processableData).length
441442
} entries] (0%) AI localization in progress...`,
442443
);
443444
let processPayload = createProcessor(i18nConfig!.provider, {
@@ -462,8 +463,9 @@ export default new Command()
462463
targetData: flags.force ? {} : targetData,
463464
},
464465
(progress, sourceChunk, processedChunk) => {
465-
bucketOra.text = `[${sourceLocale} -> ${targetLocale}] [${Object.keys(processableData).length
466-
} entries] (${progress}%) AI localization in progress...`;
466+
bucketOra.text = `[${sourceLocale} -> ${targetLocale}] [${
467+
Object.keys(processableData).length
468+
} entries] (${progress}%) AI localization in progress...`;
467469
},
468470
);
469471

@@ -666,7 +668,7 @@ function parseFlags(options: any) {
666668
file: Z.array(Z.string()).optional(),
667669
interactive: Z.boolean().prefault(false),
668670
debug: Z.boolean().prefault(false),
669-
batchSize: Z.number().min(1).optional(),
671+
batchSize: Z.number().min(1).max(250).optional(),
670672
}).parse(options);
671673
}
672674

packages/cli/src/cli/cmd/run/_types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,6 @@ export const flagsSchema = z.object({
5656
debounce: z.number().positive().prefault(5000), // 5 seconds default
5757
sound: z.boolean().optional(),
5858
pseudo: z.boolean().optional(),
59-
batchSize: z.number().min(1).optional(),
59+
batchSize: z.number().min(1).max(250).optional(),
6060
});
6161
export type CmdRunFlags = z.infer<typeof flagsSchema>;

packages/cli/src/cli/cmd/run/setup.ts

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -110,23 +110,23 @@ export default async function setup(input: CmdRunContext) {
110110

111111
const subTasks = isLingoDotDev
112112
? [
113-
"Brand voice enabled",
114-
"Translation memory connected",
115-
"Glossary enabled",
116-
"Quality assurance enabled",
117-
].map((title) => ({ title, task: () => { } }))
113+
"Brand voice enabled",
114+
"Translation memory connected",
115+
"Glossary enabled",
116+
"Quality assurance enabled",
117+
].map((title) => ({ title, task: () => {} }))
118118
: isPseudo
119119
? [
120-
"Pseudo-localization mode active",
121-
"Character replacement configured",
122-
"No external API calls",
123-
].map((title) => ({ title, task: () => { } }))
120+
"Pseudo-localization mode active",
121+
"Character replacement configured",
122+
"No external API calls",
123+
].map((title) => ({ title, task: () => {} }))
124124
: [
125-
"Skipping brand voice",
126-
"Skipping glossary",
127-
"Skipping translation memory",
128-
"Skipping quality assurance",
129-
].map((title) => ({ title, task: () => { }, skip: true }));
125+
"Skipping brand voice",
126+
"Skipping glossary",
127+
"Skipping translation memory",
128+
"Skipping quality assurance",
129+
].map((title) => ({ title, task: () => {}, skip: true }));
130130

131131
return task.newListr(subTasks, {
132132
concurrent: true,

packages/cli/src/cli/localizer/explicit.ts

Lines changed: 25 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import { colors } from "../constants";
1212
import { jsonrepair } from "jsonrepair";
1313
import { createOllama } from "ollama-ai-provider-v2";
1414
import _ from "lodash";
15-
15+
import { extractPayloadChunks } from "../utils/chunk";
1616
export default function createExplicitLocalizer(
1717
provider: NonNullable<I18nConfig["provider"]>,
1818
batchSize?: number,
@@ -28,10 +28,10 @@ export default function createExplicitLocalizer(
2828
To fix this issue:
2929
1. Switch to one of the supported providers, or
3030
2. Remove the ${chalk.italic(
31-
"provider",
32-
)} node from your i18n.json configuration to switch to ${chalk.hex(
33-
colors.green,
34-
)("Lingo.dev")}
31+
"provider",
32+
)} node from your i18n.json configuration to switch to ${chalk.hex(
33+
colors.green,
34+
)("Lingo.dev")}
3535
3636
${chalk.hex(colors.blue)("Docs: https://lingo.dev/go/docs")}
3737
`,
@@ -136,19 +136,21 @@ function createAiSdkLocalizer(params: {
136136
if (!skipAuth && (!apiKey || !params.apiKeyName)) {
137137
throw new Error(
138138
dedent`
139-
You're trying to use raw ${chalk.dim(params.id)} API for translation. ${params.apiKeyName
140-
? `However, ${chalk.dim(
141-
params.apiKeyName,
142-
)} environment variable is not set.`
143-
: "However, that provider is unavailable."
139+
You're trying to use raw ${chalk.dim(params.id)} API for translation. ${
140+
params.apiKeyName
141+
? `However, ${chalk.dim(
142+
params.apiKeyName,
143+
)} environment variable is not set.`
144+
: "However, that provider is unavailable."
144145
}
145146
146147
To fix this issue:
147-
1. ${params.apiKeyName
148-
? `Set ${chalk.dim(
149-
params.apiKeyName,
150-
)} in your environment variables`
151-
: "Set the environment variable for your provider (if required)"
148+
1. ${
149+
params.apiKeyName
150+
? `Set ${chalk.dim(
151+
params.apiKeyName,
152+
)} in your environment variables`
153+
: "Set the environment variable for your provider (if required)"
152154
}, or
153155
2. Remove the ${chalk.italic(
154156
"provider",
@@ -295,30 +297,17 @@ function createAiSdkLocalizer(params: {
295297
// Handle both object and string responses
296298
if (typeof result?.data === "object" && result.data !== null) {
297299
finalResult = result.data;
298-
} else if (result?.data) {
299-
// Handle string responses - extract and repair JSON
300-
const index = result.data.indexOf("{");
301-
const lastIndex = result.data.lastIndexOf("}");
302-
if (index !== -1 && lastIndex !== -1) {
303-
try {
304-
const trimmed = result.data.slice(index, lastIndex + 1);
305-
const repaired = jsonrepair(trimmed);
306-
const parsed = JSON.parse(repaired);
307-
finalResult = parsed.data || parsed || {};
308-
} catch (e) {
309-
console.error(
310-
`Failed to parse nested JSON response. Snippet: ${result.data.slice(0, 100)}...`,
311-
);
312-
throw new Error(
313-
`Failed to parse nested JSON response: ${e} (Snippet: ${result.data.slice(0, 100)}...)`,
314-
);
315-
}
316-
} else {
300+
} else if (typeof result?.data === "string") {
301+
// Handle string responses where the model double-stringified the JSON
302+
try {
303+
const parsed = parseModelResponse(result.data);
304+
finalResult = parsed.data || parsed || {};
305+
} catch (e) {
317306
console.error(
318-
`Unexpected response format - no JSON object found. Snippet: ${String(result.data).slice(0, 100)}...`,
307+
`Failed to parse nested JSON response. Snippet: ${result.data.slice(0, 100)}...`,
319308
);
320309
throw new Error(
321-
`Unexpected response format from ${params.id} - no JSON object found in response`,
310+
`Failed to parse nested JSON response: ${e} (Snippet: ${result.data.slice(0, 100)}...)`,
322311
);
323312
}
324313
}
@@ -334,63 +323,3 @@ function createAiSdkLocalizer(params: {
334323
},
335324
};
336325
}
337-
338-
/**
339-
* Extract payload chunks based on the ideal chunk size
340-
* @param payload - The payload to be chunked
341-
* @param batchSize - Max number of keys per chunk (default: 25)
342-
* @returns An array of payload chunks
343-
*/
344-
function extractPayloadChunks(
345-
payload: Record<string, any>,
346-
batchSize?: number,
347-
): Record<string, any>[] {
348-
const idealBatchItemSize = 250;
349-
const result: Record<string, any>[] = [];
350-
let currentChunk: Record<string, any> = {};
351-
let currentChunkItemCount = 0;
352-
353-
const payloadEntries = Object.entries(payload);
354-
for (let i = 0; i < payloadEntries.length; i++) {
355-
const [key, value] = payloadEntries[i];
356-
currentChunk[key] = value;
357-
currentChunkItemCount++;
358-
359-
const currentChunkSize = countWordsInRecord(currentChunk);
360-
const effectiveBatchSize =
361-
batchSize && batchSize > 0 ? batchSize : payloadEntries.length || 1;
362-
if (
363-
currentChunkSize > idealBatchItemSize ||
364-
currentChunkItemCount >= effectiveBatchSize ||
365-
i === payloadEntries.length - 1
366-
) {
367-
result.push(currentChunk);
368-
currentChunk = {};
369-
currentChunkItemCount = 0;
370-
}
371-
}
372-
373-
return result;
374-
}
375-
376-
/**
377-
* Count words in a record or array
378-
* @param payload - The payload to count words in
379-
* @returns The total number of words
380-
*/
381-
function countWordsInRecord(
382-
payload: any | Record<string, any> | Array<any>,
383-
): number {
384-
if (Array.isArray(payload)) {
385-
return payload.reduce((acc, item) => acc + countWordsInRecord(item), 0);
386-
} else if (typeof payload === "object" && payload !== null) {
387-
return Object.values(payload).reduce(
388-
(acc: number, item) => acc + countWordsInRecord(item),
389-
0,
390-
);
391-
} else if (typeof payload === "string") {
392-
return payload.trim().split(/\s+/).filter(Boolean).length;
393-
} else {
394-
return 0;
395-
}
396-
}

0 commit comments

Comments
 (0)