Skip to content

Commit c0a1533

Browse files
Refactor VLM model type resolution and system prompt handling
- Updated the logic for determining the effective system prompt to handle empty strings.
- Introduced a new function to resolve the effective VLM model type based on options, simplifying the code in `rac_vlm_llamacpp_process`.
- Improved download progress tracking in `ModelDownloader` by using cumulative byte counts for better accuracy.
- Enhanced type exports in `llamacpp` and `onnx` packages for better compatibility and clarity.
- Adjusted regex for identifying Qwen VL models to ensure more accurate matching.
1 parent 0af259c commit c0a1533

6 files changed

Lines changed: 70 additions & 48 deletions

File tree

sdk/runanywhere-commons/src/backends/llamacpp/rac_vlm_llamacpp.cpp

Lines changed: 23 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,7 @@ std::string format_vlm_prompt_with_template(llama_model* model, const std::strin
178178
}
179179

180180
// Resolve system prompt: use explicit value, or inject a default for Qwen2-VL
181-
const char* effective_system = system_prompt;
181+
const char* effective_system = (system_prompt && system_prompt[0] != '\0') ? system_prompt : nullptr;
182182
if (!effective_system && model_type == VLMModelType::Qwen2VL) {
183183
effective_system = "You are a helpful assistant.";
184184
}
@@ -208,15 +208,14 @@ std::string format_vlm_prompt_with_template(llama_model* model, const std::strin
208208
return formatted;
209209
}
210210
}
211-
bool has_explicit_system = (system_prompt && system_prompt[0] != '\0');
212-
if (has_explicit_system) {
211+
if (effective_system) {
213212
RAC_LOG_WARNING(LOG_CAT, "Template with system failed (size=%d); falling back to manual to preserve explicit system prompt", size);
214213
} else {
215214
RAC_LOG_WARNING(LOG_CAT, "llama_chat_apply_template with system failed (size=%d), trying without", size);
216215
}
217216
// If the caller passed an explicit system prompt, skip user-only
218217
// template to avoid silently dropping it -- go straight to manual.
219-
if (has_explicit_system) {
218+
if (effective_system) {
220219
goto manual_fallback;
221220
}
222221
}
@@ -376,6 +375,22 @@ void configure_sampler(LlamaCppVLMBackend* backend, const rac_vlm_options_t* opt
376375
temperature, top_p);
377376
}
378377

378+
/**
379+
* Resolve the effective VLM model type from options override or auto-detected default.
380+
*/
381+
static VLMModelType resolve_effective_model_type(VLMModelType detected,
382+
const rac_vlm_options_t* options) {
383+
if (options && options->model_family != RAC_VLM_MODEL_FAMILY_AUTO) {
384+
switch (options->model_family) {
385+
case RAC_VLM_MODEL_FAMILY_QWEN2_VL: return VLMModelType::Qwen2VL;
386+
case RAC_VLM_MODEL_FAMILY_SMOLVLM: return VLMModelType::SmolVLM;
387+
case RAC_VLM_MODEL_FAMILY_LLAVA: return VLMModelType::LLaVA;
388+
default: return VLMModelType::Generic;
389+
}
390+
}
391+
return detected;
392+
}
393+
379394
} // namespace
380395

381396
// =============================================================================
@@ -629,15 +644,7 @@ rac_result_t rac_vlm_llamacpp_process(rac_handle_t handle, const rac_vlm_image_t
629644
backend->n_past = 0;
630645

631646
// Resolve effective model type: options override > auto-detected at load time
632-
VLMModelType effective_model_type = backend->model_type;
633-
if (options && options->model_family != RAC_VLM_MODEL_FAMILY_AUTO) {
634-
switch (options->model_family) {
635-
case RAC_VLM_MODEL_FAMILY_QWEN2_VL: effective_model_type = VLMModelType::Qwen2VL; break;
636-
case RAC_VLM_MODEL_FAMILY_SMOLVLM: effective_model_type = VLMModelType::SmolVLM; break;
637-
case RAC_VLM_MODEL_FAMILY_LLAVA: effective_model_type = VLMModelType::LLaVA; break;
638-
default: effective_model_type = VLMModelType::Generic; break;
639-
}
640-
}
647+
VLMModelType effective_model_type = resolve_effective_model_type(backend->model_type, options);
641648

642649
const char* system_prompt = (options && options->system_prompt) ? options->system_prompt : nullptr;
643650

@@ -772,6 +779,7 @@ rac_result_t rac_vlm_llamacpp_process(rac_handle_t handle, const rac_vlm_image_t
772779

773780
for (int i = 0; i < max_tokens && !backend->cancel_requested; i++) {
774781
// Diagnostic: on first token, inspect logits for NaN/corruption
782+
#ifdef RAC_VLM_ENABLE_DIAGNOSTICS
775783
if (i == 0) {
776784
float* logits = llama_get_logits(backend->ctx);
777785
int n_vocab = llama_vocab_n_tokens(vocab);
@@ -806,6 +814,7 @@ rac_result_t rac_vlm_llamacpp_process(rac_handle_t handle, const rac_vlm_image_t
806814
top5_idx[4], top5_val[4]);
807815
}
808816
}
817+
#endif
809818

810819
llama_token token = llama_sampler_sample(backend->sampler, backend->ctx, -1);
811820
llama_sampler_accept(backend->sampler, token);
@@ -889,15 +898,7 @@ rac_result_t rac_vlm_llamacpp_process_stream(rac_handle_t handle, const rac_vlm_
889898
RAC_LOG_DEBUG(LOG_CAT, "Cleared KV cache for new request");
890899

891900
// Resolve effective model type: options override > auto-detected at load time
892-
VLMModelType effective_model_type = backend->model_type;
893-
if (options && options->model_family != RAC_VLM_MODEL_FAMILY_AUTO) {
894-
switch (options->model_family) {
895-
case RAC_VLM_MODEL_FAMILY_QWEN2_VL: effective_model_type = VLMModelType::Qwen2VL; break;
896-
case RAC_VLM_MODEL_FAMILY_SMOLVLM: effective_model_type = VLMModelType::SmolVLM; break;
897-
case RAC_VLM_MODEL_FAMILY_LLAVA: effective_model_type = VLMModelType::LLaVA; break;
898-
default: effective_model_type = VLMModelType::Generic; break;
899-
}
900-
}
901+
VLMModelType effective_model_type = resolve_effective_model_type(backend->model_type, options);
901902

902903
const char* system_prompt = (options && options->system_prompt) ? options->system_prompt : nullptr;
903904

sdk/runanywhere-web/packages/core/src/Infrastructure/ModelDownloader.ts

Lines changed: 23 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -221,22 +221,21 @@ export class ModelDownloader {
221221

222222
try {
223223
const totalFiles = 1 + (model.additionalFiles?.length ?? 0);
224-
let totalBytesDownloaded = 0;
225-
let totalBytesExpected = 0;
224+
let cumulativeBytesDownloaded = 0;
225+
let cumulativeBytesExpected = 0;
226+
const completedFileSizes: number[] = [];
226227

227-
// Try streaming the primary file directly to storage (keeps memory constant).
228-
// Falls back to buffered download + store if streaming is not possible.
229228
const primaryProgressCb = (progress: number, bytesDown: number, bytesTotal: number) => {
230-
totalBytesDownloaded = bytesDown;
231-
totalBytesExpected = bytesTotal * totalFiles;
229+
cumulativeBytesDownloaded = bytesDown;
230+
cumulativeBytesExpected = bytesTotal * totalFiles;
232231
const overallProgress = progress / totalFiles;
233232
this.registry.updateModel(modelId, { downloadProgress: overallProgress });
234233
this.emitDownloadProgress({
235234
modelId,
236235
stage: DownloadStage.Downloading,
237236
progress: overallProgress,
238-
bytesDownloaded: totalBytesDownloaded,
239-
totalBytes: totalBytesExpected,
237+
bytesDownloaded: cumulativeBytesDownloaded,
238+
totalBytes: cumulativeBytesExpected,
240239
currentFile: model.url.split('/').pop(),
241240
filesCompleted: 0,
242241
filesTotal: totalFiles,
@@ -249,13 +248,18 @@ export class ModelDownloader {
249248
await this.storeInOPFS(modelId, primaryData);
250249
primarySize = primaryData.length;
251250
}
251+
completedFileSizes.push(primarySize);
252252

253253
// Download additional files (e.g., mmproj for VLM)
254254
if (model.additionalFiles && model.additionalFiles.length > 0) {
255255
for (let i = 0; i < model.additionalFiles.length; i++) {
256256
const file = model.additionalFiles[i];
257257
const fileKey = this.additionalFileKey(modelId, file.filename);
258+
const priorCompleted = completedFileSizes.reduce((a, b) => a + b, 0);
259+
258260
const fileProgressCb = (progress: number, bytesDown: number, bytesTotal: number) => {
261+
cumulativeBytesDownloaded = priorCompleted + bytesDown;
262+
cumulativeBytesExpected = priorCompleted + bytesTotal;
259263
const baseProgress = (1 + i) / totalFiles;
260264
const fileProgress = progress / totalFiles;
261265
const overallProgress = baseProgress + fileProgress;
@@ -264,42 +268,40 @@ export class ModelDownloader {
264268
modelId,
265269
stage: DownloadStage.Downloading,
266270
progress: overallProgress,
267-
bytesDownloaded: bytesDown,
268-
totalBytes: bytesTotal,
271+
bytesDownloaded: cumulativeBytesDownloaded,
272+
totalBytes: cumulativeBytesExpected,
269273
currentFile: file.filename,
270274
filesCompleted: 1 + i,
271275
filesTotal: totalFiles,
272276
});
273277
};
274278

279+
let fileSize: number;
275280
const streamedSize = await this.downloadAndStoreStreaming(file.url, fileKey, fileProgressCb);
276281
if (streamedSize === null) {
277282
const fileData = await this.downloadFile(file.url, fileProgressCb);
278283
await this.storeInOPFS(fileKey, fileData);
284+
fileSize = fileData.length;
285+
} else {
286+
fileSize = streamedSize;
279287
}
288+
completedFileSizes.push(fileSize);
280289
}
281290
}
282291

292+
const totalSize = completedFileSizes.reduce((a, b) => a + b, 0);
293+
283294
// Validating stage
284295
this.emitDownloadProgress({
285296
modelId,
286297
stage: DownloadStage.Validating,
287298
progress: 0.95,
288-
bytesDownloaded: totalBytesDownloaded,
289-
totalBytes: totalBytesExpected,
299+
bytesDownloaded: totalSize,
300+
totalBytes: totalSize,
290301
filesCompleted: totalFiles,
291302
filesTotal: totalFiles,
292303
});
293304

294-
let totalSize = primarySize;
295-
if (model.additionalFiles) {
296-
for (const file of model.additionalFiles) {
297-
const fileKey = this.additionalFileKey(modelId, file.filename);
298-
const size = await this.storage.getFileSize(fileKey);
299-
if (size !== null) totalSize += size;
300-
}
301-
}
302-
303305
this.registry.updateModel(modelId, {
304306
status: ModelStatus.Downloaded,
305307
downloadProgress: 1,

sdk/runanywhere-web/packages/llamacpp/src/Infrastructure/VLMWorkerBridge.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -251,7 +251,7 @@ export class VLMWorkerBridge {
251251
// TODO: re-test on WebGPU periodically as llama.cpp's WebGPU backend
252252
// matures — the Vulkan fp16 FA fix (b8168) may eventually be ported.
253253
const bridge = LlamaCppBridge.shared;
254-
const isQwenVL = /qwen/i.test(params.modelId) || /qwen/i.test(params.modelName);
254+
const isQwenVL = /qwen.*vl/i.test(params.modelId) || /qwen.*vl/i.test(params.modelName);
255255
if (isQwenVL && bridge.accelerationMode === 'webgpu') {
256256
const currentUrl = bridge.wasmUrl ?? '';
257257
const cpuUrl = currentUrl.replace(/-webgpu\.js$/, '.js');

sdk/runanywhere-web/packages/llamacpp/src/Infrastructure/VLMWorkerRuntime.ts

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,14 +47,22 @@ let offsets: AllOffsets | null = null;
4747
// module's _rac_wasm_offsetof_* / _rac_wasm_sizeof_* exports.
4848
// ---------------------------------------------------------------------------
4949

50-
function workerOffsetOf(m: any, name: string): number {
50+
function workerOffsetOf(m: any, name: string, required = true): number {
5151
const fn = m[`_rac_wasm_offsetof_${name}`];
52-
return typeof fn === 'function' ? fn() : 0;
52+
if (typeof fn === 'function') return fn();
53+
if (required) {
54+
throw new Error(`Missing WASM offsetof export: _rac_wasm_offsetof_${name} — ABI mismatch between WASM binary and TS`);
55+
}
56+
return 0;
5357
}
5458

55-
function workerSizeOf(m: any, name: string): number {
59+
function workerSizeOf(m: any, name: string, required = true): number {
5660
const fn = m[`_rac_wasm_sizeof_${name}`];
57-
return typeof fn === 'function' ? fn() : 0;
61+
if (typeof fn === 'function') return fn();
62+
if (required) {
63+
throw new Error(`Missing WASM sizeof export: _rac_wasm_sizeof_${name} — ABI mismatch between WASM binary and TS`);
64+
}
65+
return 0;
5866
}
5967

6068
function loadOffsetsFromModule(m: any): AllOffsets {

sdk/runanywhere-web/packages/llamacpp/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,8 @@ export type { LlamaCppModule } from './Foundation/LlamaCppBridge';
3232
// Extensions (backend-specific implementations)
3333
export { TextGeneration } from './Extensions/RunAnywhere+TextGeneration';
3434
export { VLM, VLMModelFamily } from './Extensions/RunAnywhere+VLM';
35+
export { VLMImageFormat } from './Extensions/VLMTypes';
36+
export type { VLMImage, VLMGenerationOptions, VLMGenerationResult, VLMStreamingResult } from './Extensions/VLMTypes';
3537
export { ToolCalling, ToolCallFormat, toToolValue, fromToolValue, getStringArg, getNumberArg } from './Extensions/RunAnywhere+ToolCalling';
3638
export type {
3739
ToolValue, ToolParameterType, ToolParameter, ToolDefinition,

sdk/runanywhere-web/packages/onnx/src/index.ts

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,5 +33,14 @@ export type { TTSVoiceConfig } from './Extensions/RunAnywhere+TTS';
3333
export { VAD } from './Extensions/RunAnywhere+VAD';
3434
export type { VADModelConfig } from './Extensions/RunAnywhere+VAD';
3535

36+
// Backward-compatible re-exports of shared contract types
37+
export type {
38+
STTTranscriptionResult, STTWord, STTTranscribeOptions,
39+
STTStreamCallback, STTStreamingSession,
40+
TTSSynthesisResult, TTSSynthesizeOptions,
41+
SpeechActivityCallback, SpeechSegment,
42+
} from '@runanywhere/web';
43+
export { SpeechActivity } from '@runanywhere/web';
44+
3645
// Foundation
3746
export { SherpaONNXBridge } from './Foundation/SherpaONNXBridge';

0 commit comments

Comments (0)