
Commit 03e2ba6

Addressed CodeRabbit review comments
1 parent 6730759 commit 03e2ba6

5 files changed

Lines changed: 65 additions & 36 deletions


sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp

Lines changed: 24 additions & 11 deletions
@@ -370,8 +370,12 @@ bool LlamaCppTextGeneration::unload_model_internal() {
 
     // Clear LoRA adapters from context before freeing
     // (adapter memory is freed automatically with the model per llama.cpp API)
+    // Best-effort during teardown: log but don't fail unload on error.
     if (context_ && !lora_adapters_.empty()) {
-        llama_set_adapters_lora(context_, nullptr, 0, nullptr);
+        int32_t rc = llama_set_adapters_lora(context_, nullptr, 0, nullptr);
+        if (rc != 0) {
+            LOGE("Failed to clear LoRA adapters during unload (error=%d)", rc);
+        }
     }
     lora_adapters_.clear();
 
@@ -927,18 +931,23 @@ bool LlamaCppTextGeneration::remove_lora_adapter(const std::string& adapter_path
         return false;
     }
 
-    // Remove from tracking and re-apply remaining adapters
-    lora_adapters_.erase(it);
-
-    // Re-apply remaining adapters (or clear if none left)
+    // Build remaining adapter list BEFORE mutating tracking state
    std::vector<llama_adapter_lora*> adapters;
    std::vector<float> scales;
-    for (auto& entry : lora_adapters_) {
-        adapters.push_back(entry.adapter);
-        scales.push_back(entry.scale);
+    for (auto iter = lora_adapters_.begin(); iter != lora_adapters_.end(); ++iter) {
+        if (iter == it) continue;
+        adapters.push_back(iter->adapter);
+        scales.push_back(iter->scale);
    }
-    llama_set_adapters_lora(context_, adapters.empty() ? nullptr : adapters.data(),
-                            adapters.size(), adapters.empty() ? nullptr : scales.data());
+
+    int32_t rc = llama_set_adapters_lora(context_, adapters.empty() ? nullptr : adapters.data(),
+                                         adapters.size(), adapters.empty() ? nullptr : scales.data());
+    if (rc != 0) {
+        LOGE("Failed to re-apply LoRA adapters after removal (error=%d)", rc);
+        return false;
+    }
+
+    lora_adapters_.erase(it);
 
     // Clear KV cache after adapter changes
     llama_memory_clear(llama_get_memory(context_), true);
@@ -955,7 +964,11 @@ void LlamaCppTextGeneration::clear_lora_adapters() {
     }
 
     if (context_) {
-        llama_set_adapters_lora(context_, nullptr, 0, nullptr);
+        int32_t rc = llama_set_adapters_lora(context_, nullptr, 0, nullptr);
+        if (rc != 0) {
+            LOGE("Failed to clear all LoRA adapters (error=%d)", rc);
+            return;
+        }
         llama_memory_clear(llama_get_memory(context_), true);
     }

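Note on the remove_lora_adapter change above: the substantive fix is ordering. The remaining-adapter list is built while the tracked entry still exists, the return code of llama_set_adapters_lora is checked, and only on success is the entry erased, so a failed native call leaves lora_adapters_ consistent with the context. A minimal standalone sketch of this check-then-commit pattern (the Adapter struct and apply_all stub are illustrative, not the SDK's API):

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct Adapter { std::string path; float scale = 1.0f; };

// Stub standing in for llama_set_adapters_lora(); returns 0 on success.
static int apply_all(const std::vector<Adapter>& active) {
    std::printf("applying %zu adapter(s)\n", active.size());
    return 0;
}

// Removes `path` from `tracked`, but only after the reduced set has been
// applied successfully; on failure, tracking state is left untouched.
static bool remove_adapter(std::vector<Adapter>& tracked, const std::string& path) {
    auto it = std::find_if(tracked.begin(), tracked.end(),
                           [&](const Adapter& a) { return a.path == path; });
    if (it == tracked.end()) return false;

    // Build the post-removal list BEFORE mutating tracking state.
    std::vector<Adapter> remaining;
    for (auto iter = tracked.begin(); iter != tracked.end(); ++iter) {
        if (iter == it) continue;
        remaining.push_back(*iter);
    }

    if (apply_all(remaining) != 0) return false;  // check first...
    tracked.erase(it);                            // ...then commit
    return true;
}

int main() {
    std::vector<Adapter> tracked{{"a.gguf"}, {"b.gguf"}};
    remove_adapter(tracked, "a.gguf");  // on success, only b.gguf stays applied
}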
sdk/runanywhere-commons/src/backends/llamacpp/rac_vlm_llamacpp.cpp

Lines changed: 32 additions & 21 deletions
@@ -208,29 +208,42 @@ std::string format_vlm_prompt_with_template(llama_model* model, const std::strin
                     return formatted;
                 }
             }
-            RAC_LOG_WARNING(LOG_CAT, "llama_chat_apply_template with system failed (size=%d), trying without", size);
+            bool has_explicit_system = (system_prompt && system_prompt[0] != '\0');
+            if (has_explicit_system) {
+                RAC_LOG_WARNING(LOG_CAT, "Template with system failed (size=%d); falling back to manual to preserve explicit system prompt", size);
+            } else {
+                RAC_LOG_WARNING(LOG_CAT, "llama_chat_apply_template with system failed (size=%d), trying without", size);
+            }
+            // If the caller passed an explicit system prompt, skip user-only
+            // template to avoid silently dropping it -- go straight to manual.
+            if (has_explicit_system) {
+                goto manual_fallback;
+            }
         }
 
-        llama_chat_message messages[1];
-        messages[0].role = "user";
-        messages[0].content = user_content.c_str();
-
-        int32_t size = llama_chat_apply_template(tmpl, messages, 1, true, nullptr, 0);
-        if (size > 0) {
-            std::vector<char> buf(size + 1);
-            int32_t result = llama_chat_apply_template(tmpl, messages, 1, true, buf.data(), buf.size());
-            if (result > 0) {
-                std::string formatted(buf.data(), result);
-                RAC_LOG_DEBUG(LOG_CAT, "Template-formatted prompt (%d chars): %s",
-                              (int)formatted.length(), formatted.c_str());
-                return formatted;
+        {
+            llama_chat_message messages[1];
+            messages[0].role = "user";
+            messages[0].content = user_content.c_str();
+
+            int32_t size = llama_chat_apply_template(tmpl, messages, 1, true, nullptr, 0);
+            if (size > 0) {
+                std::vector<char> buf(size + 1);
+                int32_t result = llama_chat_apply_template(tmpl, messages, 1, true, buf.data(), buf.size());
+                if (result > 0) {
+                    std::string formatted(buf.data(), result);
+                    RAC_LOG_DEBUG(LOG_CAT, "Template-formatted prompt (%d chars): %s",
+                                  (int)formatted.length(), formatted.c_str());
+                    return formatted;
+                }
             }
+            RAC_LOG_WARNING(LOG_CAT, "llama_chat_apply_template failed (size=%d), falling back to manual", size);
         }
-        RAC_LOG_WARNING(LOG_CAT, "llama_chat_apply_template failed (size=%d), falling back to manual", size);
     } else {
         RAC_LOG_DEBUG(LOG_CAT, "No chat template in model, using manual formatting");
     }
 
+manual_fallback:
     // Fallback: manual chatml format (works for most models)
     std::string formatted;
     if (effective_system) {
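The manual_fallback label targets the manual formatter that follows this hunk; the goto exists so an explicit system prompt is never silently dropped by the user-only template attempt. For orientation, a simplified sketch of what a ChatML-style fallback produces, assuming the standard <|im_start|>/<|im_end|> markers (the SDK's real formatter also handles the image marker and per-model variants):

#include <cstdio>
#include <string>

// Hypothetical distillation of a manual ChatML fallback; not the SDK's code.
static std::string format_chatml(const std::string& system_prompt,
                                 const std::string& user_content) {
    std::string out;
    if (!system_prompt.empty()) {
        out += "<|im_start|>system\n" + system_prompt + "<|im_end|>\n";
    }
    out += "<|im_start|>user\n" + user_content + "<|im_end|>\n";
    out += "<|im_start|>assistant\n";  // left open for generation
    return out;
}

int main() {
    std::printf("%s", format_chatml("You are helpful.", "Describe <image>.").c_str());
}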
@@ -659,9 +672,8 @@ rac_result_t rac_vlm_llamacpp_process(rac_handle_t handle, const rac_vlm_image_t
     full_prompt = format_vlm_prompt_with_template(backend->model, prompt, image_marker, has_image,
                                                   system_prompt, effective_model_type);
 
-    RAC_LOG_INFO(LOG_CAT, "[v3-process] Prompt (%d chars, img=%d, type=%d): %.200s",
-                 (int)full_prompt.length(), has_image ? 1 : 0, (int)effective_model_type,
-                 full_prompt.c_str());
+    RAC_LOG_INFO(LOG_CAT, "[v3-process] Prompt ready (chars=%d, img=%d, type=%d)",
+                 (int)full_prompt.length(), has_image ? 1 : 0, (int)effective_model_type);
 
     // Tokenize and evaluate
     if (backend->mtmd_ctx && bitmap) {
@@ -915,9 +927,8 @@ rac_result_t rac_vlm_llamacpp_process_stream(rac_handle_t handle, const rac_vlm_
     full_prompt = format_vlm_prompt_with_template(backend->model, prompt, image_marker, has_image,
                                                   system_prompt, effective_model_type);
 
-    RAC_LOG_INFO(LOG_CAT, "[v3-stream] Prompt (%d chars, img=%d, type=%d): %.200s",
-                 (int)full_prompt.length(), has_image ? 1 : 0, (int)effective_model_type,
-                 full_prompt.c_str());
+    RAC_LOG_INFO(LOG_CAT, "[v3-stream] Prompt ready (chars=%d, img=%d, type=%d)",
+                 (int)full_prompt.length(), has_image ? 1 : 0, (int)effective_model_type);
 
     // Tokenize and evaluate
     if (backend->mtmd_ctx && bitmap) {
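Both the [v3-process] and [v3-stream] call sites drop the prompt body (previously emitted with the %.200s precision specifier) and log only metadata, keeping user content out of INFO-level logs. The difference in one line, as a sketch:

#include <cstdio>
#include <string>

int main() {
    std::string prompt = "possibly sensitive user text...";
    // Before: truncated prompt content still reaches the log sink.
    std::printf("Prompt (%d chars): %.200s\n", (int)prompt.size(), prompt.c_str());
    // After: metadata only; content never leaves the process via logging.
    std::printf("Prompt ready (chars=%d, img=%d, type=%d)\n", (int)prompt.size(), 1, 0);
}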

sdk/runanywhere-web/packages/llamacpp/src/Infrastructure/VLMWorkerRuntime.ts

Lines changed: 2 additions & 2 deletions
@@ -595,8 +595,8 @@ async function processImage(
   for (let i = 0; i < optSize; i++) m.setValue(optPtr + i, 0, 'i8');
   const vo = offsets!.vlmOptions;
   m.setValue(optPtr + vo.maxTokens, maxTokens, 'i32');
-  m.setValue(optPtr + vo.temperature, temperature, 'float');
-  m.setValue(optPtr + vo.topP, topP, 'float');
+  m.setValue(optPtr + vo.temperature, Number.isFinite(temperature) ? temperature : 0.7, 'float');
+  m.setValue(optPtr + vo.topP, Number.isFinite(topP) ? topP : 0.9, 'float');
 
   let systemPromptPtr = 0;
   if (systemPrompt) {

sdk/runanywhere-web/packages/onnx/src/Foundation/SherpaHelperLoader.ts

Lines changed: 2 additions & 1 deletion
@@ -95,7 +95,8 @@ async function doLoad<T>(
 ): Promise<T> {
   // Prefer the bridge's resolved base URL (auto-derived during WASM load)
   // over import.meta.url which breaks when bundlers rewrite module paths.
-  const bridgeBase = SherpaONNXBridge.shared.helperBaseUrl;
+  const raw = SherpaONNXBridge.shared.helperBaseUrl;
+  const bridgeBase = raw ? (raw.endsWith('/') ? raw : `${raw}/`) : null;
   const url = bridgeBase
     ? `${bridgeBase}${filename}`
     : new URL(`../../wasm/sherpa/${filename}`, import.meta.url).href;

sdk/runanywhere-web/packages/onnx/src/ONNX.ts

Lines changed: 5 additions & 1 deletion
@@ -45,7 +45,11 @@ export const ONNX = {
   async register(options?: ONNXRegisterOptions): Promise<void> {
     const bridge = SherpaONNXBridge.shared;
     if (options?.wasmUrl) bridge.wasmUrl = options.wasmUrl;
-    if (options?.helperBaseUrl) bridge.helperBaseUrl = options.helperBaseUrl;
+    if (options?.helperBaseUrl) {
+      bridge.helperBaseUrl = options.helperBaseUrl.endsWith('/')
+        ? options.helperBaseUrl
+        : `${options.helperBaseUrl}/`;
+    }
     return ONNXProvider.register();
   },

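The two web-SDK edits share one theme: normalize values at the boundary where JS crosses into WASM/native code rather than trusting callers. Non-finite sampling parameters get safe defaults before being written into the native options struct, and helperBaseUrl is forced to end in '/' on both the setter (ONNX.ts) and the reader (SherpaHelperLoader.ts), so `${base}${filename}` can never fuse path segments. The same guards expressed as standalone C++ helpers for illustration (a sketch; the SDK's actual code is the TypeScript above):

#include <cmath>
#include <cstdio>
#include <string>

// Mirror of the Number.isFinite guard: reject NaN/Inf sampling params.
static float finite_or(float v, float fallback) {
    return std::isfinite(v) ? v : fallback;
}

// Mirror of the helperBaseUrl normalization: guarantee a trailing '/'.
static std::string with_trailing_slash(std::string base) {
    if (!base.empty() && base.back() != '/') base += '/';
    return base;
}

int main() {
    std::printf("%.2f\n", finite_or(NAN, 0.7f));                            // 0.70
    std::printf("%s\n", with_trailing_slash("/assets/wasm").c_str());       // /assets/wasm/
}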