@@ -208,29 +208,42 @@ std::string format_vlm_prompt_with_template(llama_model* model, const std::strin
                     return formatted;
                 }
             }
-            RAC_LOG_WARNING(LOG_CAT, "llama_chat_apply_template with system failed (size=%d), trying without", size);
+            bool has_explicit_system = (system_prompt && system_prompt[0] != '\0');
+            if (has_explicit_system) {
+                RAC_LOG_WARNING(LOG_CAT, "Template with system failed (size=%d); falling back to manual to preserve explicit system prompt", size);
+            } else {
+                RAC_LOG_WARNING(LOG_CAT, "llama_chat_apply_template with system failed (size=%d), trying without", size);
+            }
+            // If the caller passed an explicit system prompt, skip the user-only
+            // template to avoid silently dropping it -- go straight to manual.
+            if (has_explicit_system) {
+                goto manual_fallback;
+            }
         }
 
-        llama_chat_message messages[1];
-        messages[0].role = "user";
-        messages[0].content = user_content.c_str();
-
-        int32_t size = llama_chat_apply_template(tmpl, messages, 1, true, nullptr, 0);
-        if (size > 0) {
-            std::vector<char> buf(size + 1);
-            int32_t result = llama_chat_apply_template(tmpl, messages, 1, true, buf.data(), buf.size());
-            if (result > 0) {
-                std::string formatted(buf.data(), result);
-                RAC_LOG_DEBUG(LOG_CAT, "Template-formatted prompt (%d chars): %s",
-                              (int)formatted.length(), formatted.c_str());
-                return formatted;
+        {
+            llama_chat_message messages[1];
+            messages[0].role = "user";
+            messages[0].content = user_content.c_str();
+
+            int32_t size = llama_chat_apply_template(tmpl, messages, 1, true, nullptr, 0);
+            if (size > 0) {
+                std::vector<char> buf(size + 1);
+                int32_t result = llama_chat_apply_template(tmpl, messages, 1, true, buf.data(), buf.size());
+                if (result > 0) {
+                    std::string formatted(buf.data(), result);
+                    RAC_LOG_DEBUG(LOG_CAT, "Template-formatted prompt (%d chars): %s",
+                                  (int)formatted.length(), formatted.c_str());
+                    return formatted;
+                }
             }
+            RAC_LOG_WARNING(LOG_CAT, "llama_chat_apply_template failed (size=%d), falling back to manual", size);
         }
-        RAC_LOG_WARNING(LOG_CAT, "llama_chat_apply_template failed (size=%d), falling back to manual", size);
     } else {
         RAC_LOG_DEBUG(LOG_CAT, "No chat template in model, using manual formatting");
     }
 
+manual_fallback:
     // Fallback: manual chatml format (works for most models)
     std::string formatted;
     if (effective_system) {
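
The two structural additions in this hunk work together: C++ forbids a goto that jumps into the scope of a local variable while bypassing its initialization, so the new anonymous block scopes off the user-only attempt's locals (messages, size, buf) before the manual_fallback label. A minimal standalone sketch of the rule (illustrative only, not from the commit):

#include <string>

std::string pick(bool skip_template) {
    if (skip_template) {
        // Legal only because 'attempt' below lives in its own block;
        // without the braces, this goto would bypass the initialization
        // of an in-scope non-trivial local and fail to compile.
        goto fallback;
    }
    {
        std::string attempt = "templated";
        if (!attempt.empty()) {
            return attempt;
        }
    }
fallback:
    return "manual";
}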
@@ -659,9 +672,8 @@ rac_result_t rac_vlm_llamacpp_process(rac_handle_t handle, const rac_vlm_image_t
     full_prompt = format_vlm_prompt_with_template(backend->model, prompt, image_marker, has_image,
                                                   system_prompt, effective_model_type);
 
-    RAC_LOG_INFO(LOG_CAT, "[v3-process] Prompt (%d chars, img=%d, type=%d): %.200s",
-                 (int)full_prompt.length(), has_image ? 1 : 0, (int)effective_model_type,
-                 full_prompt.c_str());
+    RAC_LOG_INFO(LOG_CAT, "[v3-process] Prompt ready (chars=%d, img=%d, type=%d)",
+                 (int)full_prompt.length(), has_image ? 1 : 0, (int)effective_model_type);
 
     // Tokenize and evaluate
     if (backend->mtmd_ctx && bitmap) {
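
Both this call site and the streaming variant below delegate prompt construction to format_vlm_prompt_with_template, which uses llama.cpp's two-pass calling convention for llama_chat_apply_template: probe with a null buffer to learn the required size, then render into a buffer of that size. A condensed sketch of that convention (hypothetical helper name; error handling trimmed):

#include <llama.h>
#include <string>
#include <vector>

static std::string apply_user_template(const char* tmpl, const std::string& user) {
    llama_chat_message msg{ "user", user.c_str() };

    // First pass: null buffer probes for the formatted length in bytes.
    int32_t needed = llama_chat_apply_template(tmpl, &msg, 1, /*add_ass=*/true, nullptr, 0);
    if (needed <= 0) {
        return {};  // template missing or unsupported; caller falls back to manual
    }

    // Second pass: render into a buffer of the probed size.
    std::vector<char> buf(needed + 1);
    int32_t written = llama_chat_apply_template(tmpl, &msg, 1, true, buf.data(), (int32_t)buf.size());
    return written > 0 ? std::string(buf.data(), written) : std::string{};
}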
@@ -915,9 +927,8 @@ rac_result_t rac_vlm_llamacpp_process_stream(rac_handle_t handle, const rac_vlm_
     full_prompt = format_vlm_prompt_with_template(backend->model, prompt, image_marker, has_image,
                                                   system_prompt, effective_model_type);
 
-    RAC_LOG_INFO(LOG_CAT, "[v3-stream] Prompt (%d chars, img=%d, type=%d): %.200s",
-                 (int)full_prompt.length(), has_image ? 1 : 0, (int)effective_model_type,
-                 full_prompt.c_str());
+    RAC_LOG_INFO(LOG_CAT, "[v3-stream] Prompt ready (chars=%d, img=%d, type=%d)",
+                 (int)full_prompt.length(), has_image ? 1 : 0, (int)effective_model_type);
 
     // Tokenize and evaluate
     if (backend->mtmd_ctx && bitmap) {
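
The manual fallback that the first hunk jumps to emits ChatML, which most instruction-tuned models accept. Its body lies outside this diff, but the framing itself is standard; a sketch of what such a fallback typically builds (assumed shape, not the commit's exact code):

#include <string>

// ChatML framing: each turn is <|im_start|>role\n...<|im_end|>\n, and the
// prompt ends with an opened assistant turn for the model to complete.
std::string manual_chatml(const std::string& system, const std::string& user) {
    std::string out;
    if (!system.empty()) {
        out += "<|im_start|>system\n" + system + "<|im_end|>\n";
    }
    out += "<|im_start|>user\n" + user + "<|im_end|>\n";
    out += "<|im_start|>assistant\n";
    return out;
}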