@@ -371,7 +371,7 @@ bool LlamaCppTextGeneration::unload_model_internal() {
371371 // Clear LoRA adapters from context before freeing
372372 // (adapter memory is freed automatically with the model per llama.cpp API)
373373 if (context_ && !lora_adapters_.empty ()) {
374- llama_clear_adapter_lora (context_);
374+ llama_set_adapters_lora (context_, nullptr , 0 , nullptr );
375375 }
376376 lora_adapters_.clear ();
377377
@@ -828,13 +828,32 @@ bool LlamaCppTextGeneration::recreate_context() {
828828}
829829
830830bool LlamaCppTextGeneration::apply_lora_adapters () {
831+ if (lora_adapters_.empty ()) {
832+ // Clear all adapters from context
833+ llama_set_adapters_lora (context_, nullptr , 0 , nullptr );
834+ return true ;
835+ }
836+
837+ std::vector<llama_adapter_lora*> adapters;
838+ std::vector<float > scales;
839+ adapters.reserve (lora_adapters_.size ());
840+ scales.reserve (lora_adapters_.size ());
841+
831842 for (auto & entry : lora_adapters_) {
832- int32_t result = llama_set_adapter_lora (context_, entry.adapter , entry.scale );
833- if (result != 0 ) {
834- LOGE (" Failed to apply LoRA adapter: %s (error=%d)" , entry.path .c_str (), result);
843+ adapters.push_back (entry.adapter );
844+ scales.push_back (entry.scale );
845+ }
846+
847+ int32_t result = llama_set_adapters_lora (context_, adapters.data (), adapters.size (), scales.data ());
848+ if (result != 0 ) {
849+ LOGE (" Failed to apply LoRA adapters (error=%d)" , result);
850+ for (auto & entry : lora_adapters_) {
835851 entry.applied = false ;
836- return false ;
837852 }
853+ return false ;
854+ }
855+
856+ for (auto & entry : lora_adapters_) {
838857 entry.applied = true ;
839858 LOGI (" Applied LoRA adapter: %s (scale=%.2f)" , entry.path .c_str (), entry.scale );
840859 }
@@ -911,17 +930,16 @@ bool LlamaCppTextGeneration::remove_lora_adapter(const std::string& adapter_path
911930 return false ;
912931 }
913932
914- // Remove from context
915- int32_t result = llama_rm_adapter_lora (context_, it->adapter );
916- if (result != 0 ) {
917- LOGE (" Failed to remove LoRA adapter from context: %s (error=%d)" , adapter_path.c_str (), result);
918- return false ;
919- }
920-
921933 // Remove from tracking (adapter memory is freed automatically with the model
922934 // per llama.cpp API — llama_adapter_lora_free is deprecated since b8011)
923935 lora_adapters_.erase (it);
924936
937+ // Re-apply remaining adapters (or clear if none left)
938+ if (!apply_lora_adapters ()) {
939+ LOGE (" Failed to re-apply remaining LoRA adapters after removal" );
940+ return false ;
941+ }
942+
925943 // Clear KV cache after adapter changes
926944 llama_memory_clear (llama_get_memory (context_), true );
927945
@@ -937,7 +955,7 @@ void LlamaCppTextGeneration::clear_lora_adapters() {
937955 }
938956
939957 if (context_) {
940- llama_clear_adapter_lora (context_);
958+ llama_set_adapters_lora (context_, nullptr , 0 , nullptr );
941959 llama_memory_clear (llama_get_memory (context_), true );
942960 }
943961
0 commit comments