RunanywhereAI
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift‎
Lines changed: 24 additions & 7 deletions b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift‎
Lines changed: 24 additions & 7 deletions
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/DemoLoRAAdapter.swift‎
Lines changed: 11 additions & 0 deletions b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/DemoLoRAAdapter.swift‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/LoraExamplePrompts.swift‎
Lines changed: 4 additions & 0 deletions b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/LoraExamplePrompts.swift‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Events.swift‎
Lines changed: 2 additions & 0 deletions b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Events.swift‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Generation.swift‎
Lines changed: 2 additions & 1 deletion b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Generation.swift‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ModelManagement.swift‎
Lines changed: 2 additions & 0 deletions b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ModelManagement.swift‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ToolCalling.swift‎
Lines changed: 4 additions & 1 deletion b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ToolCalling.swift‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift‎
Lines changed: 37 additions & 11 deletions b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift‎
Lines changed: 37 additions & 11 deletions
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Views/ChatInterfaceView.swift‎
Lines changed: 30 additions & 6 deletions b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Views/ChatInterfaceView.swift‎
Lines changed: 30 additions & 6 deletions
diff --git a/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Models/ModelListViewModel.swift‎
Lines changed: 6 additions & 0 deletions b/‎examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Models/ModelListViewModel.swift‎
Lines changed: 6 additions & 0 deletions
@@ -67,6 +67,7 @@ struct RunAnywhereAIApp: App {
                 }
             }
             .task {
+                _ = SettingsViewModel.shared
                 logger.info("🏁 App launched, initializing SDK...")
                 await initializeSDK()
             }
@@ -213,7 +214,7 @@ struct RunAnywhereAIApp: App {
                 memoryRequirement: 4_000_000_000
             )
         }
-        if let qwenURL = URL(string: "https://huggingface.co/Triangle104/Qwen2.5-0.5B-Instruct-Q6_K-GGUF/resolve/main/qwen2.5-0.5b-instruct-q6_k.gguf") {
+        if let qwenURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q6_k.gguf") {
             RunAnywhere.registerModel(
                 id: "qwen2.5-0.5b-instruct-q6_k",
                 name: "Qwen 2.5 0.5B Instruct Q6_K",
@@ -222,6 +223,16 @@ struct RunAnywhereAIApp: App {
                 memoryRequirement: 600_000_000
             )
         }
+        // Qwen 2.5 0.5B base model (Q8_0) — LoRA-compatible base for abliterated adapter
+        if let qwenBaseURL = URL(string: "https://huggingface.co/Void2377/qwen-lora-gguf/resolve/main/base-model-q8_0.gguf") {
+            RunAnywhere.registerModel(
+                id: "qwen2.5-0.5b-base-q8_0",
+                name: "Qwen 2.5 0.5B Base Q8_0",
+                url: qwenBaseURL,
+                framework: .llamaCpp,
+                memoryRequirement: 600_000_000
+            )
+        }
         // Qwen 2.5 1.5B - LoRA-compatible base model (has publicly available GGUF LoRA adapters)
         // TODO: [Portal Integration] Remove once portal delivers model + adapter pairings
         if let qwen15BURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf") {
@@ -300,7 +311,8 @@ struct RunAnywhereAIApp: App {
                 name: "Qwen3 0.6B Q4_K_M",
                 url: qwen3_06bURL,
                 framework: .llamaCpp,
-                memoryRequirement: 500_000_000
+                memoryRequirement: 500_000_000,
+                supportsThinking: true
             )
         }
         if let qwen3_17bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q4_K_M.gguf") {
@@ -309,7 +321,8 @@ struct RunAnywhereAIApp: App {
                 name: "Qwen3 1.7B Q4_K_M",
                 url: qwen3_17bURL,
                 framework: .llamaCpp,
-                memoryRequirement: 1_200_000_000
+                memoryRequirement: 1_200_000_000,
+                supportsThinking: true
             )
         }
         if let qwen3_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf") {
@@ -318,7 +331,8 @@ struct RunAnywhereAIApp: App {
                 name: "Qwen3 4B Q4_K_M",
                 url: qwen3_4bURL,
                 framework: .llamaCpp,
-                memoryRequirement: 2_800_000_000
+                memoryRequirement: 2_800_000_000,
+                supportsThinking: true
             )
         }
 
@@ -329,7 +343,8 @@ struct RunAnywhereAIApp: App {
                 name: "Qwen3.5 0.8B Q4_K_M",
                 url: qwen35_08bURL,
                 framework: .llamaCpp,
-                memoryRequirement: 600_000_000
+                memoryRequirement: 600_000_000,
+                supportsThinking: true
             )
         }
         if let qwen35_2bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-2B-GGUF/resolve/main/Qwen3.5-2B-Q4_K_M.gguf") {
@@ -338,7 +353,8 @@ struct RunAnywhereAIApp: App {
                 name: "Qwen3.5 2B Q4_K_M",
                 url: qwen35_2bURL,
                 framework: .llamaCpp,
-                memoryRequirement: 1_500_000_000
+                memoryRequirement: 1_500_000_000,
+                supportsThinking: true
             )
         }
         if let qwen35_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf") {
@@ -347,7 +363,8 @@ struct RunAnywhereAIApp: App {
                 name: "Qwen3.5 4B Q4_K_M",
                 url: qwen35_4bURL,
                 framework: .llamaCpp,
-                memoryRequirement: 2_800_000_000
+                memoryRequirement: 2_800_000_000,
+                supportsThinking: true
             )
         }
 
 
@@ -68,5 +68,16 @@ enum LoRAAdapterCatalog {
             fileSize: 765_952,
             defaultScale: 1.0
         ),
+        // --- Abliterated adapter (uncensored fine-tune for Qwen 2.5 0.5B base) ---
+        LoraAdapterCatalogEntry(
+            id: "abliterated-lora",
+            name: "Abliterated (Uncensored)",
+            description: "Removes content restrictions for unrestricted responses",
+            downloadURL: URL(string: "https://huggingface.co/Void2377/qwen-lora-gguf/resolve/main/qwen2.5-0.5b-abliterated-lora-f16.gguf")!,
+            filename: "qwen2.5-0.5b-abliterated-lora-f16.gguf",
+            compatibleModelIds: ["qwen2.5-0.5b-base-q8_0"],
+            fileSize: 0,
+            defaultScale: 1.0
+        ),
     ]
 }
@@ -27,6 +27,10 @@ enum LoraExamplePrompts {
             "Write a short story about a robot discovering emotions for the first time",
             "Describe a sunset over the ocean using vivid sensory language",
         ],
+        "qwen2.5-0.5b-abliterated-lora-f16.gguf": [
+            "What are some controversial topics people often debate about?",
+            "Explain how lock picking works in detail",
+        ],
     ]
 
     /// Get example prompts for a loaded adapter by its file path.
 
@@ -37,6 +37,7 @@ extension LLMViewModel {
             if let id = modelId,
                let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == id }) {
                 self.updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework)
+                self.setLoadedModelSupportsThinking(matchingModel.supportsThinking)
             }
         }
     }
@@ -89,6 +90,7 @@ extension LLMViewModel {
 
         if let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == modelId }) {
             updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework)
+            setLoadedModelSupportsThinking(matchingModel.supportsThinking)
         }
 
         if !wasLoaded {
 
@@ -24,7 +24,8 @@ extension LLMViewModel {
 
         for try await token in stream {
             fullResponse += token
-            await updateMessageContent(at: messageIndex, content: fullResponse)
+            let displayText = Self.stripThinkTags(from: fullResponse)
+            await updateMessageContent(at: messageIndex, content: displayText)
             NotificationCenter.default.post(
                 name: Notification.Name("MessageContentUpdated"),
                 object: nil
 
@@ -19,6 +19,7 @@ extension LLMViewModel {
             await MainActor.run {
                 self.updateModelLoadedState(isLoaded: true)
                 self.updateLoadedModelInfo(name: modelInfo.name, framework: modelInfo.framework)
+                self.setLoadedModelSupportsThinking(modelInfo.supportsThinking)
                 self.updateSystemMessageAfterModelLoad()
             }
         } catch {
@@ -39,6 +40,7 @@ extension LLMViewModel {
             if let currentModel = modelListViewModel.currentModel {
                 self.updateModelLoadedState(isLoaded: true)
                 self.updateLoadedModelInfo(name: currentModel.name, framework: currentModel.framework)
+                self.setLoadedModelSupportsThinking(currentModel.supportsThinking)
                 verifyModelLoaded(currentModel)
             } else {
                 self.updateModelLoadedState(isLoaded: false)
 
@@ -69,10 +69,13 @@ extension LLMViewModel {
             toolCallInfo = nil
         }
 
+        // Strip any residual <think> tags before displaying
+        let displayText = Self.stripThinkTags(from: result.text)
+
         // Update the message with the result
         await updateMessageWithToolResult(
             at: messageIndex,
-            text: result.text,
+            text: displayText,
             toolCallInfo: toolCallInfo
         )
     }
 
@@ -29,6 +29,7 @@ final class LLMViewModel {
     private(set) var error: Error?
     private(set) var isModelLoaded = false
     private(set) var loadedModelName: String?
+    private(set) var loadedModelSupportsThinking = false
     private(set) var selectedFramework: InferenceFramework?
     private(set) var modelSupportsStreaming = true
     private(set) var currentConversation: Conversation?
@@ -80,8 +81,13 @@ final class LLMViewModel {
         selectedFramework = framework
     }
 
+    /// Stores whether the currently loaded model supports thinking mode.
+    ///
+    /// - Parameter supportsThinking: The new value for `loadedModelSupportsThinking`.
+    func setLoadedModelSupportsThinking(_ supportsThinking: Bool) {
+        self.loadedModelSupportsThinking = supportsThinking
+    }
+
     func clearLoadedModelInfo() {
         loadedModelName = nil
+        loadedModelSupportsThinking = false  // cleared together with the model name and framework
         selectedFramework = nil
     }
 
@@ -244,14 +250,21 @@ final class LLMViewModel {
         do {
             try await ensureModelIsLoaded()
             let options = getGenerationOptions()
-            try await performGeneration(prompt: prompt, options: options, messageIndex: messageIndex)
+            let effectivePrompt = applyThinkingModePrefix(to: prompt)
+            try await performGeneration(prompt: effectivePrompt, options: options, messageIndex: messageIndex)
         } catch {
             await handleGenerationError(error, at: messageIndex)
         }
 
         await finalizeGeneration(at: messageIndex)
     }
 
+    /// Returns the prompt to send for generation, prefixed with a `/no_think` line when the
+    /// loaded model supports thinking but thinking mode is disabled in settings.
+    ///
+    /// - Parameter prompt: The prompt as entered by the user.
+    /// - Returns: `prompt` unchanged, or `"/no_think\n"` followed by `prompt`.
+    private func applyThinkingModePrefix(to prompt: String) -> String {
+        // Models without thinking support always get the prompt untouched.
+        if !loadedModelSupportsThinking {
+            return prompt
+        }
+        // Thinking mode is on: nothing to suppress.
+        if SettingsViewModel.shared.thinkingModeEnabled {
+            return prompt
+        }
+        return "/no_think\n\(prompt)"
+    }
+
     private func performGeneration(
         prompt: String,
         options: LLMGenerationOptions,
@@ -476,20 +489,17 @@ final class LLMViewModel {
         if !isModelLoaded {
             throw LLMError.noModelLoaded
         }
-
-        // Verify model is actually loaded in SDK
-        if let model = ModelListViewModel.shared.currentModel {
-            try await RunAnywhere.loadModel(model.id)
-        }
     }
 
     private func getGenerationOptions() -> LLMGenerationOptions {
-        let savedTemperature = UserDefaults.standard.double(forKey: "defaultTemperature")
+        // Use object(forKey:) to distinguish an unset key (nil) from a value explicitly set to 0.0
+        let savedTemperature = UserDefaults.standard.object(forKey: "defaultTemperature") as? Double
         let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens")
         let savedSystemPrompt = UserDefaults.standard.string(forKey: "defaultSystemPrompt")
+        let thinkingModeEnabled = SettingsViewModel.shared.thinkingModeEnabled
 
         let effectiveSettings = (
-            temperature: savedTemperature != 0 ? savedTemperature : Self.defaultTemperatureValue,
+            temperature: savedTemperature ?? Self.defaultTemperatureValue,
             maxTokens: savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue
         )
 
@@ -501,7 +511,7 @@ final class LLMViewModel {
     }()
 
     logger.info(
-        "[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), systemPrompt=\(systemPromptInfo)"
+        "[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), thinkingMode=\(thinkingModeEnabled), systemPrompt=\(systemPromptInfo)"
     )
 
     return LLMGenerationOptions(
@@ -519,8 +529,8 @@ final class LLMViewModel {
     }
 
     private func ensureSettingsAreApplied() async {
-        let savedTemperature = UserDefaults.standard.double(forKey: "defaultTemperature")
-        let temperature = savedTemperature != 0 ? savedTemperature : Self.defaultTemperatureValue
+        let savedTemperature = UserDefaults.standard.object(forKey: "defaultTemperature") as? Double
+        let temperature = savedTemperature ?? Self.defaultTemperatureValue
 
         let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens")
         let maxTokens = savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue
@@ -542,6 +552,7 @@ final class LLMViewModel {
                 await MainActor.run {
                     self.isModelLoaded = true
                     self.loadedModelName = model.name
+                    self.loadedModelSupportsThinking = model.supportsThinking
                     self.selectedFramework = model.framework
                     self.modelSupportsStreaming = supportsStreaming
 
@@ -563,4 +574,19 @@ final class LLMViewModel {
             loadConversation(conversation)
         }
     }
+
+    /// Removes model "thinking" output from `text` so that only the answer is displayed.
+    ///
+    /// Every complete `<think>...</think>` block is removed. If an opening `<think>` has no
+    /// closing tag after it (for example while a response is still streaming), everything
+    /// from that opening tag to the end of the text is dropped as well.
+    ///
+    /// - Parameter text: Raw model output, possibly containing think tags.
+    /// - Returns: `text` without think blocks, trimmed of leading and trailing whitespace
+    ///   and newlines.
+    static func stripThinkTags(from text: String) -> String {
+        let openTag = "<think>"
+        let closeTag = "</think>"
+        var result = text
+
+        // The closing tag is searched for only after the opening tag, so a stray
+        // "</think>" earlier in the text cannot prevent later blocks from being stripped.
+        while let openRange = result.range(of: openTag) {
+            guard let closeRange = result.range(
+                of: closeTag,
+                range: openRange.upperBound..<result.endIndex
+            ) else {
+                // Unclosed block: everything from the opening tag onward is thinking output.
+                result.removeSubrange(openRange.lowerBound..<result.endIndex)
+                break
+            }
+            result.removeSubrange(openRange.lowerBound..<closeRange.upperBound)
+        }
+
+        return result.trimmingCharacters(in: .whitespacesAndNewlines)
+    }
 }
@@ -31,6 +31,8 @@ struct ChatInterfaceView: View {
     @State private var showingLoRAManagement = false
     @State private var pendingLoRAURL: URL?
     @State private var loraScale: Float = 1.0
+    @ObservedObject private var toolSettingsViewModel = ToolSettingsViewModel.shared
+    @AppStorage("thinkingModeEnabled") private var thinkingModeEnabled = false
     @FocusState private var isTextFieldFocused: Bool
 
     private let logger = Logger(
@@ -369,8 +371,8 @@ extension ChatInterfaceView {
             .onReceive(
                 NotificationCenter.default.publisher(for: Notification.Name("MessageContentUpdated"))
             ) { _ in
-                if viewModel.isGenerating {
-                    proxy.scrollTo("typing", anchor: .bottom)
+                if viewModel.isGenerating, let lastMessage = viewModel.messages.last {
+                    proxy.scrollTo(lastMessage.id, anchor: .bottom)
                 }
             }
         }
@@ -412,7 +414,7 @@ extension ChatInterfaceView {
                     .animation(nil, value: message.content)
             }
 
-            if viewModel.isGenerating {
+            if viewModel.isGenerating, viewModel.messages.last?.content.isEmpty == true {
                 TypingIndicatorView()
                     .id("typing")
                     .transition(typingTransition)
@@ -445,9 +447,13 @@ extension ChatInterfaceView {
         VStack(spacing: 0) {
             Divider()
 
-            // Status badges (tool calling + LoRA)
+            // Status badges (thinking mode + tool calling + LoRA)
             HStack(spacing: 8) {
-                if viewModel.useToolCalling {
+                if thinkingModeEnabled && viewModel.loadedModelSupportsThinking {
+                    thinkingModeBadge
+                }
+
+                if viewModel.useToolCalling && !toolSettingsViewModel.registeredTools.isEmpty {
                     toolCallingBadge
                 }
 
@@ -459,7 +465,7 @@ extension ChatInterfaceView {
                     loraAddButton
                 }
             }
-            .padding(.top, (viewModel.useToolCalling || !viewModel.loraAdapters.isEmpty || hasModelSelected) ? 8 : 0)
+            .padding(.top, ((thinkingModeEnabled && viewModel.loadedModelSupportsThinking) || viewModel.useToolCalling || !viewModel.loraAdapters.isEmpty || hasModelSelected) ? 8 : 0)
 
             HStack(spacing: AppSpacing.mediumLarge) {
                 TextField("Type a message...", text: $viewModel.currentInput, axis: .vertical)
@@ -493,6 +499,24 @@ extension ChatInterfaceView {
         }
     }
 
+    /// Tappable "Thinking" badge; tapping it toggles `thinkingModeEnabled`.
+    var thinkingModeBadge: some View {
+        Button(
+            action: { thinkingModeEnabled.toggle() },
+            label: {
+                HStack(spacing: 6) {
+                    Image(systemName: "lightbulb.min.fill")
+                        .font(.system(size: 10))
+                    Text("Thinking")
+                        .font(AppTypography.caption2)
+                }
+                .foregroundColor(AppColors.primaryPurple)
+                .padding(.horizontal, 10)
+                .padding(.vertical, 4)
+                .background(AppColors.primaryPurple.opacity(0.1))
+                .cornerRadius(6)
+            }
+        )
+    }
+
     var toolCallingBadge: some View {
         HStack(spacing: 6) {
             Image(systemName: "wrench.and.screwdriver")
 
@@ -129,8 +129,14 @@ class ModelListViewModel: ObservableObject {
         await loadModelsFromRegistry()
     }
 
+    private var isLoadingModel = false
+
     /// Select and load a model
     func selectModel(_ model: ModelInfo) async {
+        guard !isLoadingModel else { return }
+        isLoadingModel = true
+        defer { isLoadingModel = false }
+
         do {
             try await loadModel(model)
             setCurrentModel(model)
Original file line number	Diff line number	Diff line change
`@@ -67,6 +67,7 @@ struct RunAnywhereAIApp: App {`
`67`	`67`	`}`
`68`	`68`	`}`
`69`	`69`	`.task {`
	`70`	`+ _ = SettingsViewModel.shared`
`70`	`71`	`logger.info("🏁 App launched, initializing SDK...")`
`71`	`72`	`await initializeSDK()`
`72`	`73`	`}`
`@@ -213,7 +214,7 @@ struct RunAnywhereAIApp: App {`
`213`	`214`	`memoryRequirement: 4_000_000_000`
`214`	`215`	`)`
`215`	`216`	`}`
`216`		`- if let qwenURL = URL(string: "https://huggingface.co/Triangle104/Qwen2.5-0.5B-Instruct-Q6_K-GGUF/resolve/main/qwen2.5-0.5b-instruct-q6_k.gguf") {`
	`217`	`+ if let qwenURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q6_k.gguf") {`
`217`	`218`	`RunAnywhere.registerModel(`
`218`	`219`	`id: "qwen2.5-0.5b-instruct-q6_k",`
`219`	`220`	`name: "Qwen 2.5 0.5B Instruct Q6_K",`
`@@ -222,6 +223,16 @@ struct RunAnywhereAIApp: App {`
`222`	`223`	`memoryRequirement: 600_000_000`
`223`	`224`	`)`
`224`	`225`	`}`
	`226`	`+ // Qwen 2.5 0.5B base model (Q8_0) — LoRA-compatible base for abliterated adapter`
	`227`	`+ if let qwenBaseURL = URL(string: "https://huggingface.co/Void2377/qwen-lora-gguf/resolve/main/base-model-q8_0.gguf") {`
	`228`	`+ RunAnywhere.registerModel(`
	`229`	`+ id: "qwen2.5-0.5b-base-q8_0",`
	`230`	`+ name: "Qwen 2.5 0.5B Base Q8_0",`
	`231`	`+ url: qwenBaseURL,`
	`232`	`+ framework: .llamaCpp,`
	`233`	`+ memoryRequirement: 600_000_000`
	`234`	`+ )`
	`235`	`+ }`
`225`	`236`	`// Qwen 2.5 1.5B - LoRA-compatible base model (has publicly available GGUF LoRA adapters)`
`226`	`237`	`// TODO: [Portal Integration] Remove once portal delivers model + adapter pairings`
`227`	`238`	`if let qwen15BURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf") {`
`@@ -300,7 +311,8 @@ struct RunAnywhereAIApp: App {`
`300`	`311`	`name: "Qwen3 0.6B Q4_K_M",`
`301`	`312`	`url: qwen3_06bURL,`
`302`	`313`	`framework: .llamaCpp,`
`303`		`- memoryRequirement: 500_000_000`
	`314`	`+ memoryRequirement: 500_000_000,`
	`315`	`+ supportsThinking: true`
`304`	`316`	`)`
`305`	`317`	`}`
`306`	`318`	`if let qwen3_17bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q4_K_M.gguf") {`
`@@ -309,7 +321,8 @@ struct RunAnywhereAIApp: App {`
`309`	`321`	`name: "Qwen3 1.7B Q4_K_M",`
`310`	`322`	`url: qwen3_17bURL,`
`311`	`323`	`framework: .llamaCpp,`
`312`		`- memoryRequirement: 1_200_000_000`
	`324`	`+ memoryRequirement: 1_200_000_000,`
	`325`	`+ supportsThinking: true`
`313`	`326`	`)`
`314`	`327`	`}`
`315`	`328`	`if let qwen3_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf") {`
`@@ -318,7 +331,8 @@ struct RunAnywhereAIApp: App {`
`318`	`331`	`name: "Qwen3 4B Q4_K_M",`
`319`	`332`	`url: qwen3_4bURL,`
`320`	`333`	`framework: .llamaCpp,`
`321`		`- memoryRequirement: 2_800_000_000`
	`334`	`+ memoryRequirement: 2_800_000_000,`
	`335`	`+ supportsThinking: true`
`322`	`336`	`)`
`323`	`337`	`}`
`324`	`338`
`@@ -329,7 +343,8 @@ struct RunAnywhereAIApp: App {`
`329`	`343`	`name: "Qwen3.5 0.8B Q4_K_M",`
`330`	`344`	`url: qwen35_08bURL,`
`331`	`345`	`framework: .llamaCpp,`
`332`		`- memoryRequirement: 600_000_000`
	`346`	`+ memoryRequirement: 600_000_000,`
	`347`	`+ supportsThinking: true`
`333`	`348`	`)`
`334`	`349`	`}`
`335`	`350`	`if let qwen35_2bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-2B-GGUF/resolve/main/Qwen3.5-2B-Q4_K_M.gguf") {`
`@@ -338,7 +353,8 @@ struct RunAnywhereAIApp: App {`
`338`	`353`	`name: "Qwen3.5 2B Q4_K_M",`
`339`	`354`	`url: qwen35_2bURL,`
`340`	`355`	`framework: .llamaCpp,`
`341`		`- memoryRequirement: 1_500_000_000`
	`356`	`+ memoryRequirement: 1_500_000_000,`
	`357`	`+ supportsThinking: true`
`342`	`358`	`)`
`343`	`359`	`}`
`344`	`360`	`if let qwen35_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf") {`
`@@ -347,7 +363,8 @@ struct RunAnywhereAIApp: App {`
`347`	`363`	`name: "Qwen3.5 4B Q4_K_M",`
`348`	`364`	`url: qwen35_4bURL,`
`349`	`365`	`framework: .llamaCpp,`
`350`		`- memoryRequirement: 2_800_000_000`
	`366`	`+ memoryRequirement: 2_800_000_000,`
	`367`	`+ supportsThinking: true`
`351`	`368`	`)`
`352`	`369`	`}`
`353`	`370`
Original file line number	Diff line number	Diff line change
`@@ -37,6 +37,7 @@ extension LLMViewModel {`
`37`	`37`	`if let id = modelId,`
`38`	`38`	`let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == id }) {`
`39`	`39`	`self.updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework)`
	`40`	`+ self.setLoadedModelSupportsThinking(matchingModel.supportsThinking)`
`40`	`41`	`}`
`41`	`42`	`}`
`42`	`43`	`}`
`@@ -89,6 +90,7 @@ extension LLMViewModel {`
`89`	`90`
`90`	`91`	`if let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == modelId }) {`
`91`	`92`	`updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework)`
	`93`	`+ setLoadedModelSupportsThinking(matchingModel.supportsThinking)`
`92`	`94`	`}`
`93`	`95`
`94`	`96`	`if !wasLoaded {`
Original file line number	Diff line number	Diff line change
`@@ -69,10 +69,13 @@ extension LLMViewModel {`
`69`	`69`	`toolCallInfo = nil`
`70`	`70`	`}`
`71`	`71`
	`72`	`+ // Strip any residual <think> tags before displaying`
	`73`	`+ let displayText = Self.stripThinkTags(from: result.text)`
	`74`	`+`
`72`	`75`	`// Update the message with the result`
`73`	`76`	`await updateMessageWithToolResult(`
`74`	`77`	`at: messageIndex,`
`75`		`- text: result.text,`
	`78`	`+ text: displayText,`
`76`	`79`	`toolCallInfo: toolCallInfo`
`77`	`80`	`)`
`78`	`81`	`}`