Skip to content

Commit ffcfc1b

Browse files
Merge branch 'ios-demo-polish' into metal-shubham-1
Resolve conflicts in favor of ios-demo-polish for: - llm_component.cpp - ArchiveUtility.swift - RunAnywhere+TextGeneration.swift Made-with: Cursor
2 parents 6e6c19e + c9c6ca6 commit ffcfc1b

32 files changed

Lines changed: 927 additions & 314 deletions

examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ struct RunAnywhereAIApp: App {
6767
}
6868
}
6969
.task {
70+
_ = SettingsViewModel.shared
7071
logger.info("🏁 App launched, initializing SDK...")
7172
await initializeSDK()
7273
}
@@ -213,7 +214,7 @@ struct RunAnywhereAIApp: App {
213214
memoryRequirement: 4_000_000_000
214215
)
215216
}
216-
if let qwenURL = URL(string: "https://huggingface.co/Triangle104/Qwen2.5-0.5B-Instruct-Q6_K-GGUF/resolve/main/qwen2.5-0.5b-instruct-q6_k.gguf") {
217+
if let qwenURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q6_k.gguf") {
217218
RunAnywhere.registerModel(
218219
id: "qwen2.5-0.5b-instruct-q6_k",
219220
name: "Qwen 2.5 0.5B Instruct Q6_K",
@@ -222,6 +223,16 @@ struct RunAnywhereAIApp: App {
222223
memoryRequirement: 600_000_000
223224
)
224225
}
226+
// Qwen 2.5 0.5B base model (Q8_0) — LoRA-compatible base for abliterated adapter
227+
if let qwenBaseURL = URL(string: "https://huggingface.co/Void2377/qwen-lora-gguf/resolve/main/base-model-q8_0.gguf") {
228+
RunAnywhere.registerModel(
229+
id: "qwen2.5-0.5b-base-q8_0",
230+
name: "Qwen 2.5 0.5B Base Q8_0",
231+
url: qwenBaseURL,
232+
framework: .llamaCpp,
233+
memoryRequirement: 600_000_000
234+
)
235+
}
225236
// Qwen 2.5 1.5B - LoRA-compatible base model (has publicly available GGUF LoRA adapters)
226237
// TODO: [Portal Integration] Remove once portal delivers model + adapter pairings
227238
if let qwen15BURL = URL(string: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf") {
@@ -300,7 +311,8 @@ struct RunAnywhereAIApp: App {
300311
name: "Qwen3 0.6B Q4_K_M",
301312
url: qwen3_06bURL,
302313
framework: .llamaCpp,
303-
memoryRequirement: 500_000_000
314+
memoryRequirement: 500_000_000,
315+
supportsThinking: true
304316
)
305317
}
306318
if let qwen3_17bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q4_K_M.gguf") {
@@ -309,7 +321,8 @@ struct RunAnywhereAIApp: App {
309321
name: "Qwen3 1.7B Q4_K_M",
310322
url: qwen3_17bURL,
311323
framework: .llamaCpp,
312-
memoryRequirement: 1_200_000_000
324+
memoryRequirement: 1_200_000_000,
325+
supportsThinking: true
313326
)
314327
}
315328
if let qwen3_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf") {
@@ -318,7 +331,8 @@ struct RunAnywhereAIApp: App {
318331
name: "Qwen3 4B Q4_K_M",
319332
url: qwen3_4bURL,
320333
framework: .llamaCpp,
321-
memoryRequirement: 2_800_000_000
334+
memoryRequirement: 2_800_000_000,
335+
supportsThinking: true
322336
)
323337
}
324338

@@ -329,7 +343,8 @@ struct RunAnywhereAIApp: App {
329343
name: "Qwen3.5 0.8B Q4_K_M",
330344
url: qwen35_08bURL,
331345
framework: .llamaCpp,
332-
memoryRequirement: 600_000_000
346+
memoryRequirement: 600_000_000,
347+
supportsThinking: true
333348
)
334349
}
335350
if let qwen35_2bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-2B-GGUF/resolve/main/Qwen3.5-2B-Q4_K_M.gguf") {
@@ -338,7 +353,8 @@ struct RunAnywhereAIApp: App {
338353
name: "Qwen3.5 2B Q4_K_M",
339354
url: qwen35_2bURL,
340355
framework: .llamaCpp,
341-
memoryRequirement: 1_500_000_000
356+
memoryRequirement: 1_500_000_000,
357+
supportsThinking: true
342358
)
343359
}
344360
if let qwen35_4bURL = URL(string: "https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf") {
@@ -347,7 +363,8 @@ struct RunAnywhereAIApp: App {
347363
name: "Qwen3.5 4B Q4_K_M",
348364
url: qwen35_4bURL,
349365
framework: .llamaCpp,
350-
memoryRequirement: 2_800_000_000
366+
memoryRequirement: 2_800_000_000,
367+
supportsThinking: true
351368
)
352369
}
353370

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/DemoLoRAAdapter.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,16 @@ enum LoRAAdapterCatalog {
6868
fileSize: 765_952,
6969
defaultScale: 1.0
7070
),
71+
// --- Abliterated adapter (uncensored fine-tune for Qwen 2.5 0.5B base) ---
72+
LoraAdapterCatalogEntry(
73+
id: "abliterated-lora",
74+
name: "Abliterated (Uncensored)",
75+
description: "Removes content restrictions for unrestricted responses",
76+
downloadURL: URL(string: "https://huggingface.co/Void2377/qwen-lora-gguf/resolve/main/qwen2.5-0.5b-abliterated-lora-f16.gguf")!,
77+
filename: "qwen2.5-0.5b-abliterated-lora-f16.gguf",
78+
compatibleModelIds: ["qwen2.5-0.5b-base-q8_0"],
79+
fileSize: 0,
80+
defaultScale: 1.0
81+
),
7182
]
7283
}

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Models/LoraExamplePrompts.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ enum LoraExamplePrompts {
2727
"Write a short story about a robot discovering emotions for the first time",
2828
"Describe a sunset over the ocean using vivid sensory language",
2929
],
30+
"qwen2.5-0.5b-abliterated-lora-f16.gguf": [
31+
"What are some controversial topics people often debate about?",
32+
"Explain how lock picking works in detail",
33+
],
3034
]
3135

3236
/// Get example prompts for a loaded adapter by its file path.

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Events.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ extension LLMViewModel {
3737
if let id = modelId,
3838
let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == id }) {
3939
self.updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework)
40+
self.setLoadedModelSupportsThinking(matchingModel.supportsThinking)
4041
}
4142
}
4243
}
@@ -89,6 +90,7 @@ extension LLMViewModel {
8990

9091
if let matchingModel = ModelListViewModel.shared.availableModels.first(where: { $0.id == modelId }) {
9192
updateLoadedModelInfo(name: matchingModel.name, framework: matchingModel.framework)
93+
setLoadedModelSupportsThinking(matchingModel.supportsThinking)
9294
}
9395

9496
if !wasLoaded {

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+Generation.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ extension LLMViewModel {
2424

2525
for try await token in stream {
2626
fullResponse += token
27-
await updateMessageContent(at: messageIndex, content: fullResponse)
27+
let displayText = Self.stripThinkTags(from: fullResponse)
28+
await updateMessageContent(at: messageIndex, content: displayText)
2829
NotificationCenter.default.post(
2930
name: Notification.Name("MessageContentUpdated"),
3031
object: nil

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ModelManagement.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ extension LLMViewModel {
1919
await MainActor.run {
2020
self.updateModelLoadedState(isLoaded: true)
2121
self.updateLoadedModelInfo(name: modelInfo.name, framework: modelInfo.framework)
22+
self.setLoadedModelSupportsThinking(modelInfo.supportsThinking)
2223
self.updateSystemMessageAfterModelLoad()
2324
}
2425
} catch {
@@ -39,6 +40,7 @@ extension LLMViewModel {
3940
if let currentModel = modelListViewModel.currentModel {
4041
self.updateModelLoadedState(isLoaded: true)
4142
self.updateLoadedModelInfo(name: currentModel.name, framework: currentModel.framework)
43+
self.setLoadedModelSupportsThinking(currentModel.supportsThinking)
4244
verifyModelLoaded(currentModel)
4345
} else {
4446
self.updateModelLoadedState(isLoaded: false)

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel+ToolCalling.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,13 @@ extension LLMViewModel {
6969
toolCallInfo = nil
7070
}
7171

72+
// Strip any residual <think> tags before displaying
73+
let displayText = Self.stripThinkTags(from: result.text)
74+
7275
// Update the message with the result
7376
await updateMessageWithToolResult(
7477
at: messageIndex,
75-
text: result.text,
78+
text: displayText,
7679
toolCallInfo: toolCallInfo
7780
)
7881
}

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/ViewModels/LLMViewModel.swift

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ final class LLMViewModel {
2929
private(set) var error: Error?
3030
private(set) var isModelLoaded = false
3131
private(set) var loadedModelName: String?
32+
private(set) var loadedModelSupportsThinking = false
3233
private(set) var selectedFramework: InferenceFramework?
3334
private(set) var modelSupportsStreaming = true
3435
private(set) var currentConversation: Conversation?
@@ -80,8 +81,13 @@ final class LLMViewModel {
8081
selectedFramework = framework
8182
}
8283

84+
/// Records whether the currently loaded model supports thinking mode.
///
/// - Parameter value: `true` when the loaded model supports thinking mode,
///   `false` otherwise.
func setLoadedModelSupportsThinking(_ value: Bool) {
    self.loadedModelSupportsThinking = value
}
87+
8388
/// Resets the loaded-model metadata (name, thinking-mode support flag, and
/// selected framework) to its "nothing loaded" values.
///
/// This does not touch `isModelLoaded`; only the descriptive fields are cleared.
func clearLoadedModelInfo() {
    self.loadedModelName = nil
    self.loadedModelSupportsThinking = false
    self.selectedFramework = nil
}
8793

@@ -244,14 +250,21 @@ final class LLMViewModel {
244250
do {
245251
try await ensureModelIsLoaded()
246252
let options = getGenerationOptions()
247-
try await performGeneration(prompt: prompt, options: options, messageIndex: messageIndex)
253+
let effectivePrompt = applyThinkingModePrefix(to: prompt)
254+
try await performGeneration(prompt: effectivePrompt, options: options, messageIndex: messageIndex)
248255
} catch {
249256
await handleGenerationError(error, at: messageIndex)
250257
}
251258

252259
await finalizeGeneration(at: messageIndex)
253260
}
254261

262+
/// Returns the prompt to send for generation, adding a `/no_think` directive
/// line when thinking mode is switched off for a thinking-capable model.
///
/// - Parameter prompt: The prompt text as composed by the caller.
/// - Returns: `prompt` unchanged when the loaded model does not support thinking
///   or when thinking mode is enabled in settings; otherwise `prompt` prefixed
///   with `"/no_think\n"`.
private func applyThinkingModePrefix(to prompt: String) -> String {
    // Models without thinking support never receive the directive.
    if !loadedModelSupportsThinking {
        return prompt
    }

    // The user setting is only consulted for thinking-capable models.
    if SettingsViewModel.shared.thinkingModeEnabled {
        return prompt
    }

    // NOTE(review): "/no_think" is presumably the model-side switch that
    // suppresses reasoning output — confirm against the model's documentation.
    return "/no_think\n\(prompt)"
}
267+
255268
private func performGeneration(
256269
prompt: String,
257270
options: LLMGenerationOptions,
@@ -476,20 +489,17 @@ final class LLMViewModel {
476489
if !isModelLoaded {
477490
throw LLMError.noModelLoaded
478491
}
479-
480-
// Verify model is actually loaded in SDK
481-
if let model = ModelListViewModel.shared.currentModel {
482-
try await RunAnywhere.loadModel(model.id)
483-
}
484492
}
485493

486494
private func getGenerationOptions() -> LLMGenerationOptions {
487-
let savedTemperature = UserDefaults.standard.double(forKey: "defaultTemperature")
495+
// Use object(forKey:) to distinguish an unset key (nil) from a value explicitly set to 0.0
496+
let savedTemperature = UserDefaults.standard.object(forKey: "defaultTemperature") as? Double
488497
let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens")
489498
let savedSystemPrompt = UserDefaults.standard.string(forKey: "defaultSystemPrompt")
499+
let thinkingModeEnabled = SettingsViewModel.shared.thinkingModeEnabled
490500

491501
let effectiveSettings = (
492-
temperature: savedTemperature != 0 ? savedTemperature : Self.defaultTemperatureValue,
502+
temperature: savedTemperature ?? Self.defaultTemperatureValue,
493503
maxTokens: savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue
494504
)
495505

@@ -501,7 +511,7 @@ final class LLMViewModel {
501511
}()
502512

503513
logger.info(
504-
"[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), systemPrompt=\(systemPromptInfo)"
514+
"[PARAMS] App getGenerationOptions: temperature=\(effectiveSettings.temperature), maxTokens=\(effectiveSettings.maxTokens), thinkingMode=\(thinkingModeEnabled), systemPrompt=\(systemPromptInfo)"
505515
)
506516

507517
return LLMGenerationOptions(
@@ -519,8 +529,8 @@ final class LLMViewModel {
519529
}
520530

521531
private func ensureSettingsAreApplied() async {
522-
let savedTemperature = UserDefaults.standard.double(forKey: "defaultTemperature")
523-
let temperature = savedTemperature != 0 ? savedTemperature : Self.defaultTemperatureValue
532+
let savedTemperature = UserDefaults.standard.object(forKey: "defaultTemperature") as? Double
533+
let temperature = savedTemperature ?? Self.defaultTemperatureValue
524534

525535
let savedMaxTokens = UserDefaults.standard.integer(forKey: "defaultMaxTokens")
526536
let maxTokens = savedMaxTokens != 0 ? savedMaxTokens : Self.defaultMaxTokensValue
@@ -542,6 +552,7 @@ final class LLMViewModel {
542552
await MainActor.run {
543553
self.isModelLoaded = true
544554
self.loadedModelName = model.name
555+
self.loadedModelSupportsThinking = model.supportsThinking
545556
self.selectedFramework = model.framework
546557
self.modelSupportsStreaming = supportsStreaming
547558

@@ -563,4 +574,19 @@ final class LLMViewModel {
563574
loadConversation(conversation)
564575
}
565576
}
577+
578+
/// Removes `<think>…</think>` reasoning blocks from model output so that only
/// the user-visible answer is displayed.
///
/// Handling rules:
/// - Every complete `<think>…</think>` block is removed. The closing tag is
///   searched for *after* its opening tag, so a stray `</think>` that appears
///   earlier in the text cannot stop later complete blocks from being stripped.
/// - If an opening `<think>` has no closing tag after it (typical while a
///   response is still streaming), everything from that opening tag to the end
///   of the text is dropped.
/// - A closing `</think>` with no preceding opening tag is left untouched.
///
/// - Parameter text: Raw model output, possibly partial.
/// - Returns: The text without thinking blocks, trimmed of leading and trailing
///   whitespace and newlines.
static func stripThinkTags(from text: String) -> String {
    let openTag = "<think>"
    let closeTag = "</think>"
    var result = text

    while let open = result.range(of: openTag) {
        // Only a closing tag located after this opener can terminate it.
        guard let close = result.range(of: closeTag, range: open.upperBound..<result.endIndex) else {
            // Unterminated block: no later opener can be terminated either, so
            // hide everything from here on instead of leaking partial reasoning.
            result.removeSubrange(open.lowerBound..<result.endIndex)
            break
        }
        result.removeSubrange(open.lowerBound..<close.upperBound)
    }

    return result.trimmingCharacters(in: .whitespacesAndNewlines)
}
566592
}

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Chat/Views/ChatInterfaceView.swift

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ struct ChatInterfaceView: View {
3131
@State private var showingLoRAManagement = false
3232
@State private var pendingLoRAURL: URL?
3333
@State private var loraScale: Float = 1.0
34+
@ObservedObject private var toolSettingsViewModel = ToolSettingsViewModel.shared
35+
@AppStorage("thinkingModeEnabled") private var thinkingModeEnabled = false
3436
@FocusState private var isTextFieldFocused: Bool
3537

3638
private let logger = Logger(
@@ -369,8 +371,8 @@ extension ChatInterfaceView {
369371
.onReceive(
370372
NotificationCenter.default.publisher(for: Notification.Name("MessageContentUpdated"))
371373
) { _ in
372-
if viewModel.isGenerating {
373-
proxy.scrollTo("typing", anchor: .bottom)
374+
if viewModel.isGenerating, let lastMessage = viewModel.messages.last {
375+
proxy.scrollTo(lastMessage.id, anchor: .bottom)
374376
}
375377
}
376378
}
@@ -412,7 +414,7 @@ extension ChatInterfaceView {
412414
.animation(nil, value: message.content)
413415
}
414416

415-
if viewModel.isGenerating {
417+
if viewModel.isGenerating, viewModel.messages.last?.content.isEmpty == true {
416418
TypingIndicatorView()
417419
.id("typing")
418420
.transition(typingTransition)
@@ -445,9 +447,13 @@ extension ChatInterfaceView {
445447
VStack(spacing: 0) {
446448
Divider()
447449

448-
// Status badges (tool calling + LoRA)
450+
// Status badges (thinking mode + tool calling + LoRA)
449451
HStack(spacing: 8) {
450-
if viewModel.useToolCalling {
452+
if thinkingModeEnabled && viewModel.loadedModelSupportsThinking {
453+
thinkingModeBadge
454+
}
455+
456+
if viewModel.useToolCalling && !toolSettingsViewModel.registeredTools.isEmpty {
451457
toolCallingBadge
452458
}
453459

@@ -459,7 +465,7 @@ extension ChatInterfaceView {
459465
loraAddButton
460466
}
461467
}
462-
.padding(.top, (viewModel.useToolCalling || !viewModel.loraAdapters.isEmpty || hasModelSelected) ? 8 : 0)
468+
.padding(.top, ((thinkingModeEnabled && viewModel.loadedModelSupportsThinking) || viewModel.useToolCalling || !viewModel.loraAdapters.isEmpty || hasModelSelected) ? 8 : 0)
463469

464470
HStack(spacing: AppSpacing.mediumLarge) {
465471
TextField("Type a message...", text: $viewModel.currentInput, axis: .vertical)
@@ -493,6 +499,24 @@ extension ChatInterfaceView {
493499
}
494500
}
495501

502+
/// Tappable status badge labelled "Thinking"; tapping it toggles
/// `thinkingModeEnabled`.
var thinkingModeBadge: some View {
    // Single tint shared by the foreground and the translucent background.
    let tint = AppColors.primaryPurple

    return Button(
        action: { thinkingModeEnabled.toggle() },
        label: {
            HStack(spacing: 6) {
                Image(systemName: "lightbulb.min.fill")
                    .font(.system(size: 10))
                Text("Thinking")
                    .font(AppTypography.caption2)
            }
            .foregroundColor(tint)
            .padding(.horizontal, 10)
            .padding(.vertical, 4)
            .background(tint.opacity(0.1))
            .cornerRadius(6)
        }
    )
}
519+
496520
var toolCallingBadge: some View {
497521
HStack(spacing: 6) {
498522
Image(systemName: "wrench.and.screwdriver")

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Models/ModelListViewModel.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,14 @@ class ModelListViewModel: ObservableObject {
129129
await loadModelsFromRegistry()
130130
}
131131

132+
private var isLoadingModel = false
133+
132134
/// Select and load a model
133135
func selectModel(_ model: ModelInfo) async {
136+
guard !isLoadingModel else { return }
137+
isLoadingModel = true
138+
defer { isLoadingModel = false }
139+
134140
do {
135141
try await loadModel(model)
136142
setCurrentModel(model)

0 commit comments

Comments
 (0)