Skip to content

Commit ff14d12

Browse files
VyasGuru authored and sanchitmonga22 committed
Add batch transcription in voice agent
1 parent 3d12e83 commit ff14d12

7 files changed

Lines changed: 289 additions & 126 deletions

File tree

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/RAG/ViewModels/RAGViewModel.swift

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,50 @@ enum MessageRole {
1919
case system
2020
}
2121

22+
// MARK: - RAG Message
23+
24+
/// A single entry in the RAG conversation.
///
/// `text` is what the bubble displays; `thinkingContent` optionally carries the
/// model's reasoning taken from a `<think>...</think>` block.
struct RAGMessage: Identifiable {
    /// Fresh identifier generated for every message value.
    let id = UUID()
    let role: MessageRole
    let text: String
    let thinkingContent: String?

    init(role: MessageRole, text: String, thinkingContent: String? = nil) {
        self.role = role
        self.text = text
        self.thinkingContent = thinkingContent
    }

    // MARK: - Think Tag Helpers

    private static let thinkOpenTag = "<think>"
    private static let thinkCloseTag = "</think>"

    /// Extract the content inside the first `<think>...</think>` pair.
    ///
    /// The closing tag is searched for only *after* the opening tag, so a stray
    /// `</think>` earlier in the text does not hide a well-formed block.
    ///
    /// - Parameter text: Raw model output.
    /// - Returns: The trimmed reasoning text, or `nil` when there is no complete
    ///   pair or the enclosed text is blank.
    static func extractThinkingContent(from text: String) -> String? {
        guard let startRange = text.range(of: thinkOpenTag),
              let endRange = text.range(
                  of: thinkCloseTag,
                  range: startRange.upperBound..<text.endIndex
              ) else {
            return nil
        }
        let content = String(text[startRange.upperBound..<endRange.lowerBound])
            .trimmingCharacters(in: .whitespacesAndNewlines)
        return content.isEmpty ? nil : content
    }

    /// Strip all `<think>...</think>` blocks and a trailing incomplete `<think>` tag.
    ///
    /// - Parameter text: Raw model output.
    /// - Returns: The text with reasoning removed and surrounding whitespace trimmed.
    static func stripThinkTags(from text: String) -> String {
        var result = text

        // Remove every complete pair. Looking for the closing tag after the
        // opening tag keeps a stray leading `</think>` from ending the loop early.
        while let startRange = result.range(of: thinkOpenTag),
              let endRange = result.range(
                  of: thinkCloseTag,
                  range: startRange.upperBound..<result.endIndex
              ) {
            result.removeSubrange(startRange.lowerBound..<endRange.upperBound)
        }

        // Drop an unterminated block at the end: everything from the last
        // opening tag onward, when no closing tag follows it.
        if let trailingStart = result.range(of: thinkOpenTag, options: .backwards),
           result.range(of: thinkCloseTag, range: trailingStart.upperBound..<result.endIndex) == nil {
            result = String(result[result.startIndex..<trailingStart.lowerBound])
        }

        return result.trimmingCharacters(in: .whitespacesAndNewlines)
    }
}
65+
2266
// MARK: - RAG View Model
2367

2468
@MainActor
@@ -33,7 +77,7 @@ final class RAGViewModel {
3377

3478
// MARK: - Query State
3579

36-
private(set) var messages: [(role: MessageRole, text: String)] = []
80+
private(set) var messages: [RAGMessage] = []
3781
private(set) var isQuerying = false
3882
/// Settable from the view layer to surface file-picker failures in the error banner.
3983
var error: Error?
@@ -97,7 +141,7 @@ final class RAGViewModel {
97141
guard !question.isEmpty else { return }
98142
guard isDocumentLoaded else { return }
99143

100-
messages.append((role: .user, text: question))
144+
messages.append(RAGMessage(role: .user, text: question))
101145
currentQuestion = ""
102146
isQuerying = true
103147
error = nil
@@ -117,11 +161,13 @@ final class RAGViewModel {
117161

118162
logger.info("Querying RAG pipeline: \(question)")
119163
let result = try await RunAnywhere.ragQuery(question: effectiveQuestion)
120-
messages.append((role: .assistant, text: result.answer))
164+
let thinkingContent = RAGMessage.extractThinkingContent(from: result.answer)
165+
let displayText = RAGMessage.stripThinkTags(from: result.answer)
166+
messages.append(RAGMessage(role: .assistant, text: displayText, thinkingContent: thinkingContent))
121167
logger.info("Query complete (\(result.totalTimeMs, format: .fixed(precision: 0))ms)")
122168
} catch {
123169
self.error = error
124-
messages.append((role: .assistant, text: "Error: \(error.localizedDescription)"))
170+
messages.append(RAGMessage(role: .assistant, text: "Error: \(error.localizedDescription)"))
125171
logger.error("Query failed: \(error.localizedDescription)")
126172
}
127173
}

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/RAG/Views/DocumentRAGView.swift

Lines changed: 127 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -384,9 +384,9 @@ extension DocumentRAGView {
384384
Spacer(minLength: AppSpacing.large)
385385
.id("top-spacer")
386386

387-
ForEach(Array(viewModel.messages.enumerated()), id: \.offset) { index, message in
387+
ForEach(viewModel.messages) { message in
388388
RAGMessageBubble(message: message)
389-
.id(index)
389+
.id(message.id)
390390
}
391391

392392
if viewModel.isQuerying {
@@ -412,8 +412,8 @@ extension DocumentRAGView {
412412
withAnimation(.easeInOut(duration: AppLayout.animationFast)) {
413413
if viewModel.isQuerying {
414414
proxy.scrollTo("querying", anchor: .bottom)
415-
} else if !viewModel.messages.isEmpty {
416-
proxy.scrollTo(viewModel.messages.count - 1, anchor: .bottom)
415+
} else if let lastMessage = viewModel.messages.last {
416+
proxy.scrollTo(lastMessage.id, anchor: .bottom)
417417
}
418418
}
419419
}
@@ -503,31 +503,143 @@ extension DocumentRAGView {
503503
// MARK: - RAG Message Bubble
504504

505505
private struct RAGMessageBubble: View {
506-
let message: (role: MessageRole, text: String)
506+
let message: RAGMessage
507+
@State private var isThinkingExpanded = false
507508

508509
private var isUser: Bool {
509510
message.role == .user
510511
}
511512

513+
/// True only when the message carries non-empty reasoning text.
private var hasThinking: Bool {
    guard let reasoning = message.thinkingContent else { return false }
    return !reasoning.isEmpty
}
516+
512517
var body: some View {
513518
HStack(alignment: .bottom, spacing: AppSpacing.smallMedium) {
514519
if isUser { Spacer(minLength: AppSpacing.xxxLarge) }
515520

516-
Text(message.text)
517-
.font(.body)
518-
.foregroundColor(isUser ? .white : AppColors.textPrimary)
519-
.padding(.horizontal, AppSpacing.mediumLarge)
520-
.padding(.vertical, AppSpacing.smallMedium)
521+
VStack(alignment: .leading, spacing: 4) {
522+
if !isUser && hasThinking {
523+
thinkingSection
524+
}
525+
526+
Text(message.text)
527+
.font(.body)
528+
.foregroundColor(isUser ? .white : AppColors.textPrimary)
529+
.padding(.horizontal, AppSpacing.mediumLarge)
530+
.padding(.vertical, AppSpacing.smallMedium)
531+
.background(
532+
isUser
533+
? AppColors.messageBubbleUser
534+
: AppColors.messageBubbleAssistant
535+
)
536+
.cornerRadius(AppSpacing.cornerRadiusBubble)
537+
}
538+
539+
if !isUser { Spacer(minLength: AppSpacing.xxxLarge) }
540+
}
541+
}
542+
543+
// MARK: - Thinking Section
544+
545+
private var thinkingSection: some View {
546+
VStack(alignment: .leading, spacing: AppSpacing.small) {
547+
Button {
548+
withAnimation(.easeInOut(duration: AppLayout.animationFast)) {
549+
isThinkingExpanded.toggle()
550+
}
551+
} label: {
552+
HStack(spacing: 8) {
553+
Image(systemName: "lightbulb.min")
554+
.font(AppTypography.caption)
555+
.foregroundColor(AppColors.primaryPurple)
556+
557+
Text(isThinkingExpanded ? "Hide reasoning" : thinkingSummary)
558+
.font(AppTypography.caption)
559+
.foregroundColor(AppColors.primaryPurple)
560+
.lineLimit(1)
561+
562+
Spacer()
563+
564+
Image(systemName: isThinkingExpanded ? "chevron.up" : "chevron.right")
565+
.font(AppTypography.caption2)
566+
.foregroundColor(AppColors.primaryPurple.opacity(0.6))
567+
}
568+
.padding(.horizontal, AppSpacing.regular)
569+
.padding(.vertical, AppSpacing.padding9)
521570
.background(
522-
isUser
523-
? AppColors.messageBubbleUser
524-
: AppColors.messageBubbleAssistant
571+
RoundedRectangle(cornerRadius: AppSpacing.mediumLarge)
572+
.fill(
573+
LinearGradient(
574+
colors: [
575+
AppColors.primaryPurple.opacity(0.1),
576+
AppColors.primaryPurple.opacity(0.05)
577+
],
578+
startPoint: .topLeading,
579+
endPoint: .bottomTrailing
580+
)
581+
)
582+
.shadow(color: AppColors.primaryPurple.opacity(0.2), radius: 2, x: 0, y: 1)
583+
.overlay(
584+
RoundedRectangle(cornerRadius: AppSpacing.mediumLarge)
585+
.strokeBorder(
586+
AppColors.primaryPurple.opacity(0.2),
587+
lineWidth: AppSpacing.strokeThin
588+
)
589+
)
525590
)
526-
.cornerRadius(AppSpacing.cornerRadiusBubble)
591+
}
592+
.buttonStyle(PlainButtonStyle())
527593

528-
if !isUser { Spacer(minLength: AppSpacing.xxxLarge) }
594+
if isThinkingExpanded {
595+
ScrollView {
596+
Text(message.thinkingContent ?? "")
597+
.font(AppTypography.caption)
598+
.foregroundColor(AppColors.textSecondary)
599+
.frame(maxWidth: .infinity, alignment: .leading)
600+
.fixedSize(horizontal: false, vertical: true)
601+
.multilineTextAlignment(.leading)
602+
}
603+
.frame(maxHeight: AppSpacing.minFrameHeight)
604+
.padding(AppSpacing.mediumLarge)
605+
.background(
606+
RoundedRectangle(cornerRadius: AppSpacing.medium)
607+
.fill(AppColors.backgroundGray6)
608+
)
609+
.transition(.asymmetric(
610+
insertion: .opacity.combined(with: .slide),
611+
removal: .opacity.combined(with: .slide)
612+
))
613+
}
529614
}
530615
}
616+
617+
/// One-line preview of the reasoning text, used as the collapsed toggle label.
///
/// Returns the first sentence followed by "..." when there are at least two
/// sentences and the first is longer than 20 characters; otherwise the text
/// cut at the last space within its first 80 characters; otherwise the whole
/// trimmed text. Returns an empty string when there is no reasoning.
private var thinkingSummary: String {
    guard let rawReasoning = message.thinkingContent else { return "" }
    let reasoning = rawReasoning.trimmingCharacters(in: .whitespacesAndNewlines)

    // Split on sentence terminators and discard fragments that are blank.
    let terminators = CharacterSet(charactersIn: ".!?")
    let sentenceTexts = reasoning
        .components(separatedBy: terminators)
        .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
        .filter { !$0.isEmpty }

    if sentenceTexts.count >= 2, let leadSentence = sentenceTexts.first, leadSentence.count > 20 {
        return leadSentence + "..."
    }

    // Short enough to show as-is.
    guard reasoning.count > 80 else { return reasoning }

    // Otherwise clip to 80 characters, preferring to break at the last space.
    let clipped = String(reasoning.prefix(80))
    guard let wordBreak = clipped.lastIndex(of: " ") else {
        return clipped + "..."
    }
    return String(clipped[..<wordBreak]) + "..."
}
531643
}
532644

533645

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Settings/SettingsViewModel.swift

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import Foundation
1010
import SwiftUI
1111
import RunAnywhere
1212
import Combine
13+
import os
1314

1415
@MainActor
1516
class SettingsViewModel: ObservableObject {
@@ -45,6 +46,7 @@ class SettingsViewModel: ObservableObject {
4546

4647
// MARK: - Private Properties
4748

49+
private let logger = Logger(subsystem: "com.runanywhere.RunAnywhereAI", category: "Settings")
4850
private var cancellables = Set<AnyCancellable>()
4951
private let keychainService = KeychainService.shared
5052
private let apiKeyStorageKey = "runanywhere_api_key"
@@ -99,32 +101,34 @@ class SettingsViewModel: ObservableObject {
99101
}
100102

101103
private func subscribeToModelNotifications() {
102-
NotificationCenter.default.addObserver(
103-
self,
104-
selector: #selector(handleModelLoaded(_:)),
105-
name: Notification.Name("ModelLoaded"),
106-
object: nil
107-
)
108-
NotificationCenter.default.addObserver(
109-
self,
110-
selector: #selector(handleModelUnloaded),
111-
name: Notification.Name("ModelUnloaded"),
112-
object: nil
113-
)
114-
}
115-
116-
@objc private func handleModelLoaded(_ notification: Notification) {
117-
if let model = notification.object as? ModelInfo {
118-
loadedModelSupportsThinking = model.supportsThinking
119-
}
104+
// Subscribe to SDK events directly so any LLM model load
105+
// (from chat, voice agent, or RAG) updates the thinking mode flag.
106+
RunAnywhere.events.events
107+
.receive(on: DispatchQueue.main)
108+
.sink { [weak self] event in
109+
Task { @MainActor in
110+
self?.handleSDKEvent(event)
111+
}
112+
}
113+
.store(in: &cancellables)
120114
}
121115

122-
@objc private func handleModelUnloaded() {
123-
loadedModelSupportsThinking = false
124-
}
116+
private func handleSDKEvent(_ event: any SDKEvent) {
117+
guard event.category == .llm else { return }
125118

126-
deinit {
127-
NotificationCenter.default.removeObserver(self)
119+
switch event.type {
120+
case "llm_model_load_completed":
121+
let modelId = event.properties["model_id"] ?? ""
122+
if let model = ModelListViewModel.shared.availableModels.first(where: { $0.id == modelId }) {
123+
loadedModelSupportsThinking = model.supportsThinking
124+
logger.info("LLM loaded (\(modelId)), supportsThinking: \(model.supportsThinking)")
125+
}
126+
case "llm_model_unloaded":
127+
loadedModelSupportsThinking = false
128+
logger.info("LLM unloaded, thinking mode disabled")
129+
default:
130+
break
131+
}
128132
}
129133

130134
// MARK: - Setup

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Voice/VoiceAgentViewModel.swift

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,13 +150,15 @@ final class VoiceAgentViewModel: ObservableObject {
150150
var instructionText: String {
151151
switch sessionState {
152152
case .listening:
153-
return "Listening... Pause to send"
153+
return "Tap to send · Hold to stop"
154154
case .processing:
155155
return "Processing your message..."
156156
case .speaking:
157157
return "Speaking..."
158158
case .connecting:
159159
return "Connecting..."
160+
case .connected:
161+
return "Tap to speak · Hold to end"
160162
default:
161163
return "Tap to start conversation"
162164
}
@@ -389,6 +391,7 @@ final class VoiceAgentViewModel: ObservableObject {
389391
do {
390392
let settings = SettingsViewModel.shared
391393
let voiceConfig = VoiceSessionConfig(
394+
continuousMode: false,
392395
thinkingModeEnabled: settings.loadedModelSupportsThinking && settings.thinkingModeEnabled,
393396
maxTokens: settings.maxTokens
394397
)
@@ -434,6 +437,14 @@ final class VoiceAgentViewModel: ObservableObject {
434437
logger.debug("Forced audio send")
435438
}
436439

440+
/// Resume listening on the current session (push-to-talk: user taps mic after turn completes).
///
/// Does nothing when there is no session, so the view model never reports the
/// listening state without a session to resume.
func resumeListening() async {
    guard let activeSession = session else {
        logger.debug("Resume listening ignored: no active session")
        return
    }

    await activeSession.resumeListening()
    sessionState = .listening
    currentStatus = "Listening..."
    logger.debug("Resumed listening")
}
447+
437448
// MARK: - Session Event Handling
438449

439450
private func handleSessionEvent(_ event: VoiceSessionEvent) {
@@ -447,7 +458,7 @@ final class VoiceAgentViewModel: ObservableObject {
447458
case .speaking: sessionState = .speaking; currentStatus = "Speaking..."
448459
case let .turnCompleted(transcript, response, _, _):
449460
currentTranscript = transcript; assistantResponse = response
450-
sessionState = .listening; currentStatus = "Listening..."
461+
sessionState = .connected; currentStatus = "Ready"
451462
case .stopped: sessionState = .disconnected; currentStatus = "Ready"
452463
case .error(let message): logger.error("Session error: \(message)"); errorMessage = message
453464
}

0 commit comments

Comments
 (0)