Skip to content

Commit bc7db9b

Browse files
Merge pull request #477 from RunanywhereAI/smonga/merge-queue-integration
Merging the 9-PR integration into main. All SDKs (iOS Swift, Kotlin JVM, Web, C++ commons) compile clean. C++ commons test suite: 35/35 passing. Android, Flutter, and Web example apps compile clean. iOS example app has a known link failure against the committed prebuilt RACommons.xcframework (which predates #471 VAD API + #469 timing API) — a fresh ./scripts/build-swift.sh --setup is required post-merge to regenerate the xcframeworks. Preserving the individual merge commits for traceability (no squash).
2 parents 49279b7 + 14375bf commit bc7db9b

287 files changed

Lines changed: 15635 additions & 1449 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,12 @@ tools/
388388
sdk/runanywhere-react-native/packages/rag/ios/.testlocal
389389

390390

391+
# Python virtual environments
392+
.venv*/
393+
venv*/
394+
__pycache__/
395+
*.pyc
396+
391397
# Node
392398
node_modules/
393399
/tools/

.idea/vcs.xml

Lines changed: 13 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Package.swift

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ let useLocalBinaries = false // Toggle: true for local dev, false for release
4343
// Updated automatically by CI/CD during releases
4444
let sdkVersion = "0.19.7"
4545

46+
// MetalRT remote binary availability flag.
47+
// Set to `false` until a real checksum for RABackendMetalRT-v<sdkVersion>.zip
48+
// has been published. When `false`, the MetalRT product/targets are only
49+
// exposed under `useLocalBinaries = true`, so SPM resolution will not fail
50+
// for external consumers due to a placeholder checksum.
51+
let metalrtRemoteBinaryAvailable = false
52+
53+
let includeMetalRT = useLocalBinaries || metalrtRemoteBinaryAvailable
54+
4655
let package = Package(
4756
name: "runanywhere-sdks",
4857
platforms: [
@@ -81,7 +90,8 @@ let package = Package(
8190
name: "RunAnywhereWhisperKit",
8291
targets: ["WhisperKitRuntime"]
8392
),
84-
],
93+
94+
] + metalRTProducts(),
8595
dependencies: [
8696
.package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"),
8797
.package(url: "https://github.com/Alamofire/Alamofire.git", from: "5.9.0"),
@@ -221,9 +231,62 @@ let package = Package(
221231
path: "sdk/runanywhere-swift/Tests/RunAnywhereTests"
222232
),
223233

224-
] + binaryTargets()
234+
] + metalRTTargets() + binaryTargets()
225235
)
226236

237+
// =============================================================================
238+
// METALRT PRODUCT / TARGET GATING
239+
// =============================================================================
240+
// The RABackendMetalRT.xcframework is not yet published to GitHub releases
241+
// with a real checksum. To avoid SPM resolution failures for external
242+
// consumers due to a placeholder zero-checksum binary target, the MetalRT
243+
// product and its dependent targets are only included when:
244+
// - `useLocalBinaries == true` (local dev with a checked-out xcframework), or
245+
// - `metalrtRemoteBinaryAvailable == true` (once a real checksum is wired in).
246+
/// Library products contributed by the MetalRT backend.
///
/// Produces the `RunAnywhereMetalRT` library (backed by the `MetalRTRuntime`
/// target) when `includeMetalRT` is true, and an empty list otherwise so the
/// product is left out of the package graph.
func metalRTProducts() -> [Product] {
    if !includeMetalRT {
        return []
    }

    let metalRTLibrary: Product = .library(
        name: "RunAnywhereMetalRT",
        targets: ["MetalRTRuntime"]
    )
    return [metalRTLibrary]
}
255+
256+
/// Source targets contributed by the MetalRT backend.
///
/// Returns an empty list unless `includeMetalRT` is true. When enabled it
/// yields two targets, in this order: the `MetalRTBackend` C bridge module and
/// the `MetalRTRuntime` Swift target that depends on it.
func metalRTTargets() -> [Target] {
    if !includeMetalRT {
        return []
    }

    // MetalRT C Bridge Module - exposes rac_backend_metalrt_register()
    let bridgeModule: Target = .target(
        name: "MetalRTBackend",
        dependencies: ["RABackendMetalRTBinary"],
        path: "sdk/runanywhere-swift/Sources/MetalRTRuntime/include",
        publicHeadersPath: "."
    )

    // MetalRT Runtime Backend (custom Metal GPU kernels).
    // The bridge headers live in `include`, which belongs to the target above,
    // so that directory is excluded here.
    let runtimeBackend: Target = .target(
        name: "MetalRTRuntime",
        dependencies: [
            "RunAnywhere",
            "MetalRTBackend",
            "RABackendMetalRTBinary",
        ],
        path: "sdk/runanywhere-swift/Sources/MetalRTRuntime",
        exclude: ["include"],
        resources: [
            .copy("Resources/default.metallib"),
        ],
        linkerSettings: [
            .linkedLibrary("c++"),
            .linkedFramework("Accelerate"),
            .linkedFramework("Metal"),
            .linkedFramework("CoreGraphics"),
            .linkedFramework("ImageIO"),
        ]
    )

    return [bridgeModule, runtimeBackend]
}
289+
227290
// =============================================================================
228291
// BINARY TARGET SELECTION
229292
// =============================================================================
@@ -251,6 +314,10 @@ func binaryTargets() -> [Target] {
251314
name: "RABackendONNXBinary",
252315
path: "sdk/runanywhere-swift/Binaries/RABackendONNX.xcframework"
253316
),
317+
.binaryTarget(
318+
name: "RABackendMetalRTBinary",
319+
path: "sdk/runanywhere-swift/Binaries/RABackendMetalRT.xcframework"
320+
),
254321
]
255322

256323
// ONNX Runtime xcframeworks - split by platform
@@ -302,6 +369,19 @@ func binaryTargets() -> [Target] {
302369
),
303370
]
304371

372+
// MetalRT remote binary is only appended once a real checksum has been
373+
// published. Until then the MetalRT product/targets are omitted from
374+
// the package graph entirely (see metalRTProducts/metalRTTargets).
375+
if metalrtRemoteBinaryAvailable {
376+
targets.append(
377+
.binaryTarget(
378+
name: "RABackendMetalRTBinary",
379+
url: "https://github.com/RunanywhereAI/runanywhere-sdks/releases/download/v\(sdkVersion)/RABackendMetalRT-v\(sdkVersion).zip",
380+
checksum: "0000000000000000000000000000000000000000000000000000000000000000" // TODO: replace with real checksum
381+
)
382+
)
383+
}
384+
305385
return targets
306386
}
307387
}

Playground/YapRun/YapRun/Features/Playground/PlaygroundView.swift

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ struct PlaygroundView: View {
4141
}
4242
}
4343
.task { await viewModel.checkModelStatus() }
44+
.onDisappear {
45+
// Release audio resources when leaving the tab to prevent conflicts
46+
// with FlowSessionManager's AudioCaptureManager.
47+
if viewModel.isRecording {
48+
Task { await viewModel.toggleRecording() }
49+
}
50+
}
4451
.onReceive(NotificationCenter.default.publisher(for: UIApplication.willEnterForegroundNotification)) { _ in
4552
Task { await viewModel.checkModelStatus() }
4653
}
@@ -126,18 +133,38 @@ struct PlaygroundView: View {
126133
if viewModel.isRecording {
127134
// Recording indicator
128135
VStack(spacing: 12) {
129-
// Elapsed time
130-
Text(formatTime(viewModel.elapsedSeconds))
131-
.font(.system(size: 20, weight: .semibold, design: .monospaced))
132-
.foregroundStyle(Color.red)
136+
// Elapsed time + speech indicator
137+
HStack(spacing: 8) {
138+
Text(formatTime(viewModel.elapsedSeconds))
139+
.font(.system(size: 20, weight: .semibold, design: .monospaced))
140+
.foregroundStyle(Color.red)
141+
142+
if viewModel.isAutoStopEnabled {
143+
Circle()
144+
.fill(viewModel.speechDetected ? Color.green : Color.gray.opacity(0.4))
145+
.frame(width: 8, height: 8)
146+
}
147+
}
133148

134149
// Waveform bars
135150
WaveformBars(level: viewModel.audioLevel)
136151
}
137152
} else {
138-
Text(viewModel.transcription.isEmpty ? "Tap to record" : "Tap to record again")
139-
.font(.subheadline)
140-
.foregroundStyle(AppColors.textTertiary)
153+
VStack(spacing: 12) {
154+
Text(viewModel.transcription.isEmpty ? "Tap to record" : "Tap to record again")
155+
.font(.subheadline)
156+
.foregroundStyle(AppColors.textTertiary)
157+
158+
// Auto-stop toggle
159+
Toggle(isOn: $viewModel.isAutoStopEnabled) {
160+
Label("Auto-stop on silence", systemImage: "waveform.badge.minus")
161+
.font(.caption)
162+
.foregroundStyle(AppColors.textSecondary)
163+
}
164+
.toggleStyle(.switch)
165+
.tint(AppColors.primaryGreen)
166+
.frame(width: 240)
167+
}
141168
}
142169
}
143170
}

Playground/YapRun/YapRun/Features/Playground/PlaygroundViewModel.swift

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,21 @@ final class PlaygroundViewModel {
2525
var errorMessage: String?
2626
var modelName: String?
2727

28+
// MARK: - Auto-Stop (VAD)
29+
30+
var isAutoStopEnabled = false
31+
var speechDetected = false
32+
2833
// MARK: - Private
2934

3035
private let audioCapture = AudioCaptureManager()
3136
private var audioBuffer = Foundation.Data()
3237
private var timerTask: Task<Void, Never>?
38+
private var vadMonitorTask: Task<Void, Never>?
39+
private var vadProcessedBytes = 0
40+
private var silenceStartTime: Date?
41+
private var hasSpeechBeenDetected = false
42+
private let autoStopSilenceDuration: TimeInterval = 2.0
3343
private let logger = Logger(subsystem: "com.runanywhere.yaprun", category: "Playground")
3444

3545
// MARK: - Model Check
@@ -58,6 +68,15 @@ final class PlaygroundViewModel {
5868
return
5969
}
6070

71+
// Prevent conflict with active voice keyboard session (iOS-only).
72+
// FlowSessionManager is compiled `#if os(iOS)` so this check is skipped on macOS.
73+
#if os(iOS)
74+
guard !FlowSessionManager.shared.isActive else {
75+
errorMessage = "Voice keyboard session is active. End it first."
76+
return
77+
}
78+
#endif
79+
6180
let permitted = await audioCapture.requestPermission()
6281
guard permitted else {
6382
errorMessage = "Microphone access is required."
@@ -71,7 +90,7 @@ final class PlaygroundViewModel {
7190

7291
do {
7392
// AudioCaptureManager dispatches this callback on DispatchQueue.main
74-
try audioCapture.startRecording { [weak self] data in
93+
try await audioCapture.startRecording { [weak self] data in
7594
MainActor.assumeIsolated {
7695
guard let self else { return }
7796
self.audioBuffer.append(data)
@@ -80,7 +99,10 @@ final class PlaygroundViewModel {
8099
}
81100
isRecording = true
82101
startTimer()
83-
logger.info("Recording started")
102+
if isAutoStopEnabled {
103+
startVADMonitoring()
104+
}
105+
logger.info("Recording started (autoStop=\(self.isAutoStopEnabled))")
84106
} catch {
85107
errorMessage = "Could not start microphone: \(error.localizedDescription)"
86108
logger.error("Recording start failed: \(error.localizedDescription)")
@@ -91,8 +113,11 @@ final class PlaygroundViewModel {
91113
audioCapture.stopRecording()
92114
isRecording = false
93115
audioLevel = 0
116+
speechDetected = false
94117
timerTask?.cancel()
95118
timerTask = nil
119+
vadMonitorTask?.cancel()
120+
vadMonitorTask = nil
96121

97122
guard !audioBuffer.isEmpty else {
98123
errorMessage = "No audio was captured."
@@ -133,5 +158,63 @@ final class PlaygroundViewModel {
133158
audioBuffer = Foundation.Data()
134159
errorMessage = nil
135160
elapsedSeconds = 0
161+
speechDetected = false
162+
}
163+
164+
// MARK: - VAD Monitoring
165+
166+
/// Resets the VAD bookkeeping and starts the task that polls for new audio.
///
/// The task wakes every 100 ms and calls `processVADChunk()`. It ends when it
/// is cancelled, when the view model has been deallocated, or when
/// `isRecording` is false.
private func startVADMonitoring() {
    // Cancel a monitor left over from an earlier start. Without this the old
    // task is orphaned and keeps polling next to the new one, and both would
    // advance `vadProcessedBytes` and the silence timer.
    vadMonitorTask?.cancel()

    vadProcessedBytes = 0
    hasSpeechBeenDetected = false
    silenceStartTime = nil
    speechDetected = false

    vadMonitorTask = Task { [weak self] in
        while !Task.isCancelled {
            // `Task.sleep` throws only on cancellation; the cancellation check
            // in the guard below handles that case, so the error is dropped.
            try? await Task.sleep(nanoseconds: 100_000_000) // 100ms
            // `self` is re-bound on every pass so the task never keeps the
            // view model alive across the sleep.
            guard let self, !Task.isCancelled, self.isRecording else { break }
            await self.processVADChunk()
        }
    }
}
180+
181+
/// Runs voice-activity detection on the audio captured since the previous call
/// and stops the recording after sustained silence that follows speech.
///
/// Takes the unprocessed tail of `audioBuffer`, converts it to float samples,
/// passes them to `RunAnywhere.detectSpeech(in:)`, and stores the result in
/// `speechDetected`. Once speech has been seen, a silence streak of at least
/// `autoStopSilenceDuration` seconds calls `stopAndTranscribe()`.
/// Detection errors are logged and otherwise ignored, so the next poll retries
/// with newer audio.
private func processVADChunk() async {
    // Consume whole Int16 samples only. A trailing odd byte stays in the
    // buffer for the next poll; skipping over it would shift every later
    // sample by one byte.
    let bytesPerSample = MemoryLayout<Int16>.size
    let pendingBytes = audioBuffer.count - vadProcessedBytes
    guard pendingBytes >= bytesPerSample else { return }

    let chunkEnd = vadProcessedBytes + pendingBytes - pendingBytes % bytesPerSample
    let newData = audioBuffer.subdata(in: vadProcessedBytes..<chunkEnd)
    vadProcessedBytes = chunkEnd

    let samples = convertInt16ToFloat(newData)
    guard !samples.isEmpty else { return }

    do {
        let isSpeech = try await RunAnywhere.detectSpeech(in: samples)

        // The recording may have been stopped (or restarted) while detection
        // was suspended. A stale result must not update the indicator or
        // trigger a second stop.
        guard isRecording, !Task.isCancelled else { return }

        speechDetected = isSpeech

        if isSpeech {
            hasSpeechBeenDetected = true
            silenceStartTime = nil
        } else if hasSpeechBeenDetected {
            // Silence before any speech is ignored, so recording never
            // auto-stops before the user has said anything.
            guard let start = silenceStartTime else {
                silenceStartTime = Date()
                return
            }
            if Date().timeIntervalSince(start) >= autoStopSilenceDuration {
                logger.info("Auto-stop: \(self.autoStopSilenceDuration)s silence after speech")
                // NOTE(review): this runs inside `vadMonitorTask`. If
                // `stopAndTranscribe()` cancels that task, the rest of the
                // stop path continues under a cancelled task — confirm that
                // nothing downstream checks for cancellation.
                await stopAndTranscribe()
            }
        }
    } catch {
        logger.error("VAD error: \(error.localizedDescription)")
    }
}
212+
213+
/// Converts 16-bit signed PCM bytes into float samples in the range [-1.0, 1.0).
///
/// - Parameter data: Raw bytes holding `Int16` samples in the host's byte
///   order. A trailing odd byte is ignored.
/// - Returns: One `Float` per complete sample, each divided by 32768.
private func convertInt16ToFloat(_ data: Foundation.Data) -> [Float] {
    let bytesPerSample = MemoryLayout<Int16>.size
    let sampleCount = data.count / bytesPerSample
    guard sampleCount > 0 else { return [] }

    return data.withUnsafeBytes { rawBuffer -> [Float] in
        (0..<sampleCount).map { index -> Float in
            // `loadUnaligned` has no alignment precondition and does not rebind
            // the memory, so a `Data` slice starting at an odd offset is safe.
            let sample = rawBuffer.loadUnaligned(
                fromByteOffset: index * bytesPerSample,
                as: Int16.self
            )
            return Float(sample) / 32768.0
        }
    }
}
137220
}

0 commit comments

Comments
 (0)