Skip to content

Commit 296c8bd

Browse files
Revert "Shubham rag fix (#441)"
This reverts commit 838d057.
1 parent 838d057 commit 296c8bd

228 files changed

Lines changed: 17491 additions & 10599 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

Package.swift

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,18 @@ import Foundation
3737
// ./scripts/build-swift.sh --set-remote (sets useLocalBinaries = false)
3838
//
3939
// =============================================================================
40-
let useLocalBinaries = true // Toggle: true for local dev, false for release
40+
let useLocalBinaries = false // Toggle: true for local dev, false for release
4141

4242
// Version for remote XCFrameworks (used when testLocal = false)
4343
// Updated automatically by CI/CD during releases
4444
let sdkVersion = "0.19.6"
4545

46+
// RAG binary is only available in local dev mode until the release artifact is published.
47+
// In remote mode, the RAG xcframework zip + checksum don't exist yet, so including the
48+
// binary target would block ALL SPM package resolution (not just RAG).
49+
// Set to true once RABackendRAG-v<version>.zip is published to GitHub releases.
50+
let ragRemoteBinaryAvailable = false
51+
4652
let package = Package(
4753
name: "runanywhere-sdks",
4854
platforms: [
@@ -81,7 +87,7 @@ let package = Package(
8187
name: "RunAnywhereWhisperKit",
8288
targets: ["WhisperKitRuntime"]
8389
),
84-
],
90+
] + ragProducts(),
8591
dependencies: [
8692
.package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"),
8793
.package(url: "https://github.com/Alamofire/Alamofire.git", from: "5.9.0"),
@@ -146,7 +152,7 @@ let package = Package(
146152
.product(name: "StableDiffusion", package: "ml-stable-diffusion"),
147153
"CRACommons",
148154
"RACommonsBinary",
149-
],
155+
] + ragCoreDependencies(),
150156
path: "sdk/runanywhere-swift/Sources/RunAnywhere",
151157
exclude: ["CRACommons"],
152158
swiftSettings: [
@@ -225,9 +231,62 @@ let package = Package(
225231
path: "sdk/runanywhere-swift/Tests/RunAnywhereTests"
226232
),
227233

228-
] + binaryTargets()
234+
] + ragTargets() + binaryTargets()
229235
)
230236

237+
// =============================================================================
238+
// RAG TARGET HELPERS
239+
// =============================================================================
240+
// RAG targets are gated because the remote binary artifact doesn't exist yet.
241+
// Including a binary target with a placeholder checksum blocks ALL SPM resolution.
242+
243+
/// RAG product (library) — only included when the binary is available
244+
func ragProducts() -> [Product] {
245+
guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
246+
return [
247+
.library(
248+
name: "RunAnywhereRAG",
249+
targets: ["RAGRuntime"]
250+
),
251+
]
252+
}
253+
254+
/// RAG dependency for the RunAnywhere core target
255+
/// NOTE: Core already accesses RAG C headers via CRACommons umbrella (rac_rag.h, rac_rag_pipeline.h).
256+
/// No additional dependency needed — RAGBackend is only used by RAGRuntime.
257+
func ragCoreDependencies() -> [Target.Dependency] {
258+
return []
259+
}
260+
261+
/// RAG-related targets (C bridge + Swift runtime)
262+
func ragTargets() -> [Target] {
263+
guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
264+
return [
265+
// C Bridge Module - RAG Backend Headers
266+
.target(
267+
name: "RAGBackend",
268+
dependencies: ["RABackendRAGBinary"],
269+
path: "sdk/runanywhere-swift/Sources/RAGRuntime/include",
270+
publicHeadersPath: "."
271+
),
272+
// RAG Runtime Backend
273+
.target(
274+
name: "RAGRuntime",
275+
dependencies: [
276+
"RunAnywhere",
277+
"RAGBackend",
278+
"ONNXRuntime",
279+
"LlamaCPPRuntime",
280+
],
281+
path: "sdk/runanywhere-swift/Sources/RAGRuntime",
282+
exclude: ["include"],
283+
linkerSettings: [
284+
.linkedLibrary("c++"),
285+
]
286+
),
287+
]
288+
}
289+
231290
// =============================================================================
232291
// BINARY TARGET SELECTION
233292
// =============================================================================
@@ -255,6 +314,10 @@ func binaryTargets() -> [Target] {
255314
name: "RABackendONNXBinary",
256315
path: "sdk/runanywhere-swift/Binaries/RABackendONNX.xcframework"
257316
),
317+
.binaryTarget(
318+
name: "RABackendRAGBinary",
319+
path: "sdk/runanywhere-swift/Binaries/RABackendRAG.xcframework"
320+
),
258321
]
259322

260323
// ONNX Runtime xcframeworks - split by platform
@@ -306,6 +369,17 @@ func binaryTargets() -> [Target] {
306369
),
307370
]
308371

372+
// Only include RAG binary when the release artifact is available
373+
if ragRemoteBinaryAvailable {
374+
targets.append(
375+
.binaryTarget(
376+
name: "RABackendRAGBinary",
377+
url: "https://github.com/RunanywhereAI/runanywhere-sdks/releases/download/v\(sdkVersion)/RABackendRAG-v\(sdkVersion).zip",
378+
checksum: "0000000000000000000000000000000000000000000000000000000000000000" // Replace with actual checksum
379+
)
380+
)
381+
}
382+
309383
return targets
310384
}
311385
}

examples/android/RunAnywhereAI/app/build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ dependencies {
219219
// Pick the backends you need:
220220
implementation(project(":runanywhere-core-llamacpp")) // ~45MB - LLM text generation
221221
implementation(project(":runanywhere-core-onnx")) // ~30MB - STT, TTS, VAD
222-
// RAG pipeline is now part of the core SDK (not a separate module)
222+
implementation(project(":runanywhere-core-rag")) // RAG pipeline JNI bindings
223223

224224
// AndroidX Core & Lifecycle
225225
implementation(libs.androidx.core.ktx)

examples/android/RunAnywhereAI/settings.gradle.kts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,7 @@ include(":runanywhere-core-onnx")
5353
project(":runanywhere-core-onnx").projectDir =
5454
file("../../../sdk/runanywhere-kotlin/modules/runanywhere-core-onnx")
5555

56-
// RAG pipeline is now part of the core SDK (not a separate module).
57-
// Registration is handled by ragCreatePipeline(). See: RunAnywhere+RAG.jvmAndroid.kt
56+
// RAG module - Retrieval-Augmented Generation adapter
57+
include(":runanywhere-core-rag")
58+
project(":runanywhere-core-rag").projectDir =
59+
file("../../../sdk/runanywhere-kotlin/modules/runanywhere-core-rag")

examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart

Lines changed: 41 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ import 'package:runanywhere_ai/core/design_system/app_spacing.dart';
1010
import 'package:runanywhere_ai/core/services/model_manager.dart';
1111
import 'package:runanywhere_ai/core/utilities/constants.dart';
1212
import 'package:runanywhere_ai/core/utilities/keychain_helper.dart';
13-
import 'package:runanywhere/public/extensions/rag_module.dart';
1413
import 'package:runanywhere_llamacpp/runanywhere_llamacpp.dart';
14+
import 'package:runanywhere_onnx/runanywhere_onnx.dart';
1515

1616
/// RunAnywhereAIApp (mirroring iOS RunAnywhereAIApp.swift)
1717
///
@@ -140,8 +140,11 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
140140
Future<void> _registerModulesAndModels() async {
141141
debugPrint('📦 Registering modules with their models...');
142142

143-
// --- LLAMACPP MODULE ---
143+
// LlamaCPP module with LLM models
144+
// Using explicit IDs ensures models are recognized after download across app restarts
144145
await LlamaCpp.register();
146+
147+
// Yield after heavy backend registration
145148
await Future<void>.delayed(Duration.zero);
146149

147150
LlamaCpp.addModel(
@@ -187,6 +190,8 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
187190
memoryRequirement: 400000000,
188191
);
189192

193+
// Tool Calling Optimized Models
194+
// LFM2-1.2B-Tool - Designed for concise and precise tool calling (Liquid AI)
190195
LlamaCpp.addModel(
191196
id: 'lfm2-1.2b-tool-q4_k_m',
192197
name: 'LiquidAI LFM2 1.2B Tool Q4_K_M',
@@ -201,10 +206,16 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
201206
'https://huggingface.co/LiquidAI/LFM2-1.2B-Tool-GGUF/resolve/main/LFM2-1.2B-Tool-Q8_0.gguf',
202207
memoryRequirement: 1400000000,
203208
);
204-
debugPrint('✅ LlamaCPP module registered');
209+
debugPrint('✅ LlamaCPP module registered with LLM models (including tool-calling optimized models)');
210+
211+
// Yield between module registrations
205212
await Future<void>.delayed(Duration.zero);
206213

207-
// --- VLM MODULE ---
214+
// Register VLM (Vision Language) models
215+
// VLM models require 2 files: main model + mmproj (vision projector)
216+
// Bundled as tar.gz archives for easy download/extraction
217+
218+
// SmolVLM 500M - Ultra-lightweight VLM for mobile (~500MB total)
208219
RunAnywhere.registerModel(
209220
id: 'smolvlm-500m-instruct-q8_0',
210221
name: 'SmolVLM 500M Instruct',
@@ -218,81 +229,57 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
218229
memoryRequirement: 600000000,
219230
);
220231
debugPrint('✅ VLM models registered');
232+
233+
// Yield between module registrations
234+
await Future<void>.delayed(Duration.zero);
235+
236+
// Diffusion (image generation) is not registered here. CoreML diffusion is supported
237+
// only in the Swift SDK and Swift example app; Flutter/RN do not register diffusion.
238+
239+
// ONNX module with STT and TTS models
240+
// Using tar.gz format hosted on RunanywhereAI/sherpa-onnx for fast native extraction
241+
// Using explicit IDs ensures models are recognized after download across app restarts
242+
await Onnx.register();
243+
244+
// Yield after heavy backend registration
221245
await Future<void>.delayed(Duration.zero);
222246

223-
// --- ONNX MODULE (STT/TTS via Core SDK) ---
224247
// STT Models (Sherpa-ONNX Whisper)
225-
RunAnywhere.registerModel(
248+
Onnx.addModel(
226249
id: 'sherpa-onnx-whisper-tiny.en',
227250
name: 'Sherpa Whisper Tiny (ONNX)',
228-
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz'),
229-
framework: InferenceFramework.onnx,
251+
url:
252+
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz',
230253
modality: ModelCategory.speechRecognition,
231254
memoryRequirement: 75000000,
232255
);
233-
234-
RunAnywhere.registerModel(
256+
Onnx.addModel(
235257
id: 'sherpa-onnx-whisper-small.en',
236258
name: 'Sherpa Whisper Small (ONNX)',
237-
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-small.en.tar.gz'),
238-
framework: InferenceFramework.onnx,
259+
url:
260+
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-small.en.tar.gz',
239261
modality: ModelCategory.speechRecognition,
240262
memoryRequirement: 250000000,
241263
);
242264

243265
// TTS Models (Piper VITS)
244-
RunAnywhere.registerModel(
266+
Onnx.addModel(
245267
id: 'vits-piper-en_US-lessac-medium',
246268
name: 'Piper TTS (US English - Medium)',
247-
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz'),
248-
framework: InferenceFramework.onnx,
269+
url:
270+
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz',
249271
modality: ModelCategory.speechSynthesis,
250272
memoryRequirement: 65000000,
251273
);
252-
253-
RunAnywhere.registerModel(
274+
Onnx.addModel(
254275
id: 'vits-piper-en_GB-alba-medium',
255276
name: 'Piper TTS (British English)',
256-
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_GB-alba-medium.tar.gz'),
257-
framework: InferenceFramework.onnx,
277+
url:
278+
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_GB-alba-medium.tar.gz',
258279
modality: ModelCategory.speechSynthesis,
259280
memoryRequirement: 65000000,
260281
);
261-
debugPrint('✅ STT/TTS models registered via Core SDK');
262-
await Future<void>.delayed(Duration.zero);
263-
264-
// --- RAG EMBEDDINGS ---
265-
RunAnywhere.registerMultiFileModel(
266-
id: 'all-minilm-l6-v2',
267-
name: 'All MiniLM L6 v2 (Embedding)',
268-
files: [
269-
ModelFileDescriptor(
270-
relativePath: 'model.onnx',
271-
destinationPath: 'model.onnx',
272-
url: Uri.parse(
273-
'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx'),
274-
),
275-
ModelFileDescriptor(
276-
relativePath: 'vocab.txt',
277-
destinationPath: 'vocab.txt',
278-
url: Uri.parse(
279-
'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/vocab.txt'),
280-
),
281-
],
282-
framework: InferenceFramework.onnx,
283-
modality: ModelCategory.embedding,
284-
memoryRequirement: 25500000,
285-
);
286-
debugPrint('✅ ONNX Embedding models registered');
287-
await Future<void>.delayed(Duration.zero);
288-
289-
// --- RAG BACKEND ---
290-
try {
291-
await RAGModule.register();
292-
debugPrint('✅ RAG backend registered');
293-
} catch (e) {
294-
debugPrint('⚠️ RAG backend not available (RAG features disabled): $e');
295-
}
282+
debugPrint('✅ ONNX module registered with STT/TTS models');
296283

297284
debugPrint('🎉 All modules and models registered');
298285
}

examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import 'package:runanywhere_ai/features/models/model_selection_sheet.dart';
1616
import 'package:runanywhere_ai/features/models/model_status_components.dart';
1717
import 'package:runanywhere_ai/features/models/model_types.dart';
1818
import 'package:runanywhere_ai/features/settings/tool_settings_view_model.dart';
19-
import 'package:runanywhere_ai/features/rag/rag_demo_view.dart';
2019
import 'package:runanywhere_ai/features/structured_output/structured_output_view.dart';
2120
import 'package:shared_preferences/shared_preferences.dart';
2221

@@ -455,17 +454,6 @@ class _ChatInterfaceViewState extends State<ChatInterfaceView> {
455454
appBar: AppBar(
456455
title: const Text('Chat'),
457456
actions: [
458-
IconButton(
459-
icon: const Icon(Icons.article_outlined),
460-
onPressed: () {
461-
Navigator.of(context).push<void>(
462-
MaterialPageRoute<void>(
463-
builder: (context) => const RagDemoView(),
464-
),
465-
);
466-
},
467-
tooltip: 'Document Q&A',
468-
),
469457
IconButton(
470458
icon: const Icon(Icons.data_object),
471459
onPressed: () {
@@ -959,4 +947,4 @@ class _MessageBubbleState extends State<_MessageBubble> {
959947
),
960948
);
961949
}
962-
}
950+
}

examples/flutter/RunAnywhereAI/lib/features/models/model_list_view_model.dart

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,6 @@ class ModelListViewModel extends ChangeNotifier {
106106
return ModelCategory.imageGeneration;
107107
case sdk.ModelCategory.audio:
108108
return ModelCategory.audio;
109-
case sdk.ModelCategory.embedding:
110-
return ModelCategory.embedding;
111109
}
112110
}
113111

examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -507,15 +507,8 @@ class _ModelSelectionSheetState extends State<ModelSelectionSheet> {
507507
});
508508

509509
try {
510-
// RAG contexts record the selection only — do NOT pre-load into memory.
511-
// The RAG pipeline loads models on demand when the document is ingested.
512-
final isRagContext = widget.context == ModelSelectionContext.ragEmbedding ||
513-
widget.context == ModelSelectionContext.ragLLM;
514-
515-
if (!isRagContext) {
516-
// Update view model selection state (loads the model into memory)
517-
await _viewModel.selectModel(model);
518-
}
510+
// Update view model selection state
511+
await _viewModel.selectModel(model);
519512

520513
// Call the callback - this is where the actual model loading happens
521514
// The callback knows the correct context and how to load the model

0 commit comments

Comments
 (0)