Skip to content

Commit 596e8c6

Browse files
Merge branch 'shubham-rag-fix'
2 parents 296c8bd + 7062e5a commit 596e8c6

228 files changed

Lines changed: 10609 additions & 17501 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

Package.swift

Lines changed: 4 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,12 @@ import Foundation
3737
// ./scripts/build-swift.sh --set-remote (sets useLocalBinaries = false)
3838
//
3939
// =============================================================================
40-
let useLocalBinaries = false // Toggle: true for local dev, false for release
40+
let useLocalBinaries = true // Toggle: true for local dev, false for release
4141

4242
// Version for remote XCFrameworks (used when useLocalBinaries = false)
4343
// Updated automatically by CI/CD during releases
4444
let sdkVersion = "0.19.6"
4545

46-
// RAG binary is only available in local dev mode until the release artifact is published.
47-
// In remote mode, the RAG xcframework zip + checksum don't exist yet, so including the
48-
// binary target would block ALL SPM package resolution (not just RAG).
49-
// Set to true once RABackendRAG-v<version>.zip is published to GitHub releases.
50-
let ragRemoteBinaryAvailable = false
51-
5246
let package = Package(
5347
name: "runanywhere-sdks",
5448
platforms: [
@@ -87,7 +81,7 @@ let package = Package(
8781
name: "RunAnywhereWhisperKit",
8882
targets: ["WhisperKitRuntime"]
8983
),
90-
] + ragProducts(),
84+
],
9185
dependencies: [
9286
.package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"),
9387
.package(url: "https://github.com/Alamofire/Alamofire.git", from: "5.9.0"),
@@ -152,7 +146,7 @@ let package = Package(
152146
.product(name: "StableDiffusion", package: "ml-stable-diffusion"),
153147
"CRACommons",
154148
"RACommonsBinary",
155-
] + ragCoreDependencies(),
149+
],
156150
path: "sdk/runanywhere-swift/Sources/RunAnywhere",
157151
exclude: ["CRACommons"],
158152
swiftSettings: [
@@ -231,62 +225,9 @@ let package = Package(
231225
path: "sdk/runanywhere-swift/Tests/RunAnywhereTests"
232226
),
233227

234-
] + ragTargets() + binaryTargets()
228+
] + binaryTargets()
235229
)
236230

237-
// =============================================================================
238-
// RAG TARGET HELPERS
239-
// =============================================================================
240-
// RAG targets are gated because the remote binary artifact doesn't exist yet.
241-
// Including a binary target with a placeholder checksum blocks ALL SPM resolution.
242-
243-
/// RAG product (library) — only included when the binary is available
244-
func ragProducts() -> [Product] {
245-
guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
246-
return [
247-
.library(
248-
name: "RunAnywhereRAG",
249-
targets: ["RAGRuntime"]
250-
),
251-
]
252-
}
253-
254-
/// RAG dependency for the RunAnywhere core target
255-
/// NOTE: Core already accesses RAG C headers via CRACommons umbrella (rac_rag.h, rac_rag_pipeline.h).
256-
/// No additional dependency needed — RAGBackend is only used by RAGRuntime.
257-
func ragCoreDependencies() -> [Target.Dependency] {
258-
return []
259-
}
260-
261-
/// RAG-related targets (C bridge + Swift runtime)
262-
func ragTargets() -> [Target] {
263-
guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
264-
return [
265-
// C Bridge Module - RAG Backend Headers
266-
.target(
267-
name: "RAGBackend",
268-
dependencies: ["RABackendRAGBinary"],
269-
path: "sdk/runanywhere-swift/Sources/RAGRuntime/include",
270-
publicHeadersPath: "."
271-
),
272-
// RAG Runtime Backend
273-
.target(
274-
name: "RAGRuntime",
275-
dependencies: [
276-
"RunAnywhere",
277-
"RAGBackend",
278-
"ONNXRuntime",
279-
"LlamaCPPRuntime",
280-
],
281-
path: "sdk/runanywhere-swift/Sources/RAGRuntime",
282-
exclude: ["include"],
283-
linkerSettings: [
284-
.linkedLibrary("c++"),
285-
]
286-
),
287-
]
288-
}
289-
290231
// =============================================================================
291232
// BINARY TARGET SELECTION
292233
// =============================================================================
@@ -314,10 +255,6 @@ func binaryTargets() -> [Target] {
314255
name: "RABackendONNXBinary",
315256
path: "sdk/runanywhere-swift/Binaries/RABackendONNX.xcframework"
316257
),
317-
.binaryTarget(
318-
name: "RABackendRAGBinary",
319-
path: "sdk/runanywhere-swift/Binaries/RABackendRAG.xcframework"
320-
),
321258
]
322259

323260
// ONNX Runtime xcframeworks - split by platform
@@ -369,17 +306,6 @@ func binaryTargets() -> [Target] {
369306
),
370307
]
371308

372-
// Only include RAG binary when the release artifact is available
373-
if ragRemoteBinaryAvailable {
374-
targets.append(
375-
.binaryTarget(
376-
name: "RABackendRAGBinary",
377-
url: "https://github.com/RunanywhereAI/runanywhere-sdks/releases/download/v\(sdkVersion)/RABackendRAG-v\(sdkVersion).zip",
378-
checksum: "0000000000000000000000000000000000000000000000000000000000000000" // Replace with actual checksum
379-
)
380-
)
381-
}
382-
383309
return targets
384310
}
385311
}

examples/android/RunAnywhereAI/app/build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ dependencies {
219219
// Pick the backends you need:
220220
implementation(project(":runanywhere-core-llamacpp")) // ~45MB - LLM text generation
221221
implementation(project(":runanywhere-core-onnx")) // ~30MB - STT, TTS, VAD
222-
implementation(project(":runanywhere-core-rag")) // RAG pipeline JNI bindings
222+
// RAG pipeline is now part of the core SDK (not a separate module)
223223

224224
// AndroidX Core & Lifecycle
225225
implementation(libs.androidx.core.ktx)

examples/android/RunAnywhereAI/settings.gradle.kts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,5 @@ include(":runanywhere-core-onnx")
5353
project(":runanywhere-core-onnx").projectDir =
5454
file("../../../sdk/runanywhere-kotlin/modules/runanywhere-core-onnx")
5555

56-
// RAG module - Retrieval-Augmented Generation adapter
57-
include(":runanywhere-core-rag")
58-
project(":runanywhere-core-rag").projectDir =
59-
file("../../../sdk/runanywhere-kotlin/modules/runanywhere-core-rag")
56+
// RAG pipeline is now part of the core SDK (not a separate module).
57+
// Registration is handled by ragCreatePipeline(). See: RunAnywhere+RAG.jvmAndroid.kt

examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart

Lines changed: 54 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ import 'package:runanywhere_ai/core/design_system/app_spacing.dart';
1010
import 'package:runanywhere_ai/core/services/model_manager.dart';
1111
import 'package:runanywhere_ai/core/utilities/constants.dart';
1212
import 'package:runanywhere_ai/core/utilities/keychain_helper.dart';
13+
import 'package:runanywhere/public/extensions/rag_module.dart';
1314
import 'package:runanywhere_llamacpp/runanywhere_llamacpp.dart';
14-
import 'package:runanywhere_onnx/runanywhere_onnx.dart';
1515

1616
/// RunAnywhereAIApp (mirroring iOS RunAnywhereAIApp.swift)
1717
///
@@ -140,11 +140,8 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
140140
Future<void> _registerModulesAndModels() async {
141141
debugPrint('📦 Registering modules with their models...');
142142

143-
// LlamaCPP module with LLM models
144-
// Using explicit IDs ensures models are recognized after download across app restarts
143+
// --- LLAMACPP MODULE ---
145144
await LlamaCpp.register();
146-
147-
// Yield after heavy backend registration
148145
await Future<void>.delayed(Duration.zero);
149146

150147
LlamaCpp.addModel(
@@ -190,8 +187,6 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
190187
memoryRequirement: 400000000,
191188
);
192189

193-
// Tool Calling Optimized Models
194-
// LFM2-1.2B-Tool - Designed for concise and precise tool calling (Liquid AI)
195190
LlamaCpp.addModel(
196191
id: 'lfm2-1.2b-tool-q4_k_m',
197192
name: 'LiquidAI LFM2 1.2B Tool Q4_K_M',
@@ -206,16 +201,10 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
206201
'https://huggingface.co/LiquidAI/LFM2-1.2B-Tool-GGUF/resolve/main/LFM2-1.2B-Tool-Q8_0.gguf',
207202
memoryRequirement: 1400000000,
208203
);
209-
debugPrint('✅ LlamaCPP module registered with LLM models (including tool-calling optimized models)');
210-
211-
// Yield between module registrations
204+
debugPrint('✅ LlamaCPP module registered');
212205
await Future<void>.delayed(Duration.zero);
213206

214-
// Register VLM (Vision Language) models
215-
// VLM models require 2 files: main model + mmproj (vision projector)
216-
// Bundled as tar.gz archives for easy download/extraction
217-
218-
// SmolVLM 500M - Ultra-lightweight VLM for mobile (~500MB total)
207+
// --- VLM MODULE ---
219208
RunAnywhere.registerModel(
220209
id: 'smolvlm-500m-instruct-q8_0',
221210
name: 'SmolVLM 500M Instruct',
@@ -229,57 +218,81 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
229218
memoryRequirement: 600000000,
230219
);
231220
debugPrint('✅ VLM models registered');
232-
233-
// Yield between module registrations
234-
await Future<void>.delayed(Duration.zero);
235-
236-
// Diffusion (image generation) is not registered here. CoreML diffusion is supported
237-
// only in the Swift SDK and Swift example app; Flutter/RN do not register diffusion.
238-
239-
// ONNX module with STT and TTS models
240-
// Using tar.gz format hosted on RunanywhereAI/sherpa-onnx for fast native extraction
241-
// Using explicit IDs ensures models are recognized after download across app restarts
242-
await Onnx.register();
243-
244-
// Yield after heavy backend registration
245221
await Future<void>.delayed(Duration.zero);
246222

223+
// --- ONNX MODULE (STT/TTS via Core SDK) ---
247224
// STT Models (Sherpa-ONNX Whisper)
248-
Onnx.addModel(
225+
RunAnywhere.registerModel(
249226
id: 'sherpa-onnx-whisper-tiny.en',
250227
name: 'Sherpa Whisper Tiny (ONNX)',
251-
url:
252-
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz',
228+
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz'),
229+
framework: InferenceFramework.onnx,
253230
modality: ModelCategory.speechRecognition,
254231
memoryRequirement: 75000000,
255232
);
256-
Onnx.addModel(
233+
234+
RunAnywhere.registerModel(
257235
id: 'sherpa-onnx-whisper-small.en',
258236
name: 'Sherpa Whisper Small (ONNX)',
259-
url:
260-
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-small.en.tar.gz',
237+
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-small.en.tar.gz'),
238+
framework: InferenceFramework.onnx,
261239
modality: ModelCategory.speechRecognition,
262240
memoryRequirement: 250000000,
263241
);
264242

265243
// TTS Models (Piper VITS)
266-
Onnx.addModel(
244+
RunAnywhere.registerModel(
267245
id: 'vits-piper-en_US-lessac-medium',
268246
name: 'Piper TTS (US English - Medium)',
269-
url:
270-
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz',
247+
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz'),
248+
framework: InferenceFramework.onnx,
271249
modality: ModelCategory.speechSynthesis,
272250
memoryRequirement: 65000000,
273251
);
274-
Onnx.addModel(
252+
253+
RunAnywhere.registerModel(
275254
id: 'vits-piper-en_GB-alba-medium',
276255
name: 'Piper TTS (British English)',
277-
url:
278-
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_GB-alba-medium.tar.gz',
256+
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_GB-alba-medium.tar.gz'),
257+
framework: InferenceFramework.onnx,
279258
modality: ModelCategory.speechSynthesis,
280259
memoryRequirement: 65000000,
281260
);
282-
debugPrint('✅ ONNX module registered with STT/TTS models');
261+
debugPrint('✅ STT/TTS models registered via Core SDK');
262+
await Future<void>.delayed(Duration.zero);
263+
264+
// --- RAG EMBEDDINGS ---
265+
RunAnywhere.registerMultiFileModel(
266+
id: 'all-minilm-l6-v2',
267+
name: 'All MiniLM L6 v2 (Embedding)',
268+
files: [
269+
ModelFileDescriptor(
270+
relativePath: 'model.onnx',
271+
destinationPath: 'model.onnx',
272+
url: Uri.parse(
273+
'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx'),
274+
),
275+
ModelFileDescriptor(
276+
relativePath: 'vocab.txt',
277+
destinationPath: 'vocab.txt',
278+
url: Uri.parse(
279+
'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/vocab.txt'),
280+
),
281+
],
282+
framework: InferenceFramework.onnx,
283+
modality: ModelCategory.embedding,
284+
memoryRequirement: 25500000,
285+
);
286+
debugPrint('✅ ONNX Embedding models registered');
287+
await Future<void>.delayed(Duration.zero);
288+
289+
// --- RAG BACKEND ---
290+
try {
291+
await RAGModule.register();
292+
debugPrint('✅ RAG backend registered');
293+
} catch (e) {
294+
debugPrint('⚠️ RAG backend not available (RAG features disabled): $e');
295+
}
283296

284297
debugPrint('🎉 All modules and models registered');
285298
}

examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import 'package:runanywhere_ai/features/models/model_selection_sheet.dart';
1616
import 'package:runanywhere_ai/features/models/model_status_components.dart';
1717
import 'package:runanywhere_ai/features/models/model_types.dart';
1818
import 'package:runanywhere_ai/features/settings/tool_settings_view_model.dart';
19+
import 'package:runanywhere_ai/features/rag/rag_demo_view.dart';
1920
import 'package:runanywhere_ai/features/structured_output/structured_output_view.dart';
2021
import 'package:shared_preferences/shared_preferences.dart';
2122

@@ -454,6 +455,17 @@ class _ChatInterfaceViewState extends State<ChatInterfaceView> {
454455
appBar: AppBar(
455456
title: const Text('Chat'),
456457
actions: [
458+
IconButton(
459+
icon: const Icon(Icons.article_outlined),
460+
onPressed: () {
461+
Navigator.of(context).push<void>(
462+
MaterialPageRoute<void>(
463+
builder: (context) => const RagDemoView(),
464+
),
465+
);
466+
},
467+
tooltip: 'Document Q&A',
468+
),
457469
IconButton(
458470
icon: const Icon(Icons.data_object),
459471
onPressed: () {
@@ -947,4 +959,4 @@ class _MessageBubbleState extends State<_MessageBubble> {
947959
),
948960
);
949961
}
950-
}
962+
}

examples/flutter/RunAnywhereAI/lib/features/models/model_list_view_model.dart

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ class ModelListViewModel extends ChangeNotifier {
106106
return ModelCategory.imageGeneration;
107107
case sdk.ModelCategory.audio:
108108
return ModelCategory.audio;
109+
case sdk.ModelCategory.embedding:
110+
return ModelCategory.embedding;
109111
}
110112
}
111113

examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -507,8 +507,15 @@ class _ModelSelectionSheetState extends State<ModelSelectionSheet> {
507507
});
508508

509509
try {
510-
// Update view model selection state
511-
await _viewModel.selectModel(model);
510+
// RAG contexts record the selection only — do NOT pre-load into memory.
511+
// The RAG pipeline loads models on demand when the document is ingested.
512+
final isRagContext = widget.context == ModelSelectionContext.ragEmbedding ||
513+
widget.context == ModelSelectionContext.ragLLM;
514+
515+
if (!isRagContext) {
516+
// Update view model selection state (loads the model into memory)
517+
await _viewModel.selectModel(model);
518+
}
512519

513520
// Call the callback - this is where the actual model loading happens
514521
// The callback knows the correct context and how to load the model

0 commit comments

Comments
 (0)