Skip to content

Commit 296c8bd

Browse files
Revert "Shubham rag fix (#441)"
This reverts commit 838d057.
1 parent 838d057 commit 296c8bd

228 files changed

Lines changed: 17491 additions & 10599 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

Package.swift

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,18 @@ import Foundation
3737
// ./scripts/build-swift.sh --set-remote (sets useLocalBinaries = false)
3838
//
3939
// =============================================================================
40-
let useLocalBinaries = true // Toggle: true for local dev, false for release
40+
let useLocalBinaries = false // Toggle: true for local dev, false for release
4141

4242
// Version for remote XCFrameworks (used when testLocal = false)
4343
// Updated automatically by CI/CD during releases
4444
let sdkVersion = "0.19.6"
4545

46+
// RAG binary is only available in local dev mode until the release artifact is published.
47+
// In remote mode, the RAG xcframework zip + checksum don't exist yet, so including the
48+
// binary target would block ALL SPM package resolution (not just RAG).
49+
// Set to true once RABackendRAG-v<version>.zip is published to GitHub releases.
50+
let ragRemoteBinaryAvailable = false
51+
4652
let package = Package(
4753
name: "runanywhere-sdks",
4854
platforms: [
@@ -81,7 +87,7 @@ let package = Package(
8187
name: "RunAnywhereWhisperKit",
8288
targets: ["WhisperKitRuntime"]
8389
),
84-
],
90+
] + ragProducts(),
8591
dependencies: [
8692
.package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"),
8793
.package(url: "https://github.com/Alamofire/Alamofire.git", from: "5.9.0"),
@@ -146,7 +152,7 @@ let package = Package(
146152
.product(name: "StableDiffusion", package: "ml-stable-diffusion"),
147153
"CRACommons",
148154
"RACommonsBinary",
149-
],
155+
] + ragCoreDependencies(),
150156
path: "sdk/runanywhere-swift/Sources/RunAnywhere",
151157
exclude: ["CRACommons"],
152158
swiftSettings: [
@@ -225,9 +231,62 @@ let package = Package(
225231
path: "sdk/runanywhere-swift/Tests/RunAnywhereTests"
226232
),
227233

228-
] + binaryTargets()
234+
] + ragTargets() + binaryTargets()
229235
)
230236

237+
// =============================================================================
238+
// RAG TARGET HELPERS
239+
// =============================================================================
240+
// RAG targets are gated because the remote binary artifact doesn't exist yet.
241+
// Including a binary target with a placeholder checksum blocks ALL SPM resolution.
242+
243+
/// RAG product (library) — only included when the binary is available
244+
func ragProducts() -> [Product] {
245+
guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
246+
return [
247+
.library(
248+
name: "RunAnywhereRAG",
249+
targets: ["RAGRuntime"]
250+
),
251+
]
252+
}
253+
254+
/// RAG dependency for the RunAnywhere core target
255+
/// NOTE: Core already accesses RAG C headers via CRACommons umbrella (rac_rag.h, rac_rag_pipeline.h).
256+
/// No additional dependency needed — RAGBackend is only used by RAGRuntime.
257+
func ragCoreDependencies() -> [Target.Dependency] {
258+
return []
259+
}
260+
261+
/// RAG-related targets (C bridge + Swift runtime)
262+
func ragTargets() -> [Target] {
263+
guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
264+
return [
265+
// C Bridge Module - RAG Backend Headers
266+
.target(
267+
name: "RAGBackend",
268+
dependencies: ["RABackendRAGBinary"],
269+
path: "sdk/runanywhere-swift/Sources/RAGRuntime/include",
270+
publicHeadersPath: "."
271+
),
272+
// RAG Runtime Backend
273+
.target(
274+
name: "RAGRuntime",
275+
dependencies: [
276+
"RunAnywhere",
277+
"RAGBackend",
278+
"ONNXRuntime",
279+
"LlamaCPPRuntime",
280+
],
281+
path: "sdk/runanywhere-swift/Sources/RAGRuntime",
282+
exclude: ["include"],
283+
linkerSettings: [
284+
.linkedLibrary("c++"),
285+
]
286+
),
287+
]
288+
}
289+
231290
// =============================================================================
232291
// BINARY TARGET SELECTION
233292
// =============================================================================
@@ -255,6 +314,10 @@ func binaryTargets() -> [Target] {
255314
name: "RABackendONNXBinary",
256315
path: "sdk/runanywhere-swift/Binaries/RABackendONNX.xcframework"
257316
),
317+
.binaryTarget(
318+
name: "RABackendRAGBinary",
319+
path: "sdk/runanywhere-swift/Binaries/RABackendRAG.xcframework"
320+
),
258321
]
259322

260323
// ONNX Runtime xcframeworks - split by platform
@@ -306,6 +369,17 @@ func binaryTargets() -> [Target] {
306369
),
307370
]
308371

372+
// Only include RAG binary when the release artifact is available
373+
if ragRemoteBinaryAvailable {
374+
targets.append(
375+
.binaryTarget(
376+
name: "RABackendRAGBinary",
377+
url: "https://github.com/RunanywhereAI/runanywhere-sdks/releases/download/v\(sdkVersion)/RABackendRAG-v\(sdkVersion).zip",
378+
checksum: "0000000000000000000000000000000000000000000000000000000000000000" // Replace with actual checksum
379+
)
380+
)
381+
}
382+
309383
return targets
310384
}
311385
}

examples/android/RunAnywhereAI/app/build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ dependencies {
219219
// Pick the backends you need:
220220
implementation(project(":runanywhere-core-llamacpp")) // ~45MB - LLM text generation
221221
implementation(project(":runanywhere-core-onnx")) // ~30MB - STT, TTS, VAD
222-
// RAG pipeline is now part of the core SDK (not a separate module)
222+
implementation(project(":runanywhere-core-rag")) // RAG pipeline JNI bindings
223223

224224
// AndroidX Core & Lifecycle
225225
implementation(libs.androidx.core.ktx)

examples/android/RunAnywhereAI/settings.gradle.kts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,7 @@ include(":runanywhere-core-onnx")
5353
project(":runanywhere-core-onnx").projectDir =
5454
file("../../../sdk/runanywhere-kotlin/modules/runanywhere-core-onnx")
5555

56-
// RAG pipeline is now part of the core SDK (not a separate module).
57-
// Registration is handled by ragCreatePipeline(). See: RunAnywhere+RAG.jvmAndroid.kt
56+
// RAG module - Retrieval-Augmented Generation adapter
57+
include(":runanywhere-core-rag")
58+
project(":runanywhere-core-rag").projectDir =
59+
file("../../../sdk/runanywhere-kotlin/modules/runanywhere-core-rag")

examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart

Lines changed: 41 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ import 'package:runanywhere_ai/core/design_system/app_spacing.dart';
1010
import 'package:runanywhere_ai/core/services/model_manager.dart';
1111
import 'package:runanywhere_ai/core/utilities/constants.dart';
1212
import 'package:runanywhere_ai/core/utilities/keychain_helper.dart';
13-
import 'package:runanywhere/public/extensions/rag_module.dart';
1413
import 'package:runanywhere_llamacpp/runanywhere_llamacpp.dart';
14+
import 'package:runanywhere_onnx/runanywhere_onnx.dart';
1515

1616
/// RunAnywhereAIApp (mirroring iOS RunAnywhereAIApp.swift)
1717
///
@@ -140,8 +140,11 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
140140
Future<void> _registerModulesAndModels() async {
141141
debugPrint('📦 Registering modules with their models...');
142142

143-
// --- LLAMACPP MODULE ---
143+
// LlamaCPP module with LLM models
144+
// Using explicit IDs ensures models are recognized after download across app restarts
144145
await LlamaCpp.register();
146+
147+
// Yield after heavy backend registration
145148
await Future<void>.delayed(Duration.zero);
146149

147150
LlamaCpp.addModel(
@@ -187,6 +190,8 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
187190
memoryRequirement: 400000000,
188191
);
189192

193+
// Tool Calling Optimized Models
194+
// LFM2-1.2B-Tool - Designed for concise and precise tool calling (Liquid AI)
190195
LlamaCpp.addModel(
191196
id: 'lfm2-1.2b-tool-q4_k_m',
192197
name: 'LiquidAI LFM2 1.2B Tool Q4_K_M',
@@ -201,10 +206,16 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
201206
'https://huggingface.co/LiquidAI/LFM2-1.2B-Tool-GGUF/resolve/main/LFM2-1.2B-Tool-Q8_0.gguf',
202207
memoryRequirement: 1400000000,
203208
);
204-
debugPrint('✅ LlamaCPP module registered');
209+
debugPrint('✅ LlamaCPP module registered with LLM models (including tool-calling optimized models)');
210+
211+
// Yield between module registrations
205212
await Future<void>.delayed(Duration.zero);
206213

207-
// --- VLM MODULE ---
214+
// Register VLM (Vision Language) models
215+
// VLM models require 2 files: main model + mmproj (vision projector)
216+
// Bundled as tar.gz archives for easy download/extraction
217+
218+
// SmolVLM 500M - Ultra-lightweight VLM for mobile (~500MB total)
208219
RunAnywhere.registerModel(
209220
id: 'smolvlm-500m-instruct-q8_0',
210221
name: 'SmolVLM 500M Instruct',
@@ -218,81 +229,57 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
218229
memoryRequirement: 600000000,
219230
);
220231
debugPrint('✅ VLM models registered');
232+
233+
// Yield between module registrations
234+
await Future<void>.delayed(Duration.zero);
235+
236+
// Diffusion (image generation) is not registered here. CoreML diffusion is supported
237+
// only in the Swift SDK and Swift example app; Flutter/RN do not register diffusion.
238+
239+
// ONNX module with STT and TTS models
240+
// Using tar.gz format hosted on RunanywhereAI/sherpa-onnx for fast native extraction
241+
// Using explicit IDs ensures models are recognized after download across app restarts
242+
await Onnx.register();
243+
244+
// Yield after heavy backend registration
221245
await Future<void>.delayed(Duration.zero);
222246

223-
// --- ONNX MODULE (STT/TTS via Core SDK) ---
224247
// STT Models (Sherpa-ONNX Whisper)
225-
RunAnywhere.registerModel(
248+
Onnx.addModel(
226249
id: 'sherpa-onnx-whisper-tiny.en',
227250
name: 'Sherpa Whisper Tiny (ONNX)',
228-
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz'),
229-
framework: InferenceFramework.onnx,
251+
url:
252+
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz',
230253
modality: ModelCategory.speechRecognition,
231254
memoryRequirement: 75000000,
232255
);
233-
234-
RunAnywhere.registerModel(
256+
Onnx.addModel(
235257
id: 'sherpa-onnx-whisper-small.en',
236258
name: 'Sherpa Whisper Small (ONNX)',
237-
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-small.en.tar.gz'),
238-
framework: InferenceFramework.onnx,
259+
url:
260+
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-small.en.tar.gz',
239261
modality: ModelCategory.speechRecognition,
240262
memoryRequirement: 250000000,
241263
);
242264

243265
// TTS Models (Piper VITS)
244-
RunAnywhere.registerModel(
266+
Onnx.addModel(
245267
id: 'vits-piper-en_US-lessac-medium',
246268
name: 'Piper TTS (US English - Medium)',
247-
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz'),
248-
framework: InferenceFramework.onnx,
269+
url:
270+
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz',
249271
modality: ModelCategory.speechSynthesis,
250272
memoryRequirement: 65000000,
251273
);
252-
253-
RunAnywhere.registerModel(
274+
Onnx.addModel(
254275
id: 'vits-piper-en_GB-alba-medium',
255276
name: 'Piper TTS (British English)',
256-
url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_GB-alba-medium.tar.gz'),
257-
framework: InferenceFramework.onnx,
277+
url:
278+
'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_GB-alba-medium.tar.gz',
258279
modality: ModelCategory.speechSynthesis,
259280
memoryRequirement: 65000000,
260281
);
261-
debugPrint('✅ STT/TTS models registered via Core SDK');
262-
await Future<void>.delayed(Duration.zero);
263-
264-
// --- RAG EMBEDDINGS ---
265-
RunAnywhere.registerMultiFileModel(
266-
id: 'all-minilm-l6-v2',
267-
name: 'All MiniLM L6 v2 (Embedding)',
268-
files: [
269-
ModelFileDescriptor(
270-
relativePath: 'model.onnx',
271-
destinationPath: 'model.onnx',
272-
url: Uri.parse(
273-
'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx'),
274-
),
275-
ModelFileDescriptor(
276-
relativePath: 'vocab.txt',
277-
destinationPath: 'vocab.txt',
278-
url: Uri.parse(
279-
'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/vocab.txt'),
280-
),
281-
],
282-
framework: InferenceFramework.onnx,
283-
modality: ModelCategory.embedding,
284-
memoryRequirement: 25500000,
285-
);
286-
debugPrint('✅ ONNX Embedding models registered');
287-
await Future<void>.delayed(Duration.zero);
288-
289-
// --- RAG BACKEND ---
290-
try {
291-
await RAGModule.register();
292-
debugPrint('✅ RAG backend registered');
293-
} catch (e) {
294-
debugPrint('⚠️ RAG backend not available (RAG features disabled): $e');
295-
}
282+
debugPrint('✅ ONNX module registered with STT/TTS models');
296283

297284
debugPrint('🎉 All modules and models registered');
298285
}

examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import 'package:runanywhere_ai/features/models/model_selection_sheet.dart';
1616
import 'package:runanywhere_ai/features/models/model_status_components.dart';
1717
import 'package:runanywhere_ai/features/models/model_types.dart';
1818
import 'package:runanywhere_ai/features/settings/tool_settings_view_model.dart';
19-
import 'package:runanywhere_ai/features/rag/rag_demo_view.dart';
2019
import 'package:runanywhere_ai/features/structured_output/structured_output_view.dart';
2120
import 'package:shared_preferences/shared_preferences.dart';
2221

@@ -455,17 +454,6 @@ class _ChatInterfaceViewState extends State<ChatInterfaceView> {
455454
appBar: AppBar(
456455
title: const Text('Chat'),
457456
actions: [
458-
IconButton(
459-
icon: const Icon(Icons.article_outlined),
460-
onPressed: () {
461-
Navigator.of(context).push<void>(
462-
MaterialPageRoute<void>(
463-
builder: (context) => const RagDemoView(),
464-
),
465-
);
466-
},
467-
tooltip: 'Document Q&A',
468-
),
469457
IconButton(
470458
icon: const Icon(Icons.data_object),
471459
onPressed: () {
@@ -959,4 +947,4 @@ class _MessageBubbleState extends State<_MessageBubble> {
959947
),
960948
);
961949
}
962-
}
950+
}

examples/flutter/RunAnywhereAI/lib/features/models/model_list_view_model.dart

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,6 @@ class ModelListViewModel extends ChangeNotifier {
106106
return ModelCategory.imageGeneration;
107107
case sdk.ModelCategory.audio:
108108
return ModelCategory.audio;
109-
case sdk.ModelCategory.embedding:
110-
return ModelCategory.embedding;
111109
}
112110
}
113111

examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -507,15 +507,8 @@ class _ModelSelectionSheetState extends State<ModelSelectionSheet> {
507507
});
508508

509509
try {
510-
// RAG contexts record the selection only — do NOT pre-load into memory.
511-
// The RAG pipeline loads models on demand when the document is ingested.
512-
final isRagContext = widget.context == ModelSelectionContext.ragEmbedding ||
513-
widget.context == ModelSelectionContext.ragLLM;
514-
515-
if (!isRagContext) {
516-
// Update view model selection state (loads the model into memory)
517-
await _viewModel.selectModel(model);
518-
}
510+
// Update view model selection state
511+
await _viewModel.selectModel(model);
519512

520513
// Call the callback - this is where the actual model loading happens
521514
// The callback knows the correct context and how to load the model

0 commit comments

Comments
 (0)