
Commit 838d057

Authored by: shubhammalhotra28, Siddhesh2377, sanchitmonga22, VyasGuru
Shubham rag fix (#441)
* Added LoRA + fixed Build-All workflow (#389)

* feat(lora): add LoRA adapter support across SDK + demo app

  Implement LoRA (Low-Rank Adaptation) adapter hot-swapping for the llama.cpp backend across all 6 SDK layers (C++ -> C API -> Component -> JNI -> Kotlin Bridge -> Kotlin Public API).

  - Add load/remove/clear/query LoRA adapter operations
  - Use vtable dispatch in the component layer to decouple librac_commons from librac_backend_llamacpp (fixes linker errors)
  - Add LoRA vtable entries to rac_llm_service_ops_t
  - Fix AttachCurrentThread cast for the Android NDK C++ JNI build
  - Add RunAnyWhereLora Android demo app with a Material 3 Q&A UI
  - Add comprehensive implementation docs with a C/C++ API reference

* feat(ci): add selectable build targets to the Build All workflow + fix Swift concurrency errors

  Rewrite build-all-test.yml with 9 boolean checkbox inputs so each build target can be toggled independently from the GitHub Actions UI:

  - C++ Android Backends (arm64-v8a, armeabi-v7a, x86_64 matrix)
  - C++ iOS Backends (XCFramework)
  - Kotlin SDK (JVM + Android)
  - Swift SDK (iOS/macOS)
  - Web SDK (TypeScript)
  - Flutter SDK (Dart analyze via Melos)
  - React Native SDK (TypeScript via Lerna)
  - Android Example Apps (RunAnywhereAI + RunAnyWhereLora)
  - IntelliJ Plugin

  Fix two Swift strict-concurrency errors that failed the Swift SDK build:

  - LiveTranscriptionSession: add @unchecked Sendable (safe because the class is @MainActor, so all access is serialized)
  - RunAnywhere+VisionLanguage: add Sendable conformance to rac_vlm_image_t so the C struct can cross the Task boundary in the streaming builder; simplify StreamingCollector to start timing at init

* fix(swift): resolve strict concurrency errors in LiveTranscriptionSession and VLM streaming

  LiveTranscriptionSession.swift:
  - Replace [weak self] captures with a strong `let session = self` before closures to avoid a captured var in @Sendable/Task contexts (the class is @MainActor @unchecked Sendable, so a strong reference is safe and bounded by the stream lifecycle)
  - Wrap the deprecated startStreamingTranscription call in an @available helper to silence the deprecation warning until migration to the transcribeStream API

  RunAnywhere+VisionLanguage.swift:
  - Add `let capturedCImage = cImage` before the AsyncThrowingStream closure so the Task captures an immutable let instead of a mutable var
  - Add `extension rac_vlm_image_t: @unchecked Sendable {}` so the C struct can cross Task concurrency boundaries safely
  - Simplify StreamingCollector to initialize startTime at init instead of requiring a separate async start() call

* fix(jni): address CodeRabbit review findings in LoRA JNI functions

  - Replace raw -1 returns with RAC_ERROR_INVALID_HANDLE/RAC_ERROR_INVALID_ARGUMENT to match codebase error-handling conventions
  - Use the getCString() helper instead of raw GetStringUTFChars/ReleaseStringUTFChars
  - Add missing result logging to racLlmComponentRemoveLora and racLlmComponentClearLora
  - Use rac_free() instead of free() in racLlmComponentGetLoraInfo for consistency
  - Clarify LoRA adapter memory-ownership comments (adapters are freed automatically with the model per the llama.cpp b8011 API — llama_adapter_lora_free is deprecated)

* Add LoRA iOS (#407)

  - iOS initial changes
  - Minimal sample needed to test LoRA
  - Updated docs
  - Addressed review comments

* Prototype for Optimised RAG

  First version of the optimised RAG pipeline. Not polished yet; once tested, I'll micro-optimise, benchmark, and finish.
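The vtable dispatch mentioned for the LoRA work (decoupling librac_commons from librac_backend_llamacpp so the common layer never links backend symbols directly) is a C-level ops table. The same decoupling pattern, sketched in Java for illustration — every name here is a placeholder, not the SDK's actual API:

```java
import java.util.function.BiFunction;

// Illustrative sketch of vtable-style dispatch: the common layer holds only a
// table of function references, filled in when a backend registers itself, so
// there is no hard link-time dependency on backend symbols.
final class LoraDispatch {
    // The "vtable": one entry per LoRA operation. Names are hypothetical.
    static final class LoraOps {
        final BiFunction<String, Float, Long> load;    // (path, scale) -> adapter handle
        final java.util.function.LongPredicate remove; // handle -> success
        LoraOps(BiFunction<String, Float, Long> load,
                java.util.function.LongPredicate remove) {
            this.load = load;
            this.remove = remove;
        }
    }

    private static LoraOps ops; // filled in during backend registration

    // Called by the backend during its own registration step.
    static void register(LoraOps backendOps) { ops = backendOps; }

    // The common layer only ever calls through the table.
    static long loadAdapter(String path, float scale) {
        if (ops == null) throw new IllegalStateException("no LLM backend registered");
        return ops.load.apply(path, scale);
    }
}
```

The backend fills the table at registration time; callers fail fast with a clear error if no backend registered, which mirrors how the component layer avoids the linker errors described above.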
* Aligning / upstream update for dev (#442)

* chore: add AGENTS.md with Cursor Cloud specific instructions
* chore: update AGENTS.md with Linux backend build and voice assistant instructions
* minor fixes
* fix: Android app UI improvements, SDK concurrency bug fixes, and LoRA download support

  Android app:
  - Redesign intro screen with a minimal layout and linear progress bar
  - Improve VLM screen: use the shared ModelRequiredOverlay, theme-consistent colors, fix button clipping (replace IconButton with a clickable Column)
  - Fix keyboard handling: hide the bottom bar when the keyboard is open, apply imePadding correctly
  - Add scrollable auto-scroll prompt suggestions in ChatScreen
  - Add shimmer typing indicator with a "Thinking..." label
  - Fix 9 app-level bugs: think-tag leak, CancellationException handling, VoiceAssistant lifecycle, ConversationStore ANR, TTS sample-rate parsing, LoRA download mutex deadlock

  KMP SDK (10 bug fixes):
  - Fix cancel() deadlock: move JNI calls outside synchronized(lock) in CppBridgeLLM
  - Fix orphaned CoroutineScope leak in generateStream using callbackFlow
  - Fix initializeServices() holding a lock across network I/O
  - Fix loraDownloadDir lazy val caching the wrong path before pathProvider is set
  - Fix setBaseDirCallback TOCTOU race condition
  - Add @Volatile to DownloadTask mutable fields for thread visibility
  - Fix unescapeJson() replacement order (process \\\\ before \\n)
  - Add downloadLock for atomic cancel/pause/resume operations
  - Fix checkNativeLibrary() to actually call the native method
  - Add ensureServicesReady() to generateStream
  - Add LoRA adapter download/delete/path SDK functions

  Known issue: tool calling may misbehave when a LoRA adapter is applied — the model detects the tool call but responds with "I can assist with this" instead of executing it. Tested with Qwen 2.5 0.5B; this only occurs when the model has a LoRA adapter loaded.
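One of the KMP fixes above reorders unescapeJson() replacements so escaped backslashes are handled before escaped newlines. Chained String.replace() calls are order-sensitive by nature: a later pass can re-interpret characters produced by an earlier one. A single-pass scan sidesteps the ordering problem entirely — a sketch of that approach (not the SDK's actual implementation):

```java
// Single-pass JSON-style unescape. Scanning once means each escape sequence
// is decoded exactly once, so sequences like "\\" followed by 'n' (an escaped
// backslash, then a literal n) can never be mistaken for "\n".
final class JsonUnescape {
    static String unescape(String s) {
        StringBuilder out = new StringBuilder(s.length());
        int i = 0;
        while (i < s.length()) {
            char c = s.charAt(i);
            if (c == '\\' && i + 1 < s.length()) {
                char next = s.charAt(i + 1);
                switch (next) {
                    case 'n':  out.append('\n'); break;
                    case 't':  out.append('\t'); break;
                    case 'r':  out.append('\r'); break;
                    case '"':  out.append('"');  break;
                    case '\\': out.append('\\'); break;
                    default:   out.append(c).append(next); // unknown escape: keep as-is
                }
                i += 2;
            } else {
                out.append(c);
                i += 1;
            }
        }
        return out.toString();
    }
}
```

For example, the input `a\\n` (escaped backslash, then a literal n) decodes to a backslash followed by `n`, while `a\nb` decodes to an actual newline — a distinction sequential replaces struggle to preserve.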
* fix(tts): scan the WAV data chunk instead of hardcoding a 44-byte header offset

  WAV files with extra chunks (LIST, fact, bext) had metadata bytes fed into AudioTrack as PCM, causing distorted playback. The parser now walks the chunk structure to find the actual "data" chunk start.

* fix: Android app UI bug fixes, responsive dimensions, LoRA example prompts, and darker dark mode

  - Fix nested verticalScroll inside LazyColumn (ThinkingToggle) causing broken scrolling
  - Fix weight(1f) + verticalScroll overflow in the VLMScreen DescriptionPanel
  - Add verticalScroll to MoreHubScreen to prevent clipping on small screens
  - Add imePadding to ConversationListSheet so the keyboard doesn't cover search
  - Fix auto-scroll wrap logic in EmptyStateView using canScrollForward
  - Replace collectAsState with collectAsStateWithLifecycle in 3 screens
  - Replace the deprecated STTMode.values() with .entries
  - Replace hardcoded Color.Gray with AppColors.statusGray for dark-mode contrast
  - Remove redundant Color.White inside buttons that already set contentColor
  - Replace the hardcoded 300.dp bubble width with the responsive Dimensions.messageBubbleMaxWidth
  - Add an accessibility semantics role to the VLMScreen clickable Column
  - Disable the Image Generation card (placeholder feature)
  - Add responsive rDp/rSp utilities and convert Dimensions/AppSpacing to use them
  - Add LoRA example prompts with a copy button to the adapter picker and manager screens
  - Darken dark-mode background colors

* fix: Android app bug fixes - race conditions, ANR, pixel corruption, scroll, and memory safety

  - VoiceAssistantViewModel: replace runBlocking with GlobalScope.launch in onCleared to prevent ANR
  - VoiceAssistantViewModel: add a synchronized audioBufferLock for thread-safe ByteArrayOutputStream access
  - VoiceAssistantViewModel: scan the WAV data chunk instead of hardcoding a 44-byte header offset
  - ConversationStore: use MutableStateFlow.update {} for atomic compare-and-set on all mutations
  - ToolSettingsViewModel: clear the static singleton in onCleared to prevent stale references
  - VLMViewModel: advance rgbIdx by 3 in the else branch to prevent pixel corruption on an out-of-bounds skip
  - ChatViewModel: use CopyOnWriteArrayList for tokensPerSecondHistory thread safety
  - VoiceAssistantParticleView: remove a wasted transparent drawPoints call
  - RunAnywhereApplication: capture the volatile initializationError to a local val before the null check
  - VLMScreen: add verticalScroll to the description panel for long-text overflow
  - ResponsiveUtils: add a designWidth <= 0 guard to prevent division by zero in rDp/rSp

  ---------
  Co-authored-by: Cursor Agent <cursoragent@cursor.com>
  Co-authored-by: Sanchit Monga <sanchitmonga22@gmail.com>
  Co-authored-by: Sanchit Monga <sm3468@g.rit.edu>
  Co-authored-by: Siddhesh2377 <siddheshsonar2377@gmail.com>
  Co-authored-by: RunAnywhere <>

* RAG rewrite

* Refactor RAG terminology to "pipeline" across scripts and source files for consistency. Update comments and logging messages to reflect the change from "backend" to "pipeline". Remove unused React Native package files related to RAG.

* Complete RAG Flutter implementation (full state) (#419)

  RAG Flutter SDK.
  - There are several UI/UX issues (e.g. a button that doesn't load, an improperly rendered circular download spinner), but the RAG pipeline works.
  - ONNX and RAG built together produced duplicate-symbol errors, since RAG requires ONNX. A near-term follow-up is to include ONNX in core as well, perhaps behind a conditional.

* Optimised RAG + implement hybrid search

* Fixed tnc block error.

* Changed batching parameters and the similarity threshold; optimised embedding memory and speed

* fix(rag): close the anonymous namespace in rag_chunker.cpp to fix compilation

  The anonymous namespace wrapping perform_recursive_chunking was never closed, so the DocumentChunker member definitions ended up inside the anonymous namespace — resulting in "cannot define or redeclare" errors.
  Made-with: Cursor

* fix: remove stale runanywhere-core-rag module references from the Android app

  RAG was moved into the core SDK, but the Android example app still referenced the deleted module, breaking the build.

* Fixing iOS/Swift - removing RAGBackend - refactor

* Fixing the TTS for platform in the voice agent

* LoRA fixes - to match up with Kotlin

* refactor: fold the RAG backend into rac_commons, remove the separate RAG binary

  - Changed rac_backend_rag from a SHARED/STATIC to an OBJECT library (CMake)
  - RAG objects are folded into rac_commons at compile time
  - Moved the ONNX embedding provider to rac_backend_onnx to break a shared-lib cycle
  - The ONNX backend now registers the embeddings provider during rac_backend_onnx_register()
  - Removed RAG as a separate backend from all build scripts and SDK configs
  - Updated Android, Kotlin, Flutter, and React Native build/distribution pipelines
  - The RAG JNI bridge (librac_backend_rag_jni.so) remains as a thin wrapper linking rac_commons

* Fixing RN for RAG + some permissions for VLM + npm dependencies + improved archive logic

* Refactor for React - TTS is causing trouble; refactoring that now, will follow with Flutter once done

---------
Co-authored-by: Siddhesh <DARKWILDHACKER@gmail.com>
Co-authored-by: Sanchit Monga <sanchitmonga22@gmail.com>
Co-authored-by: VyasGuru <71374747+VyasGuru@users.noreply.github.com>
Co-authored-by: Sanchit Monga <sm3468@g.rit.edu>
Co-authored-by: Siddhesh2377 <siddheshsonar2377@gmail.com>
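The WAV fix described above (both in the TTS path and in VoiceAssistantViewModel) replaces a hardcoded 44-byte PCM offset with a walk over the RIFF chunk list. A minimal sketch of that walk — this follows the RIFF container layout (little-endian sizes, word-aligned chunks), not the app's exact code:

```java
// Walk a RIFF/WAV file's chunk list to find where the "data" chunk's PCM
// bytes begin. A fixed 44-byte offset is only valid for the simplest files;
// extra chunks (LIST, fact, bext) push the data chunk further out, and
// feeding those metadata bytes to AudioTrack as PCM distorts playback.
final class WavChunks {
    static int dataOffset(byte[] b) {
        if (b.length < 12 || !tag(b, 0).equals("RIFF") || !tag(b, 8).equals("WAVE")) {
            throw new IllegalArgumentException("not a WAV file");
        }
        int pos = 12; // first chunk starts right after the 12-byte RIFF header
        while (pos + 8 <= b.length) {
            String id = tag(b, pos);
            int size = u32le(b, pos + 4);
            if (id.equals("data")) return pos + 8; // PCM starts after the chunk header
            pos += 8 + size + (size & 1);          // chunks are word-aligned: pad odd sizes
        }
        throw new IllegalArgumentException("no data chunk found");
    }

    private static String tag(byte[] b, int at) {
        return new String(b, at, 4, java.nio.charset.StandardCharsets.US_ASCII);
    }

    private static int u32le(byte[] b, int at) { // little-endian 32-bit read
        return (b[at] & 0xFF) | ((b[at + 1] & 0xFF) << 8)
             | ((b[at + 2] & 0xFF) << 16) | ((b[at + 3] & 0xFF) << 24);
    }
}
```

For a plain 16-byte `fmt ` chunk with no extras this returns 44, matching the old hardcoded offset; any additional chunk before `data` shifts the result accordingly.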
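The ResponsiveUtils item above adds a designWidth <= 0 guard to the rDp/rSp scaling helpers. The underlying idea is the common "design reference width" pattern; the formula and signature below are an assumed illustration, not the app's actual utilities:

```java
// Responsive dimension scaling: design-time sizes are specified against a
// reference width and scaled proportionally to the actual screen width.
// The guard is the fix from the commit: without it, a zero (or negative)
// designWidth divides by zero when computing the scale factor.
final class ResponsiveUtils {
    static float rDp(float designValue, float screenWidth, float designWidth) {
        if (designWidth <= 0f) return designValue; // guard: fall back to the raw value
        return designValue * (screenWidth / designWidth);
    }
}
```

So a 100dp design value against a 360dp reference renders as 200dp on a 720dp-wide screen, and a misconfigured reference width degrades gracefully instead of crashing.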
1 parent 99216a5 commit 838d057

228 files changed

Lines changed: 10609 additions & 17501 deletions


Package.swift

Lines changed: 4 additions & 78 deletions
```diff
@@ -37,18 +37,12 @@ import Foundation
 // ./scripts/build-swift.sh --set-remote (sets useLocalBinaries = false)
 //
 // =============================================================================
-let useLocalBinaries = false // Toggle: true for local dev, false for release
+let useLocalBinaries = true // Toggle: true for local dev, false for release

 // Version for remote XCFrameworks (used when testLocal = false)
 // Updated automatically by CI/CD during releases
 let sdkVersion = "0.19.6"

-// RAG binary is only available in local dev mode until the release artifact is published.
-// In remote mode, the RAG xcframework zip + checksum don't exist yet, so including the
-// binary target would block ALL SPM package resolution (not just RAG).
-// Set to true once RABackendRAG-v<version>.zip is published to GitHub releases.
-let ragRemoteBinaryAvailable = false
-
 let package = Package(
     name: "runanywhere-sdks",
     platforms: [
@@ -87,7 +81,7 @@
             name: "RunAnywhereWhisperKit",
             targets: ["WhisperKitRuntime"]
         ),
-    ] + ragProducts(),
+    ],
     dependencies: [
         .package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"),
         .package(url: "https://github.com/Alamofire/Alamofire.git", from: "5.9.0"),
@@ -152,7 +146,7 @@
                 .product(name: "StableDiffusion", package: "ml-stable-diffusion"),
                 "CRACommons",
                 "RACommonsBinary",
-            ] + ragCoreDependencies(),
+            ],
             path: "sdk/runanywhere-swift/Sources/RunAnywhere",
             exclude: ["CRACommons"],
             swiftSettings: [
@@ -231,62 +225,9 @@
         path: "sdk/runanywhere-swift/Tests/RunAnywhereTests"
     ),

-] + ragTargets() + binaryTargets()
+] + binaryTargets()
 )

-// =============================================================================
-// RAG TARGET HELPERS
-// =============================================================================
-// RAG targets are gated because the remote binary artifact doesn't exist yet.
-// Including a binary target with a placeholder checksum blocks ALL SPM resolution.
-
-/// RAG product (library) — only included when the binary is available
-func ragProducts() -> [Product] {
-    guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
-    return [
-        .library(
-            name: "RunAnywhereRAG",
-            targets: ["RAGRuntime"]
-        ),
-    ]
-}
-
-/// RAG dependency for the RunAnywhere core target
-/// NOTE: Core already accesses RAG C headers via CRACommons umbrella (rac_rag.h, rac_rag_pipeline.h).
-/// No additional dependency needed — RAGBackend is only used by RAGRuntime.
-func ragCoreDependencies() -> [Target.Dependency] {
-    return []
-}
-
-/// RAG-related targets (C bridge + Swift runtime)
-func ragTargets() -> [Target] {
-    guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
-    return [
-        // C Bridge Module - RAG Backend Headers
-        .target(
-            name: "RAGBackend",
-            dependencies: ["RABackendRAGBinary"],
-            path: "sdk/runanywhere-swift/Sources/RAGRuntime/include",
-            publicHeadersPath: "."
-        ),
-        // RAG Runtime Backend
-        .target(
-            name: "RAGRuntime",
-            dependencies: [
-                "RunAnywhere",
-                "RAGBackend",
-                "ONNXRuntime",
-                "LlamaCPPRuntime",
-            ],
-            path: "sdk/runanywhere-swift/Sources/RAGRuntime",
-            exclude: ["include"],
-            linkerSettings: [
-                .linkedLibrary("c++"),
-            ]
-        ),
-    ]
-}
-
 // =============================================================================
 // BINARY TARGET SELECTION
 // =============================================================================
@@ -314,10 +255,6 @@
         name: "RABackendONNXBinary",
         path: "sdk/runanywhere-swift/Binaries/RABackendONNX.xcframework"
     ),
-    .binaryTarget(
-        name: "RABackendRAGBinary",
-        path: "sdk/runanywhere-swift/Binaries/RABackendRAG.xcframework"
-    ),
 ]

 // ONNX Runtime xcframeworks - split by platform
@@ -369,17 +306,6 @@
     ),
 ]

-// Only include RAG binary when the release artifact is available
-if ragRemoteBinaryAvailable {
-    targets.append(
-        .binaryTarget(
-            name: "RABackendRAGBinary",
-            url: "https://github.com/RunanywhereAI/runanywhere-sdks/releases/download/v\(sdkVersion)/RABackendRAG-v\(sdkVersion).zip",
-            checksum: "0000000000000000000000000000000000000000000000000000000000000000" // Replace with actual checksum
-        )
-    )
-}
-
 return targets
 }
 }
```

examples/android/RunAnywhereAI/app/build.gradle.kts

Lines changed: 1 addition & 1 deletion
```diff
@@ -219,7 +219,7 @@ dependencies {
     // Pick the backends you need:
     implementation(project(":runanywhere-core-llamacpp")) // ~45MB - LLM text generation
     implementation(project(":runanywhere-core-onnx")) // ~30MB - STT, TTS, VAD
-    implementation(project(":runanywhere-core-rag")) // RAG pipeline JNI bindings
+    // RAG pipeline is now part of the core SDK (not a separate module)

     // AndroidX Core & Lifecycle
     implementation(libs.androidx.core.ktx)
```

examples/android/RunAnywhereAI/settings.gradle.kts

Lines changed: 2 additions & 4 deletions
```diff
@@ -53,7 +53,5 @@ include(":runanywhere-core-onnx")
 project(":runanywhere-core-onnx").projectDir =
     file("../../../sdk/runanywhere-kotlin/modules/runanywhere-core-onnx")

-// RAG module - Retrieval-Augmented Generation adapter
-include(":runanywhere-core-rag")
-project(":runanywhere-core-rag").projectDir =
-    file("../../../sdk/runanywhere-kotlin/modules/runanywhere-core-rag")
+// RAG pipeline is now part of the core SDK (not a separate module).
+// Registration is handled by ragCreatePipeline(). See: RunAnywhere+RAG.jvmAndroid.kt
```

examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart

Lines changed: 54 additions & 41 deletions
```diff
@@ -10,8 +10,8 @@
 import 'package:runanywhere_ai/core/design_system/app_spacing.dart';
 import 'package:runanywhere_ai/core/services/model_manager.dart';
 import 'package:runanywhere_ai/core/utilities/constants.dart';
 import 'package:runanywhere_ai/core/utilities/keychain_helper.dart';
+import 'package:runanywhere/public/extensions/rag_module.dart';
 import 'package:runanywhere_llamacpp/runanywhere_llamacpp.dart';
-import 'package:runanywhere_onnx/runanywhere_onnx.dart';

 /// RunAnywhereAIApp (mirroring iOS RunAnywhereAIApp.swift)
 ///
@@ -140,11 +140,8 @@
   Future<void> _registerModulesAndModels() async {
     debugPrint('📦 Registering modules with their models...');

-    // LlamaCPP module with LLM models
-    // Using explicit IDs ensures models are recognized after download across app restarts
+    // --- LLAMACPP MODULE ---
     await LlamaCpp.register();
-
-    // Yield after heavy backend registration
     await Future<void>.delayed(Duration.zero);

     LlamaCpp.addModel(
@@ -190,8 +187,6 @@
       memoryRequirement: 400000000,
     );

-    // Tool Calling Optimized Models
-    // LFM2-1.2B-Tool - Designed for concise and precise tool calling (Liquid AI)
     LlamaCpp.addModel(
       id: 'lfm2-1.2b-tool-q4_k_m',
       name: 'LiquidAI LFM2 1.2B Tool Q4_K_M',
@@ -206,16 +201,10 @@
           'https://huggingface.co/LiquidAI/LFM2-1.2B-Tool-GGUF/resolve/main/LFM2-1.2B-Tool-Q8_0.gguf',
       memoryRequirement: 1400000000,
     );
-    debugPrint('✅ LlamaCPP module registered with LLM models (including tool-calling optimized models)');
-
-    // Yield between module registrations
+    debugPrint('✅ LlamaCPP module registered');
     await Future<void>.delayed(Duration.zero);

-    // Register VLM (Vision Language) models
-    // VLM models require 2 files: main model + mmproj (vision projector)
-    // Bundled as tar.gz archives for easy download/extraction
-
-    // SmolVLM 500M - Ultra-lightweight VLM for mobile (~500MB total)
+    // --- VLM MODULE ---
     RunAnywhere.registerModel(
       id: 'smolvlm-500m-instruct-q8_0',
       name: 'SmolVLM 500M Instruct',
@@ -229,57 +218,81 @@
       memoryRequirement: 600000000,
     );
     debugPrint('✅ VLM models registered');
-
-    // Yield between module registrations
-    await Future<void>.delayed(Duration.zero);
-
-    // Diffusion (image generation) is not registered here. CoreML diffusion is supported
-    // only in the Swift SDK and Swift example app; Flutter/RN do not register diffusion.
-
-    // ONNX module with STT and TTS models
-    // Using tar.gz format hosted on RunanywhereAI/sherpa-onnx for fast native extraction
-    // Using explicit IDs ensures models are recognized after download across app restarts
-    await Onnx.register();
-
-    // Yield after heavy backend registration
     await Future<void>.delayed(Duration.zero);

+    // --- ONNX MODULE (STT/TTS via Core SDK) ---
     // STT Models (Sherpa-ONNX Whisper)
-    Onnx.addModel(
+    RunAnywhere.registerModel(
       id: 'sherpa-onnx-whisper-tiny.en',
       name: 'Sherpa Whisper Tiny (ONNX)',
-      url:
-          'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz',
+      url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-tiny.en.tar.gz'),
+      framework: InferenceFramework.onnx,
       modality: ModelCategory.speechRecognition,
       memoryRequirement: 75000000,
     );
-    Onnx.addModel(
+
+    RunAnywhere.registerModel(
       id: 'sherpa-onnx-whisper-small.en',
       name: 'Sherpa Whisper Small (ONNX)',
-      url:
-          'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-small.en.tar.gz',
+      url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/sherpa-onnx-whisper-small.en.tar.gz'),
+      framework: InferenceFramework.onnx,
       modality: ModelCategory.speechRecognition,
       memoryRequirement: 250000000,
     );

     // TTS Models (Piper VITS)
-    Onnx.addModel(
+    RunAnywhere.registerModel(
       id: 'vits-piper-en_US-lessac-medium',
       name: 'Piper TTS (US English - Medium)',
-      url:
-          'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz',
+      url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz'),
+      framework: InferenceFramework.onnx,
       modality: ModelCategory.speechSynthesis,
       memoryRequirement: 65000000,
     );
-    Onnx.addModel(
+
+    RunAnywhere.registerModel(
       id: 'vits-piper-en_GB-alba-medium',
       name: 'Piper TTS (British English)',
-      url:
-          'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_GB-alba-medium.tar.gz',
+      url: Uri.parse('https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_GB-alba-medium.tar.gz'),
+      framework: InferenceFramework.onnx,
       modality: ModelCategory.speechSynthesis,
       memoryRequirement: 65000000,
     );
-    debugPrint('✅ ONNX module registered with STT/TTS models');
+    debugPrint('✅ STT/TTS models registered via Core SDK');
+    await Future<void>.delayed(Duration.zero);
+
+    // --- RAG EMBEDDINGS ---
+    RunAnywhere.registerMultiFileModel(
+      id: 'all-minilm-l6-v2',
+      name: 'All MiniLM L6 v2 (Embedding)',
+      files: [
+        ModelFileDescriptor(
+          relativePath: 'model.onnx',
+          destinationPath: 'model.onnx',
+          url: Uri.parse(
+              'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx'),
+        ),
+        ModelFileDescriptor(
+          relativePath: 'vocab.txt',
+          destinationPath: 'vocab.txt',
+          url: Uri.parse(
+              'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/vocab.txt'),
+        ),
+      ],
+      framework: InferenceFramework.onnx,
+      modality: ModelCategory.embedding,
+      memoryRequirement: 25500000,
+    );
+    debugPrint('✅ ONNX Embedding models registered');
+    await Future<void>.delayed(Duration.zero);
+
+    // --- RAG BACKEND ---
+    try {
+      await RAGModule.register();
+      debugPrint('✅ RAG backend registered');
+    } catch (e) {
+      debugPrint('⚠️ RAG backend not available (RAG features disabled): $e');
+    }

     debugPrint('🎉 All modules and models registered');
   }
```

examples/flutter/RunAnywhereAI/lib/features/chat/chat_interface_view.dart

Lines changed: 13 additions & 1 deletion
```diff
@@ -16,6 +16,7 @@
 import 'package:runanywhere_ai/features/models/model_selection_sheet.dart';
 import 'package:runanywhere_ai/features/models/model_status_components.dart';
 import 'package:runanywhere_ai/features/models/model_types.dart';
 import 'package:runanywhere_ai/features/settings/tool_settings_view_model.dart';
+import 'package:runanywhere_ai/features/rag/rag_demo_view.dart';
 import 'package:runanywhere_ai/features/structured_output/structured_output_view.dart';
 import 'package:shared_preferences/shared_preferences.dart';

@@ -454,6 +455,17 @@
       appBar: AppBar(
         title: const Text('Chat'),
         actions: [
+          IconButton(
+            icon: const Icon(Icons.article_outlined),
+            onPressed: () {
+              Navigator.of(context).push<void>(
+                MaterialPageRoute<void>(
+                  builder: (context) => const RagDemoView(),
+                ),
+              );
+            },
+            tooltip: 'Document Q&A',
+          ),
           IconButton(
             icon: const Icon(Icons.data_object),
             onPressed: () {
@@ -947,4 +959,4 @@
       ),
     );
   }
-}
+}
```

examples/flutter/RunAnywhereAI/lib/features/models/model_list_view_model.dart

Lines changed: 2 additions & 0 deletions
```diff
@@ -106,6 +106,8 @@
       return ModelCategory.imageGeneration;
     case sdk.ModelCategory.audio:
       return ModelCategory.audio;
+    case sdk.ModelCategory.embedding:
+      return ModelCategory.embedding;
   }
 }
```
examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -507,8 +507,15 @@ class _ModelSelectionSheetState extends State<ModelSelectionSheet> {
507507
});
508508

509509
try {
510-
// Update view model selection state
511-
await _viewModel.selectModel(model);
510+
// RAG contexts record the selection only — do NOT pre-load into memory.
511+
// The RAG pipeline loads models on demand when the document is ingested.
512+
final isRagContext = widget.context == ModelSelectionContext.ragEmbedding ||
513+
widget.context == ModelSelectionContext.ragLLM;
514+
515+
if (!isRagContext) {
516+
// Update view model selection state (loads the model into memory)
517+
await _viewModel.selectModel(model);
518+
}
512519

513520
// Call the callback - this is where the actual model loading happens
514521
// The callback knows the correct context and how to load the model
