
Commit 5532e6d

VyasGuru and claude authored
VLM in Flutter SDK (#367)
* VLM in Flutter SDK: updated the Flutter SDK to support VLM, following the Swift SDK. Once the example app has the corresponding changes, this can be tested end to end; it has been test-built.
* Flutter example app
* React Native SDK VLM support
* React Native example app
* Pod and Info.plist for camera

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent: f1a4e57 · commit: 5532e6d

51 files changed: 5,429 additions & 110 deletions


examples/flutter/RunAnywhereAI/ios/Podfile

Lines changed: 2 additions & 1 deletion
@@ -43,11 +43,12 @@ post_install do |installer|
   target.build_configurations.each do |config|
     config.build_settings['IPHONEOS_DEPLOYMENT_TARGET'] = '14.0'

-    # Enable microphone and speech recognition permissions for permission_handler
+    # Enable permissions for permission_handler
     config.build_settings['GCC_PREPROCESSOR_DEFINITIONS'] ||= [
       '$(inherited)',
       'PERMISSION_MICROPHONE=1',
       'PERMISSION_SPEECH_RECOGNIZER=1',
+      'PERMISSION_CAMERA=1',
     ]
   end
 end

examples/flutter/RunAnywhereAI/ios/Runner.xcodeproj/project.pbxproj

Lines changed: 3 additions & 0 deletions
@@ -472,6 +472,7 @@
     CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
     DEVELOPMENT_TEAM = L86FH3K93L;
     ENABLE_BITCODE = NO;
+    ENABLE_RESOURCE_ACCESS_CAMERA = YES;
     INFOPLIST_FILE = Runner/Info.plist;
     LD_RUNPATH_SEARCH_PATHS = (
       "$(inherited)",
@@ -656,6 +657,7 @@
     CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
     DEVELOPMENT_TEAM = L86FH3K93L;
     ENABLE_BITCODE = NO;
+    ENABLE_RESOURCE_ACCESS_CAMERA = YES;
     INFOPLIST_FILE = Runner/Info.plist;
     LD_RUNPATH_SEARCH_PATHS = (
       "$(inherited)",
@@ -680,6 +682,7 @@
     CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
     DEVELOPMENT_TEAM = L86FH3K93L;
     ENABLE_BITCODE = NO;
+    ENABLE_RESOURCE_ACCESS_CAMERA = YES;
     INFOPLIST_FILE = Runner/Info.plist;
     LD_RUNPATH_SEARCH_PATHS = (
       "$(inherited)",

examples/flutter/RunAnywhereAI/ios/Runner/Info.plist

Lines changed: 10 additions & 8 deletions
@@ -2,6 +2,8 @@
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
 <dict>
+    <key>CADisableMinimumFrameDurationOnPhone</key>
+    <true/>
     <key>CFBundleDevelopmentRegion</key>
     <string>$(DEVELOPMENT_LANGUAGE)</string>
     <key>CFBundleDisplayName</key>
@@ -24,6 +26,14 @@
     <string>$(FLUTTER_BUILD_NUMBER)</string>
     <key>LSRequiresIPhoneOS</key>
     <true/>
+    <key>NSCameraUsageDescription</key>
+    <string>RunAnywhere AI needs access to your camera for vision language model features to analyze images.</string>
+    <key>NSMicrophoneUsageDescription</key>
+    <string>This app needs microphone access for voice assistant features.</string>
+    <key>NSSpeechRecognitionUsageDescription</key>
+    <string>This app needs speech recognition for voice assistant features.</string>
+    <key>UIApplicationSupportsIndirectInputEvents</key>
+    <true/>
     <key>UILaunchStoryboardName</key>
     <string>LaunchScreen</string>
     <key>UIMainStoryboardFile</key>
@@ -41,13 +51,5 @@
         <string>UIInterfaceOrientationLandscapeLeft</string>
         <string>UIInterfaceOrientationLandscapeRight</string>
     </array>
-    <key>CADisableMinimumFrameDurationOnPhone</key>
-    <true/>
-    <key>UIApplicationSupportsIndirectInputEvents</key>
-    <true/>
-    <key>NSMicrophoneUsageDescription</key>
-    <string>This app needs microphone access for voice assistant features.</string>
-    <key>NSSpeechRecognitionUsageDescription</key>
-    <string>This app needs speech recognition for voice assistant features.</string>
 </dict>
 </plist>

examples/flutter/RunAnywhereAI/lib/app/content_view.dart

Lines changed: 13 additions & 6 deletions
@@ -3,14 +3,15 @@ import 'package:runanywhere_ai/core/design_system/app_colors.dart';
 import 'package:runanywhere_ai/features/chat/chat_interface_view.dart';
 import 'package:runanywhere_ai/features/settings/combined_settings_view.dart';
 import 'package:runanywhere_ai/features/tools/tools_view.dart';
+import 'package:runanywhere_ai/features/vision/vision_hub_view.dart';
 import 'package:runanywhere_ai/features/voice/speech_to_text_view.dart';
 import 'package:runanywhere_ai/features/voice/text_to_speech_view.dart';
 import 'package:runanywhere_ai/features/voice/voice_assistant_view.dart';

 /// ContentView (mirroring iOS ContentView.swift)
 ///
 /// Main tab-based navigation for the app.
-/// Tabs exactly match iOS: Chat, Transcribe (STT), Speak (TTS), Voice, Settings
+/// Tabs: Chat, Vision, Transcribe (STT), Speak (TTS), Voice, Tools, Settings
 class ContentView extends StatefulWidget {
   const ContentView({super.key});

@@ -24,11 +25,12 @@ class _ContentViewState extends State<ContentView> {
   // Tab pages matching iOS structure exactly
   final List<Widget> _pages = const [
     ChatInterfaceView(), // Tab 0: Chat (LLM)
-    SpeechToTextView(), // Tab 1: Speech-to-Text (Transcribe)
-    TextToSpeechView(), // Tab 2: Text-to-Speech (Speak)
-    VoiceAssistantView(), // Tab 3: Voice Assistant (STT + LLM + TTS)
-    ToolsView(), // Tab 4: Tools (Tool Calling)
-    CombinedSettingsView(), // Tab 5: Settings (includes Storage)
+    VisionHubView(), // Tab 1: Vision (VLM + Image Generation)
+    SpeechToTextView(), // Tab 2: Speech-to-Text (Transcribe)
+    TextToSpeechView(), // Tab 3: Text-to-Speech (Speak)
+    VoiceAssistantView(), // Tab 4: Voice Assistant (STT + LLM + TTS)
+    ToolsView(), // Tab 5: Tools (Tool Calling)
+    CombinedSettingsView(), // Tab 6: Settings (includes Storage)
   ];

   @override
@@ -53,6 +55,11 @@
             selectedIcon: Icon(Icons.chat_bubble),
             label: 'Chat',
           ),
+          NavigationDestination(
+            icon: Icon(Icons.visibility_outlined),
+            selectedIcon: Icon(Icons.visibility),
+            label: 'Vision',
+          ),
           NavigationDestination(
             icon: Icon(Icons.graphic_eq_outlined),
             selectedIcon: Icon(Icons.graphic_eq),

examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart

Lines changed: 22 additions & 0 deletions
@@ -211,6 +211,28 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
     // Yield between module registrations
     await Future<void>.delayed(Duration.zero);

+    // Register VLM (Vision Language) models
+    // VLM models require 2 files: main model + mmproj (vision projector)
+    // Bundled as tar.gz archives for easy download/extraction
+
+    // SmolVLM 500M - Ultra-lightweight VLM for mobile (~500MB total)
+    RunAnywhere.registerModel(
+      id: 'smolvlm-500m-instruct-q8_0',
+      name: 'SmolVLM 500M Instruct',
+      url: Uri.parse(
+          'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-vlm-models-v1/smolvlm-500m-instruct-q8_0.tar.gz'),
+      framework: InferenceFramework.llamaCpp,
+      modality: ModelCategory.multimodal,
+      artifactType: ModelArtifactType.tarGzArchive(
+        structure: ArchiveStructure.directoryBased,
+      ),
+      memoryRequirement: 600000000,
+    );
+    debugPrint('✅ VLM models registered');
+
+    // Yield between module registrations
+    await Future<void>.delayed(Duration.zero);
+
     // Diffusion (image generation) is not registered here. CoreML diffusion is supported
     // only in the Swift SDK and Swift example app; Flutter/RN do not register diffusion.
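
For orientation, a minimal usage sketch for the model registered above. Only `RunAnywhere.registerModel` is confirmed by this diff; the SDK import path and the `downloadModel`, `loadModel`, and `generateWithImage` calls are assumed names for illustration and may differ from the actual Flutter SDK surface.

// Hypothetical usage sketch; every call except registerModel is an
// assumption, not confirmed by this commit. The SDK import is omitted
// because the package name is not shown in this excerpt.
Future<void> describeImage(String imagePath) async {
  const modelId = 'smolvlm-500m-instruct-q8_0';

  // Assumed download/load entry points: fetch and extract the tar.gz
  // archive, then load the main model + mmproj pair via llama.cpp.
  await RunAnywhere.downloadModel(modelId);
  await RunAnywhere.loadModel(modelId);

  // Assumed multimodal generation call: text prompt plus image path.
  final reply = await RunAnywhere.generateWithImage(
    prompt: 'Describe this image.',
    imagePath: imagePath,
  );
  print(reply);
}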

examples/flutter/RunAnywhereAI/lib/core/services/permission_service.dart

Lines changed: 65 additions & 0 deletions
@@ -173,6 +173,71 @@ class PermissionService {
     return true;
   }

+  /// Request camera permission with proper handling of all states
+  ///
+  /// Returns true if permission is granted, false otherwise.
+  /// Shows appropriate dialogs for denied/permanently denied states.
+  Future<bool> requestCameraPermission(BuildContext context) async {
+    final status = await Permission.camera.status;
+
+    if (status.isGranted) {
+      return true;
+    }
+
+    if (status.isPermanentlyDenied) {
+      if (!context.mounted) return false;
+      // Permission was permanently denied, show settings dialog
+      final shouldOpenSettings = await _showSettingsDialog(
+        context,
+        title: 'Camera Permission Required',
+        message:
+            'Camera access is required for vision features. Please enable it in Settings.',
+      );
+
+      if (shouldOpenSettings) {
+        await openAppSettings();
+      }
+      return false;
+    }
+
+    // Request permission
+    final result = await Permission.camera.request();
+
+    if (result.isGranted) {
+      return true;
+    }
+
+    if (!context.mounted) return false;
+
+    if (result.isPermanentlyDenied) {
+      // User denied with "Don't ask again", show settings dialog
+      final shouldOpenSettings = await _showSettingsDialog(
+        context,
+        title: 'Camera Permission Required',
+        message:
+            'Camera access is required for vision features. Please enable it in Settings.',
+      );
+
+      if (shouldOpenSettings) {
+        await openAppSettings();
+      }
+    } else if (result.isDenied) {
+      // User denied, show explanation
+      _showDeniedSnackbar(
+        context,
+        'Camera permission is required for vision features.',
+      );
+    }
+
+    return false;
+  }
+
+  /// Check if camera permission is granted without requesting
+  Future<bool> isCameraPermissionGranted() async {
+    final status = await Permission.camera.status;
+    return status.isGranted;
+  }
+
   /// Show dialog to guide user to settings
   Future<bool> _showSettingsDialog(
     BuildContext context, {
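
A usage sketch for the new helper follows. `requestCameraPermission` comes from the diff above; wiring it to the image_picker package for capture is an illustrative assumption, not part of this commit.

import 'package:flutter/material.dart';
import 'package:image_picker/image_picker.dart';
// PermissionService is the app-local class shown in the diff above;
// its import path is assumed.

Future<void> captureForVlm(
    BuildContext context, PermissionService permissions) async {
  // The helper handles the denied / permanently-denied dialogs internally,
  // so callers only need the boolean result.
  final granted = await permissions.requestCameraPermission(context);
  if (!granted) return;

  // Assumed capture path: grab a photo to feed the VLM.
  final photo = await ImagePicker().pickImage(source: ImageSource.camera);
  if (photo != null) {
    debugPrint('Captured image for VLM: ${photo.path}');
  }
}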

examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart

Lines changed: 14 additions & 2 deletions
@@ -483,7 +483,12 @@ class _ModelSelectionSheetState extends State<ModelSelectionSheet> {
       setState(() {
         _isLoadingModel = false;
       });
-      Navigator.pop(context);
+      // Defer Navigator.pop until after the current frame completes
+      WidgetsBinding.instance.addPostFrameCallback((_) {
+        if (mounted) {
+          Navigator.pop(context);
+        }
+      });
     }
   }

@@ -511,7 +516,14 @@
       await widget.onModelSelected(model);

       if (mounted) {
-        Navigator.pop(context);
+        // Defer Navigator.pop until after the current frame completes
+        // This prevents the !_debugLocked assertion when the callback triggers
+        // navigation (e.g., loading a VLM model may trigger state changes)
+        WidgetsBinding.instance.addPostFrameCallback((_) {
+          if (mounted) {
+            Navigator.pop(context);
+          }
+        });
       }
     } catch (e) {
       debugPrint('❌ Failed to load model: $e');
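
A note on the design choice above: popping the navigator synchronously from an async model-load callback can run while the framework is mid-build, which trips the `!_debugLocked` assertion. Deferring with `addPostFrameCallback` and re-checking `mounted` pushes the pop past the current frame. The same pattern can be factored into a small helper, sketched here (illustrative, not part of this commit):

import 'package:flutter/widgets.dart';

/// Defer a Navigator.pop until the current frame completes,
/// guarding on State.mounted to avoid popping a disposed route.
void popAfterFrame(State state) {
  WidgetsBinding.instance.addPostFrameCallback((_) {
    if (state.mounted) {
      Navigator.pop(state.context);
    }
  });
}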

examples/flutter/RunAnywhereAI/lib/features/models/model_status_components.dart

Lines changed: 6 additions & 0 deletions
@@ -249,6 +249,8 @@ class ModelRequiredOverlay extends StatelessWidget {
         return Icons.volume_up;
       case ModelSelectionContext.voice:
         return Icons.mic;
+      case ModelSelectionContext.vlm:
+        return Icons.center_focus_strong;
     }
   }

@@ -262,6 +264,8 @@
         return 'Text to Speech';
       case ModelSelectionContext.voice:
         return 'Voice Assistant';
+      case ModelSelectionContext.vlm:
+        return 'Vision Language Model';
     }
   }

@@ -275,6 +279,8 @@
         return 'Select a text-to-speech model to generate audio. Choose from Piper TTS or System TTS.';
       case ModelSelectionContext.voice:
         return 'Voice assistant requires multiple models. Let\'s set them up together.';
+      case ModelSelectionContext.vlm:
+        return 'Select a vision-language model to analyze images. Point your camera or pick a photo to get AI descriptions.';
     }
   }
 }

examples/flutter/RunAnywhereAI/lib/features/models/model_types.dart

Lines changed: 6 additions & 1 deletion
@@ -123,7 +123,8 @@ enum ModelSelectionContext {
   llm,
   stt,
   tts,
-  voice;
+  voice,
+  vlm;

   String get title {
     switch (this) {
@@ -135,6 +136,8 @@
         return 'Select TTS Model';
       case ModelSelectionContext.voice:
         return 'Select Model';
+      case ModelSelectionContext.vlm:
+        return 'Select VLM Model';
     }
   }

@@ -153,6 +156,8 @@
           ModelCategory.speechRecognition,
           ModelCategory.speechSynthesis,
         };
+      case ModelSelectionContext.vlm:
+        return {ModelCategory.vision, ModelCategory.multimodal};
     }
   }
 }
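
The new `vlm` case presumably drives model filtering in the selection sheet. A sketch of how the category set might be applied, assuming the enclosing getter is named `supportedCategories` and the model descriptor type `ModelInfo` exposes a `category` field (both assumptions; neither signature is visible in this hunk):

// Filtering sketch: keep only models whose category the context supports.
List<ModelInfo> modelsFor(ModelSelectionContext ctx, List<ModelInfo> all) {
  final supported = ctx.supportedCategories;
  return all.where((m) => supported.contains(m.category)).toList();
}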
