
Commit 5532e6d

VyasGuru and claude authored
VLM in Flutter SDK (#367)
* VLM in Flutter SDK: updated the Flutter SDK to support VLM, following the Swift SDK. Once the example app has the corresponding changes, this can be tested end to end; it has been test-built.
* Flutter example app
* React Native SDK VLM support
* React Native example app
* Pod and Info.plist for camera

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent: f1a4e57 · commit: 5532e6d

51 files changed: 5,429 additions & 110 deletions


examples/flutter/RunAnywhereAI/ios/Podfile

Lines changed: 2 additions & 1 deletion
@@ -43,11 +43,12 @@ post_install do |installer|
   target.build_configurations.each do |config|
     config.build_settings['IPHONEOS_DEPLOYMENT_TARGET'] = '14.0'

-    # Enable microphone and speech recognition permissions for permission_handler
+    # Enable permissions for permission_handler
     config.build_settings['GCC_PREPROCESSOR_DEFINITIONS'] ||= [
       '$(inherited)',
       'PERMISSION_MICROPHONE=1',
       'PERMISSION_SPEECH_RECOGNIZER=1',
+      'PERMISSION_CAMERA=1',
     ]
   end
 end

examples/flutter/RunAnywhereAI/ios/Runner.xcodeproj/project.pbxproj

Lines changed: 3 additions & 0 deletions
@@ -472,6 +472,7 @@
     CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
     DEVELOPMENT_TEAM = L86FH3K93L;
     ENABLE_BITCODE = NO;
+    ENABLE_RESOURCE_ACCESS_CAMERA = YES;
     INFOPLIST_FILE = Runner/Info.plist;
     LD_RUNPATH_SEARCH_PATHS = (
       "$(inherited)",
@@ -656,6 +657,7 @@
     CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
     DEVELOPMENT_TEAM = L86FH3K93L;
     ENABLE_BITCODE = NO;
+    ENABLE_RESOURCE_ACCESS_CAMERA = YES;
     INFOPLIST_FILE = Runner/Info.plist;
     LD_RUNPATH_SEARCH_PATHS = (
       "$(inherited)",
@@ -680,6 +682,7 @@
     CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
     DEVELOPMENT_TEAM = L86FH3K93L;
     ENABLE_BITCODE = NO;
+    ENABLE_RESOURCE_ACCESS_CAMERA = YES;
     INFOPLIST_FILE = Runner/Info.plist;
     LD_RUNPATH_SEARCH_PATHS = (
       "$(inherited)",

examples/flutter/RunAnywhereAI/ios/Runner/Info.plist

Lines changed: 10 additions & 8 deletions
@@ -2,6 +2,8 @@
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
 <dict>
+    <key>CADisableMinimumFrameDurationOnPhone</key>
+    <true/>
     <key>CFBundleDevelopmentRegion</key>
     <string>$(DEVELOPMENT_LANGUAGE)</string>
     <key>CFBundleDisplayName</key>
@@ -24,6 +26,14 @@
     <string>$(FLUTTER_BUILD_NUMBER)</string>
     <key>LSRequiresIPhoneOS</key>
     <true/>
+    <key>NSCameraUsageDescription</key>
+    <string>RunAnywhere AI needs access to your camera for vision language model features to analyze images.</string>
+    <key>NSMicrophoneUsageDescription</key>
+    <string>This app needs microphone access for voice assistant features.</string>
+    <key>NSSpeechRecognitionUsageDescription</key>
+    <string>This app needs speech recognition for voice assistant features.</string>
+    <key>UIApplicationSupportsIndirectInputEvents</key>
+    <true/>
     <key>UILaunchStoryboardName</key>
     <string>LaunchScreen</string>
     <key>UIMainStoryboardFile</key>
@@ -41,13 +51,5 @@
         <string>UIInterfaceOrientationLandscapeLeft</string>
         <string>UIInterfaceOrientationLandscapeRight</string>
     </array>
-    <key>CADisableMinimumFrameDurationOnPhone</key>
-    <true/>
-    <key>UIApplicationSupportsIndirectInputEvents</key>
-    <true/>
-    <key>NSMicrophoneUsageDescription</key>
-    <string>This app needs microphone access for voice assistant features.</string>
-    <key>NSSpeechRecognitionUsageDescription</key>
-    <string>This app needs speech recognition for voice assistant features.</string>
 </dict>
 </plist>

examples/flutter/RunAnywhereAI/lib/app/content_view.dart

Lines changed: 13 additions & 6 deletions
@@ -3,14 +3,15 @@ import 'package:runanywhere_ai/core/design_system/app_colors.dart';
 import 'package:runanywhere_ai/features/chat/chat_interface_view.dart';
 import 'package:runanywhere_ai/features/settings/combined_settings_view.dart';
 import 'package:runanywhere_ai/features/tools/tools_view.dart';
+import 'package:runanywhere_ai/features/vision/vision_hub_view.dart';
 import 'package:runanywhere_ai/features/voice/speech_to_text_view.dart';
 import 'package:runanywhere_ai/features/voice/text_to_speech_view.dart';
 import 'package:runanywhere_ai/features/voice/voice_assistant_view.dart';

 /// ContentView (mirroring iOS ContentView.swift)
 ///
 /// Main tab-based navigation for the app.
-/// Tabs exactly match iOS: Chat, Transcribe (STT), Speak (TTS), Voice, Settings
+/// Tabs: Chat, Vision, Transcribe (STT), Speak (TTS), Voice, Tools, Settings
 class ContentView extends StatefulWidget {
   const ContentView({super.key});

@@ -24,11 +25,12 @@ class _ContentViewState extends State<ContentView> {
   // Tab pages matching iOS structure exactly
   final List<Widget> _pages = const [
     ChatInterfaceView(), // Tab 0: Chat (LLM)
-    SpeechToTextView(), // Tab 1: Speech-to-Text (Transcribe)
-    TextToSpeechView(), // Tab 2: Text-to-Speech (Speak)
-    VoiceAssistantView(), // Tab 3: Voice Assistant (STT + LLM + TTS)
-    ToolsView(), // Tab 4: Tools (Tool Calling)
-    CombinedSettingsView(), // Tab 5: Settings (includes Storage)
+    VisionHubView(), // Tab 1: Vision (VLM + Image Generation)
+    SpeechToTextView(), // Tab 2: Speech-to-Text (Transcribe)
+    TextToSpeechView(), // Tab 3: Text-to-Speech (Speak)
+    VoiceAssistantView(), // Tab 4: Voice Assistant (STT + LLM + TTS)
+    ToolsView(), // Tab 5: Tools (Tool Calling)
+    CombinedSettingsView(), // Tab 6: Settings (includes Storage)
   ];

   @override
@@ -53,6 +55,11 @@
             selectedIcon: Icon(Icons.chat_bubble),
             label: 'Chat',
           ),
+          NavigationDestination(
+            icon: Icon(Icons.visibility_outlined),
+            selectedIcon: Icon(Icons.visibility),
+            label: 'Vision',
+          ),
           NavigationDestination(
             icon: Icon(Icons.graphic_eq_outlined),
             selectedIcon: Icon(Icons.graphic_eq),

examples/flutter/RunAnywhereAI/lib/app/runanywhere_ai_app.dart

Lines changed: 22 additions & 0 deletions
@@ -211,6 +211,28 @@ class _RunAnywhereAIAppState extends State<RunAnywhereAIApp> {
     // Yield between module registrations
     await Future<void>.delayed(Duration.zero);

+    // Register VLM (Vision Language) models
+    // VLM models require 2 files: main model + mmproj (vision projector)
+    // Bundled as tar.gz archives for easy download/extraction
+
+    // SmolVLM 500M - Ultra-lightweight VLM for mobile (~500MB total)
+    RunAnywhere.registerModel(
+      id: 'smolvlm-500m-instruct-q8_0',
+      name: 'SmolVLM 500M Instruct',
+      url: Uri.parse(
+          'https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-vlm-models-v1/smolvlm-500m-instruct-q8_0.tar.gz'),
+      framework: InferenceFramework.llamaCpp,
+      modality: ModelCategory.multimodal,
+      artifactType: ModelArtifactType.tarGzArchive(
+        structure: ArchiveStructure.directoryBased,
+      ),
+      memoryRequirement: 600000000,
+    );
+    debugPrint('✅ VLM models registered');
+
+    // Yield between module registrations
+    await Future<void>.delayed(Duration.zero);
+
     // Diffusion (image generation) is not registered here. CoreML diffusion is supported
     // only in the Swift SDK and Swift example app; Flutter/RN do not register diffusion.
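
For orientation, a minimal usage sketch for the model registered above. Only `RunAnywhere.registerModel` is confirmed by this diff; the SDK import path and the `downloadModel`, `loadModel`, and `generateWithImage` calls are assumed names for illustration and may differ from the actual Flutter SDK surface.

// Hypothetical usage sketch; every call except registerModel is an
// assumption, not confirmed by this commit. The SDK import is omitted
// because the package name is not shown in this excerpt.
Future<void> describeImage(String imagePath) async {
  const modelId = 'smolvlm-500m-instruct-q8_0';

  // Assumed download/load entry points: fetch and extract the tar.gz
  // archive, then load the main model + mmproj pair via llama.cpp.
  await RunAnywhere.downloadModel(modelId);
  await RunAnywhere.loadModel(modelId);

  // Assumed multimodal generation call: text prompt plus image path.
  final reply = await RunAnywhere.generateWithImage(
    prompt: 'Describe this image.',
    imagePath: imagePath,
  );
  print(reply);
}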

examples/flutter/RunAnywhereAI/lib/core/services/permission_service.dart

Lines changed: 65 additions & 0 deletions
@@ -173,6 +173,71 @@ class PermissionService {
     return true;
   }

+  /// Request camera permission with proper handling of all states
+  ///
+  /// Returns true if permission is granted, false otherwise.
+  /// Shows appropriate dialogs for denied/permanently denied states.
+  Future<bool> requestCameraPermission(BuildContext context) async {
+    final status = await Permission.camera.status;
+
+    if (status.isGranted) {
+      return true;
+    }
+
+    if (status.isPermanentlyDenied) {
+      if (!context.mounted) return false;
+      // Permission was permanently denied, show settings dialog
+      final shouldOpenSettings = await _showSettingsDialog(
+        context,
+        title: 'Camera Permission Required',
+        message:
+            'Camera access is required for vision features. Please enable it in Settings.',
+      );
+
+      if (shouldOpenSettings) {
+        await openAppSettings();
+      }
+      return false;
+    }
+
+    // Request permission
+    final result = await Permission.camera.request();
+
+    if (result.isGranted) {
+      return true;
+    }
+
+    if (!context.mounted) return false;
+
+    if (result.isPermanentlyDenied) {
+      // User denied with "Don't ask again", show settings dialog
+      final shouldOpenSettings = await _showSettingsDialog(
+        context,
+        title: 'Camera Permission Required',
+        message:
+            'Camera access is required for vision features. Please enable it in Settings.',
+      );
+
+      if (shouldOpenSettings) {
+        await openAppSettings();
+      }
+    } else if (result.isDenied) {
+      // User denied, show explanation
+      _showDeniedSnackbar(
+        context,
+        'Camera permission is required for vision features.',
+      );
+    }
+
+    return false;
+  }
+
+  /// Check if camera permission is granted without requesting
+  Future<bool> isCameraPermissionGranted() async {
+    final status = await Permission.camera.status;
+    return status.isGranted;
+  }
+
   /// Show dialog to guide user to settings
   Future<bool> _showSettingsDialog(
     BuildContext context, {
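
A usage sketch for the new helper follows. `requestCameraPermission` comes from the diff above; wiring it to the image_picker package for capture is an illustrative assumption, not part of this commit.

import 'package:flutter/material.dart';
import 'package:image_picker/image_picker.dart';
// PermissionService is the app-local class shown in the diff above;
// its import path is assumed.

Future<void> captureForVlm(
    BuildContext context, PermissionService permissions) async {
  // The helper handles the denied / permanently-denied dialogs internally,
  // so callers only need the boolean result.
  final granted = await permissions.requestCameraPermission(context);
  if (!granted) return;

  // Assumed capture path: grab a photo to feed the VLM.
  final photo = await ImagePicker().pickImage(source: ImageSource.camera);
  if (photo != null) {
    debugPrint('Captured image for VLM: ${photo.path}');
  }
}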

examples/flutter/RunAnywhereAI/lib/features/models/model_selection_sheet.dart

Lines changed: 14 additions & 2 deletions
@@ -483,7 +483,12 @@ class _ModelSelectionSheetState extends State<ModelSelectionSheet> {
       setState(() {
         _isLoadingModel = false;
       });
-      Navigator.pop(context);
+      // Defer Navigator.pop until after the current frame completes
+      WidgetsBinding.instance.addPostFrameCallback((_) {
+        if (mounted) {
+          Navigator.pop(context);
+        }
+      });
     }
   }

@@ -511,7 +516,14 @@
       await widget.onModelSelected(model);

       if (mounted) {
-        Navigator.pop(context);
+        // Defer Navigator.pop until after the current frame completes
+        // This prevents the !_debugLocked assertion when the callback triggers
+        // navigation (e.g., loading a VLM model may trigger state changes)
+        WidgetsBinding.instance.addPostFrameCallback((_) {
+          if (mounted) {
+            Navigator.pop(context);
+          }
+        });
       }
     } catch (e) {
       debugPrint('❌ Failed to load model: $e');
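
A note on the design choice above: popping the navigator synchronously from an async model-load callback can run while the framework is mid-build, which trips the `!_debugLocked` assertion. Deferring with `addPostFrameCallback` and re-checking `mounted` pushes the pop past the current frame. The same pattern can be factored into a small helper, sketched here (illustrative, not part of this commit):

import 'package:flutter/widgets.dart';

/// Defer a Navigator.pop until the current frame completes,
/// guarding on State.mounted to avoid popping a disposed route.
void popAfterFrame(State state) {
  WidgetsBinding.instance.addPostFrameCallback((_) {
    if (state.mounted) {
      Navigator.pop(state.context);
    }
  });
}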

examples/flutter/RunAnywhereAI/lib/features/models/model_status_components.dart

Lines changed: 6 additions & 0 deletions
@@ -249,6 +249,8 @@ class ModelRequiredOverlay extends StatelessWidget {
         return Icons.volume_up;
       case ModelSelectionContext.voice:
         return Icons.mic;
+      case ModelSelectionContext.vlm:
+        return Icons.center_focus_strong;
     }
   }

@@ -262,6 +264,8 @@
         return 'Text to Speech';
       case ModelSelectionContext.voice:
         return 'Voice Assistant';
+      case ModelSelectionContext.vlm:
+        return 'Vision Language Model';
     }
   }

@@ -275,6 +279,8 @@
         return 'Select a text-to-speech model to generate audio. Choose from Piper TTS or System TTS.';
       case ModelSelectionContext.voice:
         return 'Voice assistant requires multiple models. Let\'s set them up together.';
+      case ModelSelectionContext.vlm:
+        return 'Select a vision-language model to analyze images. Point your camera or pick a photo to get AI descriptions.';
     }
   }
 }

examples/flutter/RunAnywhereAI/lib/features/models/model_types.dart

Lines changed: 6 additions & 1 deletion
@@ -123,7 +123,8 @@ enum ModelSelectionContext {
   llm,
   stt,
   tts,
-  voice;
+  voice,
+  vlm;

   String get title {
     switch (this) {
@@ -135,6 +136,8 @@
         return 'Select TTS Model';
       case ModelSelectionContext.voice:
         return 'Select Model';
+      case ModelSelectionContext.vlm:
+        return 'Select VLM Model';
     }
   }

@@ -153,6 +156,8 @@
           ModelCategory.speechRecognition,
           ModelCategory.speechSynthesis,
         };
+      case ModelSelectionContext.vlm:
+        return {ModelCategory.vision, ModelCategory.multimodal};
     }
   }
 }
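
The new `vlm` case presumably drives model filtering in the selection sheet. A sketch of how the category set might be applied, assuming the enclosing getter is named `supportedCategories` and the model descriptor type `ModelInfo` exposes a `category` field (both assumptions; neither signature is visible in this hunk):

// Filtering sketch: keep only models whose category the context supports.
List<ModelInfo> modelsFor(ModelSelectionContext ctx, List<ModelInfo> all) {
  final supported = ctx.supportedCategories;
  return all.where((m) => supported.contains(m.category)).toList();
}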
