Skip to content

Commit 274fb63

Browse files
updates
1 parent ff52534 commit 274fb63

8 files changed

Lines changed: 375 additions & 61 deletions

File tree

examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ struct RunAnywhereAIApp: App {
465465
)
466466
}
467467

468-
// --- MetalRT VLM model (Qwen3-VL) ---
468+
// --- MetalRT VLM models ---
469469

470470
if let url = URL(string: "\(metalrtBase)/qwen3-vl-2b-metalrt.tar.gz") {
471471
RunAnywhere.registerModel(
@@ -479,6 +479,18 @@ struct RunAnywhereAIApp: App {
479479
)
480480
}
481481

482+
if let url = URL(string: "\(metalrtBase)/lfm25-vl-metalrt.tar.gz") {
483+
RunAnywhere.registerModel(
484+
id: "lfm25-vl-metalrt",
485+
name: "LFM2.5-VL 1.6B (MetalRT)",
486+
url: url,
487+
framework: .metalrt,
488+
modality: .multimodal,
489+
artifactType: .archive(.tarGz, structure: .nestedDirectory),
490+
memoryRequirement: 1_600_000_000
491+
)
492+
}
493+
482494
logger.info("✅ MetalRT models registered")
483495
#else
484496
logger.info("ℹ️ MetalRT not available (MetalRTRuntime not linked)")

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Benchmarks/Services/DiffusionBenchmarkProvider.swift

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,7 @@ struct DiffusionBenchmarkProvider: BenchmarkScenarioProvider {
5757
guidanceScale: 0.0,
5858
seed: 42
5959
)
60-
// Note: prompt: is required by the SDK API signature, but is ignored when options is provided
61-
// (the SDK uses `options ?? DiffusionGenerationOptions(prompt: prompt)`).
62-
let result = try await RunAnywhere.generateImage(prompt: options.prompt, options: options)
60+
let result = try await RunAnywhere.generateImage(prompt: options.prompt, options: options)
6361

6462
metrics.endToEndLatencyMs = Date().timeIntervalSince(benchStart) * 1000
6563
metrics.generationTimeMs = Double(result.generationTimeMs)

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Benchmarks/Services/VLMBenchmarkProvider.swift

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@ struct VLMBenchmarkProvider: BenchmarkScenarioProvider {
1717

1818
func scenarios() -> [BenchmarkScenario] {
1919
[
20-
BenchmarkScenario(name: "Solid Red Image", category: .vlm, parameters: ["type": "solid"]),
21-
BenchmarkScenario(name: "Gradient Image", category: .vlm, parameters: ["type": "gradient"]),
20+
BenchmarkScenario(name: "Image Description", category: .vlm, parameters: ["type": "gradient"]),
2221
]
2322
}
2423

@@ -31,27 +30,34 @@ struct VLMBenchmarkProvider: BenchmarkScenarioProvider {
3130

3231
let memBefore = SyntheticInputGenerator.availableMemoryBytes()
3332

34-
// Load (pass ModelInfo object)
33+
// Ensure clean state: unload any VLM model left over from Camera or a previous run
34+
await RunAnywhere.unloadVLMModel()
35+
// Also unload any lingering LLM model to free memory headroom
36+
try? await RunAnywhere.unloadModel()
37+
// Brief pause to let iOS reclaim GPU/Metal memory from the previous model
38+
try await Task.sleep(nanoseconds: 500_000_000) // 0.5s
39+
40+
// Load
3541
let loadStart = Date()
3642
try await RunAnywhere.loadVLMModel(model)
3743
metrics.loadTimeMs = Date().timeIntervalSince(loadStart) * 1000
3844

3945
do {
40-
// Generate image
41-
let image: UIImage
42-
switch scenario.parameters?["type"] {
43-
case "solid":
44-
image = SyntheticInputGenerator.solidColorImage()
45-
default:
46-
image = SyntheticInputGenerator.gradientImage()
46+
// Generate a small synthetic image inside an autoreleasepool so CoreGraphics
47+
// intermediates are released promptly before we allocate the vision encoder.
48+
let vlmImage: VLMImage = autoreleasepool {
49+
let image = SyntheticInputGenerator.gradientImage()
50+
return VLMImage(image: image)
4751
}
48-
let vlmImage = VLMImage(image: image)
4952

50-
// Warmup
53+
// Warmup: single token to prime the pipeline without large KV allocation
5154
let warmupStart = Date()
52-
_ = try await RunAnywhere.processImage(vlmImage, prompt: "Hi", maxTokens: 5, temperature: 0.0)
55+
_ = try await RunAnywhere.processImage(vlmImage, prompt: "Hi", maxTokens: 1, temperature: 0.0)
5356
metrics.warmupTimeMs = Date().timeIntervalSince(warmupStart) * 1000
5457

58+
// Cancel to flush any lingering generation state / KV cache before the real run
59+
await RunAnywhere.cancelVLMGeneration()
60+
5561
// Benchmark
5662
let result = try await RunAnywhere.processImage(
5763
vlmImage,
@@ -68,9 +74,12 @@ struct VLMBenchmarkProvider: BenchmarkScenarioProvider {
6874
metrics.memoryDeltaBytes = memBefore - memAfter
6975

7076
await RunAnywhere.unloadVLMModel()
77+
// Give iOS time to release GPU/Metal buffers before the next model loads
78+
try? await Task.sleep(nanoseconds: 300_000_000) // 0.3s
7179
return metrics
7280
} catch {
7381
await RunAnywhere.unloadVLMModel()
82+
try? await Task.sleep(nanoseconds: 300_000_000)
7483
throw error
7584
}
7685
#else

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Benchmarks/Views/BenchmarkDashboardView.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ private struct CategoryScenariosRow: View {
248248
case .tts:
249249
return "Short text, Medium text — measures audio duration, char throughput"
250250
case .vlm:
251-
return "Solid color, Gradient image (224×224) — measures tok/s, completion tokens"
251+
return "Gradient image (224×224) — measures tok/s, completion tokens"
252252
case .diffusion:
253253
return "Simple prompt, 10 steps, seed 42 — measures generation time"
254254
}

examples/ios/RunAnywhereAI/RunAnywhereAI/Features/Benchmarks/Views/BenchmarkDetailView.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,9 @@ private struct MetricsGrid: View {
213213
if let chars = metrics.charactersProcessed { items.append(("Chars", "\(chars)")) }
214214
case .vlm:
215215
if let tps = metrics.tokensPerSecond { items.append(("tok/s", String(format: "%.1f", tps))) }
216-
if let ct = metrics.completionTokens { items.append(("Tokens", "\(ct)")) }
216+
if let pt = metrics.promptTokens, pt > 0 { items.append(("Prompt Tok", "\(pt)")) }
217+
if let ct = metrics.completionTokens { items.append(("Comp Tok", "\(ct)")) }
218+
if metrics.warmupTimeMs > 0 { items.append(("Warmup", String(format: "%.0fms", metrics.warmupTimeMs))) }
217219
case .diffusion:
218220
if let gen = metrics.generationTimeMs { items.append(("Gen", String(format: "%.0fms", gen))) }
219221
}

sdk/runanywhere-commons/src/backends/metalrt/rac_vlm_metalrt.cpp

Lines changed: 44 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,32 @@
55

66
#include "rac_vlm_metalrt.h"
77

8+
#include <cstdint>
89
#include <cstdio>
910
#include <cstdlib>
1011
#include <cstring>
12+
#include <string>
13+
#include <vector>
1114

1215
#include "metalrt_c_api.h"
1316

1417
#include "rac/core/rac_logger.h"
1518

1619
static const char* LOG_CAT = "VLM.MetalRT";
1720

21+
// Expand 3-byte RGB to 4-byte RGBA (alpha=0xFF) for MetalRT's pixel API.
22+
static std::vector<uint8_t> rgb_to_rgba(const uint8_t* rgb, uint32_t w, uint32_t h) {
23+
size_t n_pixels = (size_t)w * h;
24+
std::vector<uint8_t> rgba(n_pixels * 4);
25+
for (size_t i = 0; i < n_pixels; i++) {
26+
rgba[i * 4 + 0] = rgb[i * 3 + 0];
27+
rgba[i * 4 + 1] = rgb[i * 3 + 1];
28+
rgba[i * 4 + 2] = rgb[i * 3 + 2];
29+
rgba[i * 4 + 3] = 0xFF;
30+
}
31+
return rgba;
32+
}
33+
1834
struct rac_vlm_metalrt_impl {
1935
void* handle; // metalrt_vision_create() handle
2036
bool loaded;
@@ -65,34 +81,29 @@ rac_result_t rac_vlm_metalrt_process(rac_handle_t handle, const rac_vlm_image_t*
6581
auto* impl = static_cast<rac_vlm_metalrt_impl*>(handle);
6682
if (!impl->loaded) return RAC_ERROR_BACKEND_NOT_READY;
6783

68-
// MetalRT needs a file path — handle different image formats
69-
const char* image_path = nullptr;
70-
char tmp_path[256] = {};
71-
72-
if (image->format == RAC_VLM_IMAGE_FORMAT_FILE_PATH) {
73-
image_path = image->file_path;
74-
} else {
75-
// For non-file formats, write to a temp file
76-
// This is a simplification — production code would handle RGB/base64 properly
77-
RAC_LOG_ERROR(LOG_CAT, "MetalRT VLM only supports FILE_PATH image format");
78-
return RAC_ERROR_VALIDATION_FAILED;
79-
}
80-
81-
if (!image_path || image_path[0] == '\0') {
82-
return RAC_ERROR_NULL_POINTER;
83-
}
84-
8584
struct MetalRTVisionOptions vopts = {};
8685
vopts.max_tokens = options ? options->max_tokens : 256;
8786
vopts.temperature = options ? options->temperature : 0.0f;
8887
vopts.top_k = 40;
8988
vopts.think = false;
9089

91-
struct MetalRTVisionResult result = metalrt_vision_analyze(impl->handle, image_path, prompt, &vopts);
90+
struct MetalRTVisionResult result = {};
91+
92+
if (image->format == RAC_VLM_IMAGE_FORMAT_FILE_PATH && image->file_path) {
93+
result = metalrt_vision_analyze(impl->handle, image->file_path, prompt, &vopts);
94+
} else if (image->format == RAC_VLM_IMAGE_FORMAT_RGB_PIXELS && image->pixel_data) {
95+
auto rgba = rgb_to_rgba(image->pixel_data, image->width, image->height);
96+
result = metalrt_vision_analyze_pixels(impl->handle, rgba.data(),
97+
(int)image->width, (int)image->height,
98+
prompt, &vopts);
99+
} else {
100+
RAC_LOG_ERROR(LOG_CAT, "Unsupported image format: %d", image->format);
101+
return RAC_ERROR_VALIDATION_FAILED;
102+
}
92103

93104
out_result->text = result.text ? strdup(result.text) : nullptr;
94105
out_result->prompt_tokens = result.prompt_tokens;
95-
out_result->image_tokens = 0; // MetalRT doesn't separate image token count
106+
out_result->image_tokens = 0;
96107
out_result->completion_tokens = result.generated_tokens;
97108
out_result->total_tokens = result.prompt_tokens + result.generated_tokens;
98109
out_result->time_to_first_token_ms = static_cast<int64_t>(result.prefill_ms);
@@ -125,20 +136,27 @@ rac_result_t rac_vlm_metalrt_process_stream(rac_handle_t handle, const rac_vlm_i
125136
auto* impl = static_cast<rac_vlm_metalrt_impl*>(handle);
126137
if (!impl->loaded) return RAC_ERROR_BACKEND_NOT_READY;
127138

128-
if (image->format != RAC_VLM_IMAGE_FORMAT_FILE_PATH || !image->file_path) {
129-
RAC_LOG_ERROR(LOG_CAT, "MetalRT VLM only supports FILE_PATH image format");
130-
return RAC_ERROR_VALIDATION_FAILED;
131-
}
132-
133139
struct MetalRTVisionOptions vopts = {};
134140
vopts.max_tokens = options ? options->max_tokens : 256;
135141
vopts.temperature = options ? options->temperature : 0.0f;
136142
vopts.top_k = 40;
137143
vopts.think = false;
138144

139145
VLMStreamCtx ctx = {callback, user_data};
140-
struct MetalRTVisionResult result = metalrt_vision_analyze_stream(
141-
impl->handle, image->file_path, prompt, vlm_stream_bridge, &ctx, &vopts);
146+
struct MetalRTVisionResult result = {};
147+
148+
if (image->format == RAC_VLM_IMAGE_FORMAT_FILE_PATH && image->file_path) {
149+
result = metalrt_vision_analyze_stream(
150+
impl->handle, image->file_path, prompt, vlm_stream_bridge, &ctx, &vopts);
151+
} else if (image->format == RAC_VLM_IMAGE_FORMAT_RGB_PIXELS && image->pixel_data) {
152+
auto rgba = rgb_to_rgba(image->pixel_data, image->width, image->height);
153+
result = metalrt_vision_analyze_pixels_stream(
154+
impl->handle, rgba.data(), (int)image->width, (int)image->height,
155+
prompt, vlm_stream_bridge, &ctx, &vopts);
156+
} else {
157+
RAC_LOG_ERROR(LOG_CAT, "Unsupported image format for streaming: %d", image->format);
158+
return RAC_ERROR_VALIDATION_FAILED;
159+
}
142160

143161
metalrt_vision_free_result(result);
144162
return RAC_SUCCESS;

sdk/runanywhere-commons/src/features/vlm/vlm_component.cpp

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -441,25 +441,29 @@ extern "C" rac_result_t rac_vlm_component_load_model_by_id(rac_handle_t handle,
441441
}
442442
}
443443

444-
// 3. Resolve model files within the directory
445-
char model_path[1024] = {};
446-
char mmproj_path[1024] = {};
447-
result = rac_vlm_resolve_model_files(model_folder, model_path, sizeof(model_path), mmproj_path,
448-
sizeof(mmproj_path));
449-
if (result != RAC_SUCCESS) {
450-
RAC_LOG_ERROR(LOG_CAT, "Failed to resolve model files in: %s", model_folder);
451-
rac_model_info_free(model_info);
452-
return result;
453-
}
454-
455-
// 4. Delegate to the existing load function
456-
const char* mmproj = mmproj_path[0] != '\0' ? mmproj_path : nullptr;
444+
// 3. For directory-based models (MetalRT), pass the directory directly.
445+
// For GGUF-based models (llama.cpp), resolve .gguf + mmproj files.
457446
const char* name = model_info->name ? model_info->name : model_id;
458447

459-
RAC_LOG_INFO(LOG_CAT, "Loading VLM model by ID: %s (model=%s, mmproj=%s)", model_id, model_path,
460-
mmproj ? mmproj : "none");
448+
if (rac_framework_uses_directory_based_models(model_info->framework) == RAC_TRUE) {
449+
RAC_LOG_INFO(LOG_CAT, "Loading directory-based VLM model by ID: %s (dir=%s)", model_id, model_folder);
450+
result = rac_vlm_component_load_model(handle, model_folder, nullptr, model_id, name);
451+
} else {
452+
char model_path[1024] = {};
453+
char mmproj_path[1024] = {};
454+
result = rac_vlm_resolve_model_files(model_folder, model_path, sizeof(model_path), mmproj_path,
455+
sizeof(mmproj_path));
456+
if (result != RAC_SUCCESS) {
457+
RAC_LOG_ERROR(LOG_CAT, "Failed to resolve model files in: %s", model_folder);
458+
rac_model_info_free(model_info);
459+
return result;
460+
}
461461

462-
result = rac_vlm_component_load_model(handle, model_path, mmproj, model_id, name);
462+
const char* mmproj = mmproj_path[0] != '\0' ? mmproj_path : nullptr;
463+
RAC_LOG_INFO(LOG_CAT, "Loading VLM model by ID: %s (model=%s, mmproj=%s)", model_id, model_path,
464+
mmproj ? mmproj : "none");
465+
result = rac_vlm_component_load_model(handle, model_path, mmproj, model_id, name);
466+
}
463467

464468
rac_model_info_free(model_info);
465469
return result;

0 commit comments

Comments (0)