Skip to content

Commit ed7eb57

Browse files
authored
feat: add MetalRT backend for Apple Silicon inference (#459)
* feat: add MetalRT backend for high-performance Apple Silicon inference

  Add MetalRT as a new inference backend that uses custom Metal GPU kernels for LLM, STT (Whisper), TTS (Kokoro), and VLM inference on Apple silicon. MetalRT is framework-hint only (RAC_FRAMEWORK_METALRT) — never auto-selected.

  C++ backend (runanywhere-commons):
  - LLM vtable adapter wrapping metalrt_c_api.h (generate, stream, adaptive context)
  - STT adapter for Whisper transcription
  - TTS adapter for Kokoro synthesis
  - VLM adapter for vision-language with streaming
  - Registration with 4 service providers (LLM, STT, TTS, VLM)
  - RAC_FRAMEWORK_METALRT = 10 added to framework enum
  - CMake option RAC_BACKEND_METALRT (Apple only, OFF by default)

  Swift SDK (runanywhere-swift):
  - MetalRTRuntime module with RunAnywhereModule conformance
  - MetalRTBackend C bridge module (headers + modulemap)
  - .metalrt case in InferenceFramework enum + CppBridge mapping
  - MetalRTRuntime target + RunAnywhereMetalRT library in Package.swift

  Example app:
  - Conditional MetalRT.register() in SDK initialization
  - Placeholder model registrations with .metalrt framework

* fix: simulator arm64 slice in xcframeworks and compression API compatibility

  build-ios.sh: combine SIMULATORARM64 and SIMULATOR builds into a fat binary before creating xcframeworks, so Apple Silicon Macs can run the iOS simulator target. Skips lipo if the binary already contains arm64 (e.g. ONNX backend with pre-built fat sherpa-onnx).

  ArchiveUtility.swift: fix compression_stream initializer and compression_stream_process flags for latest Xcode SDK.
1 parent 405ff93 commit ed7eb57

27 files changed

Lines changed: 2421 additions & 5 deletions

File tree

Package.swift

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@ let package = Package(
8181
name: "RunAnywhereWhisperKit",
8282
targets: ["WhisperKitRuntime"]
8383
),
84+
85+
// =================================================================
86+
// MetalRT Backend - adds LLM/STT/TTS/VLM via custom Metal kernels
87+
// =================================================================
88+
.library(
89+
name: "RunAnywhereMetalRT",
90+
targets: ["MetalRTRuntime"]
91+
),
8492
],
8593
dependencies: [
8694
.package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"),
@@ -200,6 +208,34 @@ let package = Package(
200208
]
201209
),
202210

211+
// =================================================================
212+
// MetalRT C Bridge Module - exposes rac_backend_metalrt_register()
213+
// =================================================================
214+
.target(
215+
name: "MetalRTBackend",
216+
dependencies: [],
217+
path: "sdk/runanywhere-swift/Sources/MetalRTRuntime/include",
218+
publicHeadersPath: "."
219+
),
220+
221+
// =================================================================
222+
// MetalRT Runtime Backend (custom Metal GPU kernels)
223+
// =================================================================
224+
.target(
225+
name: "MetalRTRuntime",
226+
dependencies: [
227+
"RunAnywhere",
228+
"MetalRTBackend",
229+
],
230+
path: "sdk/runanywhere-swift/Sources/MetalRTRuntime",
231+
exclude: ["include"],
232+
linkerSettings: [
233+
.linkedLibrary("c++"),
234+
.linkedFramework("Accelerate"),
235+
.linkedFramework("Metal"),
236+
]
237+
),
238+
203239
// =================================================================
204240
// WhisperKit Runtime Backend (Apple Neural Engine STT)
205241
// =================================================================

examples/ios/RunAnywhereAI/RunAnywhereAI/App/RunAnywhereAIApp.swift

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ import RunAnywhere
1010
import LlamaCPPRuntime
1111
import ONNXRuntime
1212
import WhisperKitRuntime
13+
#if canImport(MetalRTRuntime)
14+
import MetalRTRuntime
15+
#endif
1316
#if canImport(UIKit)
1417
import UIKit
1518
#endif
@@ -84,6 +87,9 @@ struct RunAnywhereAIApp: App {
8487
LlamaCPP.register(priority: 100)
8588
ONNX.register(priority: 100)
8689
WhisperKitSTT.register(priority: 200)
90+
#if canImport(MetalRTRuntime)
91+
MetalRT.register(priority: 100)
92+
#endif
8793

8894
// Clear any previous error
8995
await MainActor.run { initializationError = nil }
@@ -327,6 +333,26 @@ struct RunAnywhereAIApp: App {
327333

328334
logger.info("✅ LLM models registered (including tool-calling optimized models)")
329335

336+
// ============================================================================
337+
// Register MetalRT LLM models (custom Metal GPU kernels, framework-hint only)
338+
// These models use MetalRT's safetensors format, NOT GGUF.
339+
// ============================================================================
340+
// TODO: Add MetalRT model download URLs once hosted
341+
// For now, models are loaded from local paths during development.
342+
// Example registration (uncomment when URLs are available):
343+
//
344+
// if let qwen3MetalRTURL = URL(string: "https://huggingface.co/.../Qwen3-0.6B-MLX-4bit.tar.gz") {
345+
// RunAnywhere.registerModel(
346+
// id: "qwen3-0.6b-metalrt",
347+
// name: "Qwen3 0.6B (MetalRT)",
348+
// url: qwen3MetalRTURL,
349+
// framework: .metalrt,
350+
// memoryRequirement: 400_000_000
351+
// )
352+
// }
353+
354+
logger.info("✅ MetalRT models registered (framework-hint only)")
355+
330356
// Register VLM (Vision Language) models
331357
// VLM models require 2 files: main model + mmproj (vision projector)
332358
// Bundled as tar.gz archives for easy download/extraction

sdk/runanywhere-commons/CMakeLists.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ option(RAC_BACKEND_RAG "Build RAG pipeline (USearch vector search)" ON)
3737
option(RAC_BACKEND_WHISPERCPP "Build WhisperCPP backend" OFF)
3838
if(APPLE)
3939
option(RAC_BACKEND_WHISPERKIT_COREML "Build WhisperKit CoreML backend (Apple Neural Engine STT)" ON)
40+
option(RAC_BACKEND_METALRT "Build MetalRT backend (custom Metal GPU kernels, Apple only)" OFF)
4041
else()
4142
set(RAC_BACKEND_WHISPERKIT_COREML OFF CACHE BOOL "" FORCE)
43+
set(RAC_BACKEND_METALRT OFF CACHE BOOL "" FORCE)
4244
endif()
4345
option(RAC_BUILD_SERVER "Build OpenAI-compatible HTTP server (runanywhere-server)" OFF)
4446

@@ -411,6 +413,11 @@ if(RAC_BUILD_BACKENDS)
411413
add_subdirectory(src/backends/whispercpp)
412414
endif()
413415

416+
if(RAC_BACKEND_METALRT AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/backends/metalrt/CMakeLists.txt")
417+
message(STATUS " - MetalRT backend")
418+
add_subdirectory(src/backends/metalrt)
419+
endif()
420+
414421
if(RAC_BACKEND_RAG AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/features/rag/CMakeLists.txt")
415422
message(STATUS " - RAG pipeline (USearch) — folded into rac_commons")
416423
add_subdirectory(src/features/rag)
@@ -495,7 +502,7 @@ if(APPLE AND RAC_BUILD_PLATFORM)
495502
message(STATUS " Platform: Apple Foundation Models, System TTS")
496503
endif()
497504
if(RAC_BUILD_BACKENDS)
498-
message(STATUS " Backends: LlamaCPP=${RAC_BACKEND_LLAMACPP}, ONNX=${RAC_BACKEND_ONNX}, WhisperCPP=${RAC_BACKEND_WHISPERCPP}, WhisperKitCoreML=${RAC_BACKEND_WHISPERKIT_COREML}")
505+
message(STATUS " Backends: LlamaCPP=${RAC_BACKEND_LLAMACPP}, ONNX=${RAC_BACKEND_ONNX}, WhisperCPP=${RAC_BACKEND_WHISPERCPP}, WhisperKitCoreML=${RAC_BACKEND_WHISPERKIT_COREML}, MetalRT=${RAC_BACKEND_METALRT}")
499506
if(RAC_BACKEND_RAG)
500507
message(STATUS " RAG pipeline: Enabled (folded into rac_commons)")
501508
endif()
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/**
2+
* @file rac_backend_metalrt.h
3+
* @brief RunAnywhere Commons - MetalRT Backend Registration
4+
*
5+
* Public header for the MetalRT backend. MetalRT provides high-performance
6+
* LLM, STT, TTS, and VLM inference using custom Metal GPU kernels on Apple
7+
* silicon. This backend handles models registered with RAC_FRAMEWORK_METALRT.
8+
*
9+
* Apple-only (iOS/macOS).
10+
*/
11+
12+
#ifndef RAC_BACKEND_METALRT_H
13+
#define RAC_BACKEND_METALRT_H
14+
15+
#include "rac/core/rac_error.h"
16+
#include "rac/core/rac_types.h"
17+
18+
#ifdef __cplusplus
19+
extern "C" {
20+
#endif
21+
22+
// =============================================================================
23+
// EXPORT MACRO
24+
// =============================================================================
25+
26+
#if defined(RAC_METALRT_BUILDING)
27+
#if defined(__GNUC__) || defined(__clang__)
28+
#define RAC_METALRT_API __attribute__((visibility("default")))
29+
#else
30+
#define RAC_METALRT_API
31+
#endif
32+
#else
33+
#define RAC_METALRT_API
34+
#endif
35+
36+
// =============================================================================
37+
// BACKEND REGISTRATION
38+
// =============================================================================
39+
40+
/**
41+
* Registers the MetalRT backend with the commons module and service registries.
42+
*
43+
* Registers providers for:
44+
* - LLM (TEXT_GENERATION) — metalrt_generate / metalrt_generate_stream
45+
* - STT (SPEECH_RECOGNITION) — metalrt_whisper_transcribe
46+
* - TTS (TEXT_TO_SPEECH) — metalrt_tts_synthesize
47+
* - VLM (VISION_LANGUAGE) — metalrt_vision_analyze
48+
*
49+
* Should be called once during SDK initialization.
50+
* Only handles models with RAC_FRAMEWORK_METALRT framework hint.
51+
*
52+
* @return RAC_SUCCESS or error code
53+
*/
54+
RAC_METALRT_API rac_result_t rac_backend_metalrt_register(void);
55+
56+
/**
57+
* Unregisters the MetalRT backend.
58+
*
59+
* @return RAC_SUCCESS or error code
60+
*/
61+
RAC_METALRT_API rac_result_t rac_backend_metalrt_unregister(void);
62+
63+
#ifdef __cplusplus
64+
}
65+
#endif
66+
67+
#endif /* RAC_BACKEND_METALRT_H */

sdk/runanywhere-commons/include/rac/infrastructure/model_management/rac_model_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ typedef enum rac_inference_framework {
187187
RAC_FRAMEWORK_MLX = 7, /**< MLX C++ (Apple Silicon VLM) */
188188
RAC_FRAMEWORK_COREML = 8, /**< Core ML (Apple Neural Engine) */
189189
RAC_FRAMEWORK_WHISPERKIT_COREML = 9, /**< WhisperKit CoreML (Apple Neural Engine STT) */
190+
RAC_FRAMEWORK_METALRT = 10, /**< MetalRT (custom Metal GPU kernels, Apple only) */
190191
RAC_FRAMEWORK_UNKNOWN = 99 /**< Unknown framework */
191192
} rac_inference_framework_t;
192193

sdk/runanywhere-commons/scripts/build-ios.sh

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -470,8 +470,20 @@ EOF
470470
EOF
471471
done
472472

473-
# SIMULATOR already contains universal binary (arm64 + x86_64)
473+
# Merge the SIMULATORARM64 (arm64) slice into the SIMULATOR binary in place, producing a fat arm64 + x86_64 simulator binary
474474
local SIM_FAT="${BUILD_DIR}/SIMULATOR"
475+
local SIM_ARM64_BIN="${BUILD_DIR}/SIMULATORARM64/${FRAMEWORK_NAME}.framework/${FRAMEWORK_NAME}"
476+
local SIM_X86_BIN="${SIM_FAT}/${FRAMEWORK_NAME}.framework/${FRAMEWORK_NAME}"
477+
if [[ -f "${SIM_ARM64_BIN}" && -f "${SIM_X86_BIN}" ]]; then
478+
# Only combine if the simulator binary doesn't already contain arm64
479+
local SIM_ARCHS
480+
SIM_ARCHS=$(lipo -archs "${SIM_X86_BIN}" 2>/dev/null || echo "")
481+
if [[ "$SIM_ARCHS" != *"arm64"* ]]; then
482+
log_step "Creating fat simulator binary (arm64 + x86_64)..."
483+
lipo -create "${SIM_ARM64_BIN}" "${SIM_X86_BIN}" \
484+
-output "${SIM_FAT}/${FRAMEWORK_NAME}.framework/${FRAMEWORK_NAME}"
485+
fi
486+
fi
475487

476488
# Create XCFramework using library format (prevents SPM from embedding static libs)
477489
local XCFW_PATH="${DIST_DIR}/${FRAMEWORK_NAME}.xcframework"
@@ -664,8 +676,20 @@ EOF
664676
return 0
665677
fi
666678

667-
# SIMULATOR already contains universal binary (arm64 + x86_64)
679+
# Merge the SIMULATORARM64 (arm64) slice into the SIMULATOR binary in place, producing a fat arm64 + x86_64 simulator binary
668680
local SIM_FAT="${BUILD_DIR}/SIMULATOR"
681+
local SIM_ARM64_BIN="${BUILD_DIR}/SIMULATORARM64/${FRAMEWORK_NAME}.framework/${FRAMEWORK_NAME}"
682+
local SIM_X86_BIN="${SIM_FAT}/${FRAMEWORK_NAME}.framework/${FRAMEWORK_NAME}"
683+
if [[ -f "${SIM_ARM64_BIN}" && -f "${SIM_X86_BIN}" ]]; then
684+
# Only combine if the simulator binary doesn't already contain arm64
685+
local SIM_ARCHS
686+
SIM_ARCHS=$(lipo -archs "${SIM_X86_BIN}" 2>/dev/null || echo "")
687+
if [[ "$SIM_ARCHS" != *"arm64"* ]]; then
688+
log_step "Creating fat simulator binary (arm64 + x86_64)..."
689+
lipo -create "${SIM_ARM64_BIN}" "${SIM_X86_BIN}" \
690+
-output "${SIM_FAT}/${FRAMEWORK_NAME}.framework/${FRAMEWORK_NAME}"
691+
fi
692+
fi
669693

670694
# Create XCFramework using library format (prevents SPM from embedding static libs)
671695
local XCFW_PATH="${DIST_DIR}/${FRAMEWORK_NAME}.xcframework"
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# MetalRT Backend — Apple-only (iOS/macOS)
2+
# Expects a pre-built libmetalrt_engine.a and metalrt_c_api.h; if the library is not found at configure time, the consuming app must link it
3+
4+
set(METALRT_SOURCES
5+
rac_llm_metalrt.cpp
6+
rac_stt_metalrt.cpp
7+
rac_tts_metalrt.cpp
8+
rac_vlm_metalrt.cpp
9+
rac_backend_metalrt_register.cpp
10+
)
11+
12+
# MetalRT pre-built library location (set by parent or via -DMETALRT_ROOT=...)
13+
if(NOT DEFINED METALRT_ROOT)
14+
# Default: assume the MetalRT checkout sits three directory levels above the top-level CMake source dir, i.e. next to the runanywhere-sdks repository
15+
set(METALRT_ROOT "${CMAKE_SOURCE_DIR}/../../../MetalRT" CACHE PATH "Path to MetalRT project root")
16+
endif()
17+
18+
set(METALRT_INCLUDE_DIR "${METALRT_ROOT}/src" CACHE PATH "Path to metalrt_c_api.h")
19+
20+
# Choose the directory searched for the pre-built static library (METALRT_LIB_DIR overrides the default ${METALRT_ROOT}/build)
21+
if(DEFINED METALRT_LIB_DIR)
22+
set(_metalrt_lib_dir "${METALRT_LIB_DIR}")
23+
else()
24+
set(_metalrt_lib_dir "${METALRT_ROOT}/build")
25+
endif()
26+
27+
add_library(rac_backend_metalrt STATIC ${METALRT_SOURCES})
28+
29+
target_include_directories(rac_backend_metalrt PRIVATE
30+
${CMAKE_SOURCE_DIR}/include
31+
${CMAKE_CURRENT_SOURCE_DIR}
32+
${METALRT_INCLUDE_DIR}
33+
)
34+
35+
target_compile_definitions(rac_backend_metalrt PRIVATE RAC_METALRT_BUILDING)
36+
37+
# Link MetalRT static library
38+
find_library(METALRT_ENGINE_LIB
39+
NAMES metalrt_engine
40+
PATHS ${_metalrt_lib_dir}
41+
NO_DEFAULT_PATH
42+
)
43+
44+
if(METALRT_ENGINE_LIB)
45+
target_link_libraries(rac_backend_metalrt PRIVATE ${METALRT_ENGINE_LIB})
46+
message(STATUS "MetalRT: Found libmetalrt_engine at ${METALRT_ENGINE_LIB}")
47+
else()
48+
message(WARNING "MetalRT: libmetalrt_engine.a not found in ${_metalrt_lib_dir} — will need to be linked by the consuming app")
49+
endif()
50+
51+
# Apple frameworks
52+
target_link_libraries(rac_backend_metalrt PRIVATE
53+
"-framework Metal"
54+
"-framework Foundation"
55+
"-framework Accelerate"
56+
)

0 commit comments

Comments
 (0)