Skip to content

Commit 0dc46c1

Browse files
replacing the archive logic and moving to cpp
1 parent 916d713 commit 0dc46c1

37 files changed

Lines changed: 1769 additions & 1967 deletions

File tree

Package.resolved

Lines changed: 0 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Package.swift

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,7 @@ let package = Package(
9292
.package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"),
9393
.package(url: "https://github.com/Alamofire/Alamofire.git", from: "5.9.0"),
9494
.package(url: "https://github.com/JohnSundell/Files.git", from: "4.3.0"),
95-
.package(url: "https://github.com/weichsel/ZIPFoundation.git", from: "0.9.0"),
9695
.package(url: "https://github.com/devicekit/DeviceKit.git", from: "5.6.0"),
97-
.package(url: "https://github.com/tsolomko/SWCompression.git", from: "4.8.0"),
9896
.package(url: "https://github.com/getsentry/sentry-cocoa", from: "8.40.0"),
9997
// ml-stable-diffusion for CoreML-based image generation
10098
.package(url: "https://github.com/apple/ml-stable-diffusion.git", from: "1.1.0"),
@@ -145,9 +143,7 @@ let package = Package(
145143
.product(name: "Crypto", package: "swift-crypto"),
146144
.product(name: "Alamofire", package: "Alamofire"),
147145
.product(name: "Files", package: "Files"),
148-
.product(name: "ZIPFoundation", package: "ZIPFoundation"),
149146
.product(name: "DeviceKit", package: "DeviceKit"),
150-
.product(name: "SWCompression", package: "SWCompression"),
151147
.product(name: "Sentry", package: "sentry-cocoa"),
152148
.product(name: "StableDiffusion", package: "ml-stable-diffusion"),
153149
"CRACommons",

sdk/runanywhere-commons/CMakeLists.txt

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,87 @@ FetchContent_MakeAvailable(nlohmann_json)
119119
# (nlohmann_json_SOURCE_DIR is set by FetchContent_MakeAvailable)
120120
include_directories(SYSTEM ${nlohmann_json_SOURCE_DIR}/include)
121121

122+
# libarchive - streaming archive extraction (ZIP, TAR.GZ, TAR.BZ2)
123+
# Used for native model archive extraction across all platforms
124+
if(NOT DEFINED LIBARCHIVE_VERSION)
125+
set(LIBARCHIVE_VERSION "3.8.1")
126+
endif()
127+
128+
# -----------------------------------------------------------------------------
129+
# BZip2: Bundle from source for cross-compilation targets
130+
# Android NDK and Emscripten don't ship libbz2. macOS/iOS have it in the SDK.
131+
# We try system first; if not found, build from source so libarchive gets it.
132+
# -----------------------------------------------------------------------------
133+
find_package(BZip2 QUIET)
134+
if(NOT BZIP2_FOUND)
135+
message(STATUS "System BZip2 not found — bundling from source for cross-compilation...")
136+
FetchContent_Declare(
137+
bzip2_src
138+
URL https://sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz
139+
URL_HASH SHA256=ab5a03176ee106d3f0fa90e381da478ddae405918153cca248e682cd0c4a2269
140+
)
141+
FetchContent_MakeAvailable(bzip2_src)
142+
143+
add_library(bz2_bundled STATIC
144+
${bzip2_src_SOURCE_DIR}/blocksort.c
145+
${bzip2_src_SOURCE_DIR}/huffman.c
146+
${bzip2_src_SOURCE_DIR}/crctable.c
147+
${bzip2_src_SOURCE_DIR}/randtable.c
148+
${bzip2_src_SOURCE_DIR}/compress.c
149+
${bzip2_src_SOURCE_DIR}/decompress.c
150+
${bzip2_src_SOURCE_DIR}/bzlib.c
151+
)
152+
target_include_directories(bz2_bundled PUBLIC ${bzip2_src_SOURCE_DIR})
153+
set_target_properties(bz2_bundled PROPERTIES POSITION_INDEPENDENT_CODE ON)
154+
155+
# Set cache variables so libarchive's find_package(BZip2) picks up our build
156+
set(BZIP2_INCLUDE_DIR "${bzip2_src_SOURCE_DIR}" CACHE PATH "" FORCE)
157+
set(BZIP2_LIBRARIES bz2_bundled CACHE STRING "" FORCE)
158+
set(BZIP2_FOUND TRUE CACHE BOOL "" FORCE)
159+
message(STATUS "Bundled BZip2 ready (v1.0.8)")
160+
else()
161+
message(STATUS "Using system BZip2: ${BZIP2_LIBRARIES}")
162+
endif()
163+
164+
FetchContent_Declare(
165+
libarchive
166+
GIT_REPOSITORY https://github.com/libarchive/libarchive.git
167+
GIT_TAG v${LIBARCHIVE_VERSION}
168+
GIT_SHALLOW TRUE
169+
)
170+
# Disable everything except the static library and the formats we need
171+
set(ENABLE_MBEDTLS OFF CACHE BOOL "" FORCE)
172+
set(ENABLE_NETTLE OFF CACHE BOOL "" FORCE)
173+
set(ENABLE_OPENSSL OFF CACHE BOOL "" FORCE)
174+
set(ENABLE_LIBB2 OFF CACHE BOOL "" FORCE)
175+
set(ENABLE_LZ4 OFF CACHE BOOL "" FORCE)
176+
set(ENABLE_LZO OFF CACHE BOOL "" FORCE)
177+
set(ENABLE_LZMA OFF CACHE BOOL "" FORCE) # tar.xz not currently used by any model
178+
set(ENABLE_ZSTD OFF CACHE BOOL "" FORCE)
179+
set(ENABLE_ZLIB ON CACHE BOOL "" FORCE) # Needed for tar.gz and zip
180+
set(ENABLE_BZip2 ON CACHE BOOL "" FORCE) # Needed for tar.bz2 (k2-fsa models)
181+
set(ENABLE_LIBXML2 OFF CACHE BOOL "" FORCE)
182+
set(ENABLE_EXPAT OFF CACHE BOOL "" FORCE)
183+
set(ENABLE_PCREPOSIX OFF CACHE BOOL "" FORCE)
184+
set(ENABLE_PCRE2POSIX OFF CACHE BOOL "" FORCE)
185+
set(ENABLE_LIBGCC OFF CACHE BOOL "" FORCE)
186+
set(ENABLE_CNG OFF CACHE BOOL "" FORCE)
187+
set(ENABLE_TAR OFF CACHE BOOL "" FORCE) # Don't build bsdtar binary
188+
set(ENABLE_CPIO OFF CACHE BOOL "" FORCE) # Don't build bsdcpio binary
189+
set(ENABLE_CAT OFF CACHE BOOL "" FORCE) # Don't build bsdcat binary
190+
set(ENABLE_UNZIP OFF CACHE BOOL "" FORCE) # Don't build bsdunzip binary
191+
set(ENABLE_TEST OFF CACHE BOOL "" FORCE)
192+
set(ENABLE_INSTALL OFF CACHE BOOL "" FORCE)
193+
set(ENABLE_ACL OFF CACHE BOOL "" FORCE)
194+
set(ENABLE_XATTR OFF CACHE BOOL "" FORCE)
195+
set(ENABLE_ICONV OFF CACHE BOOL "" FORCE)
196+
# Save and restore BUILD_SHARED_LIBS since libarchive respects it
197+
set(_SAVED_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
198+
set(BUILD_SHARED_LIBS OFF)
199+
FetchContent_MakeAvailable(libarchive)
200+
set(BUILD_SHARED_LIBS ${_SAVED_BUILD_SHARED_LIBS})
201+
message(STATUS "libarchive ready (v${LIBARCHIVE_VERSION})")
202+
122203
# =============================================================================
123204
# SERVER DEPENDENCIES (FetchContent)
124205
# =============================================================================
@@ -223,6 +304,7 @@ set(RAC_INFRASTRUCTURE_SOURCES
223304
src/infrastructure/telemetry/telemetry_json.cpp
224305
src/infrastructure/telemetry/telemetry_manager.cpp
225306
src/infrastructure/device/rac_device_manager.cpp
307+
src/infrastructure/extraction/rac_extraction.cpp
226308
)
227309

228310
# Feature sources - LLM, STT, TTS, VAD, Wake Word, VLM, Diffusion (iOS/Apple only)
@@ -330,6 +412,10 @@ if(RAC_BUILD_SHARED)
330412
)
331413
endif()
332414

415+
# libarchive - native archive extraction
416+
target_link_libraries(rac_commons PRIVATE archive_static)
417+
target_include_directories(rac_commons PRIVATE ${libarchive_SOURCE_DIR}/libarchive ${libarchive_BINARY_DIR})
418+
333419
# Platform-specific linking
334420
if(APPLE)
335421
target_link_libraries(rac_commons PUBLIC

sdk/runanywhere-commons/VERSIONS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ LLAMACPP_VERSION=b8011
8181
# =============================================================================
8282
NLOHMANN_JSON_VERSION=3.11.3
8383

84+
# =============================================================================
85+
# libarchive (archive extraction - ZIP, TAR.GZ, TAR.BZ2; TAR.XZ needs ENABLE_LZMA, which CMakeLists.txt currently sets OFF)
86+
# =============================================================================
87+
LIBARCHIVE_VERSION=3.8.1
88+
8489
# =============================================================================
8590
# RAC Commons Version (for remote builds/CI)
8691
# =============================================================================

sdk/runanywhere-commons/exports/RACommons.exports

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ _rac_artifact_infer_from_url
4444
_rac_artifact_requires_download
4545
_rac_artifact_requires_extraction
4646
_rac_extract_archive
47+
_rac_extract_archive_native
48+
_rac_detect_archive_type
4749

4850
# Component Types
4951
_rac_capability_resource_type_raw_value
@@ -61,6 +63,8 @@ _rac_download_manager_get_active_tasks
6163
_rac_download_manager_get_progress
6264
_rac_download_manager_is_healthy
6365
_rac_download_manager_mark_complete
66+
_rac_download_manager_mark_extraction_complete
67+
_rac_download_manager_mark_extraction_failed
6468
_rac_download_manager_mark_failed
6569
_rac_download_manager_pause_all
6670
_rac_download_manager_resume_all

sdk/runanywhere-commons/include/rac/infrastructure/download/rac_download.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,39 @@ RAC_API rac_result_t rac_download_manager_mark_failed(rac_download_manager_handl
392392
const char* task_id, rac_result_t error_code,
393393
const char* error_message);
394394

395+
// =============================================================================
396+
// EXTRACTION COMPLETION API
397+
// =============================================================================
398+
399+
/**
400+
* @brief Mark extraction as completed for a download task.
401+
*
402+
* Called after archive extraction succeeds. Transitions the task
403+
* from EXTRACTING to COMPLETED state.
404+
*
405+
* @param handle Manager handle
406+
* @param task_id Task ID
407+
* @param extracted_path Path to the extracted model directory
408+
* @return RAC_SUCCESS or error code
409+
*/
410+
RAC_API rac_result_t rac_download_manager_mark_extraction_complete(
411+
rac_download_manager_handle_t handle, const char* task_id, const char* extracted_path);
412+
413+
/**
414+
* @brief Mark extraction as failed for a download task.
415+
*
416+
* Called if archive extraction fails.
417+
*
418+
* @param handle Manager handle
419+
* @param task_id Task ID
420+
* @param error_code Extraction error code
421+
* @param error_message Error description (can be NULL)
422+
* @return RAC_SUCCESS or error code
423+
*/
424+
RAC_API rac_result_t rac_download_manager_mark_extraction_failed(
425+
rac_download_manager_handle_t handle, const char* task_id, rac_result_t error_code,
426+
const char* error_message);
427+
395428
// =============================================================================
396429
// MEMORY MANAGEMENT
397430
// =============================================================================
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/**
2+
* @file rac_extraction.h
3+
* @brief RunAnywhere Commons - Native Archive Extraction
4+
*
5+
* Native archive extraction using libarchive.
6+
* Supports ZIP, TAR.GZ, TAR.BZ2 (TAR.XZ only when libarchive is built with LZMA enabled) with streaming extraction
7+
* (constant memory usage regardless of archive size).
8+
*
9+
* Security features:
10+
* - Zip-slip protection (path traversal prevention)
11+
* - macOS resource fork skipping (._ prefixed files, __MACOSX/ directories)
12+
* - Symbolic link safety (contained within destination)
13+
* - Archive type auto-detection via magic bytes
14+
*/
15+
16+
#ifndef RAC_EXTRACTION_H
17+
#define RAC_EXTRACTION_H
18+
19+
#include "rac/core/rac_error.h"
20+
#include "rac/core/rac_types.h"
21+
#include "rac/infrastructure/model_management/rac_model_types.h"
22+
23+
#ifdef __cplusplus
24+
extern "C" {
25+
#endif
26+
27+
// =============================================================================
28+
// EXTRACTION OPTIONS
29+
// =============================================================================
30+
31+
/**
32+
* @brief Options for archive extraction.
33+
*/
34+
typedef struct rac_extraction_options {
35+
/** Skip macOS resource forks (._ files, __MACOSX/ directories).
36+
* Default: RAC_TRUE */
37+
rac_bool_t skip_macos_resources;
38+
39+
/** Skip symbolic links entirely.
40+
* Default: RAC_FALSE (symlinks are created if safe) */
41+
rac_bool_t skip_symlinks;
42+
43+
/** Archive type hint. RAC_ARCHIVE_TYPE_NONE = auto-detect from magic bytes.
44+
* Default: RAC_ARCHIVE_TYPE_NONE */
45+
rac_archive_type_t archive_type_hint;
46+
} rac_extraction_options_t;
47+
48+
/**
49+
* @brief Default extraction options.
50+
*/
51+
static const rac_extraction_options_t RAC_EXTRACTION_OPTIONS_DEFAULT = {
52+
RAC_TRUE, /* skip_macos_resources */
53+
RAC_FALSE, /* skip_symlinks */
54+
RAC_ARCHIVE_TYPE_NONE /* archive_type_hint */
55+
};
56+
57+
// =============================================================================
58+
// EXTRACTION RESULT
59+
// =============================================================================
60+
61+
/**
62+
* @brief Result of an extraction operation.
63+
*/
64+
typedef struct rac_extraction_result {
65+
/** Number of files extracted */
66+
int32_t files_extracted;
67+
68+
/** Number of directories created */
69+
int32_t directories_created;
70+
71+
/** Total bytes written to disk */
72+
int64_t bytes_extracted;
73+
74+
/** Number of entries skipped (resource forks, unsafe paths) */
75+
int32_t entries_skipped;
76+
} rac_extraction_result_t;
77+
78+
// =============================================================================
79+
// EXTRACTION PROGRESS CALLBACK
80+
// =============================================================================
81+
82+
/**
83+
* @brief Progress callback for extraction.
84+
*
85+
* @param files_extracted Number of files extracted so far
86+
* @param total_files Total files in archive (0 if unknown for streaming formats)
87+
* @param bytes_extracted Bytes written to disk so far
88+
* @param user_data User-provided context
89+
*/
90+
typedef void (*rac_extraction_progress_fn)(int32_t files_extracted, int32_t total_files,
91+
int64_t bytes_extracted, void* user_data);
92+
93+
// =============================================================================
94+
// EXTRACTION API
95+
// =============================================================================
96+
97+
/**
98+
* @brief Extract an archive using native libarchive.
99+
*
100+
* Performs streaming extraction with constant memory usage.
101+
* Auto-detects archive format from magic bytes if archive_type_hint
102+
* is RAC_ARCHIVE_TYPE_NONE.
103+
*
104+
* @param archive_path Path to the archive file
105+
* @param destination_dir Directory to extract into (created if needed)
106+
* @param options Extraction options (NULL for defaults)
107+
* @param progress_callback Progress callback (can be NULL)
108+
* @param user_data Context for progress callback
109+
* @param out_result Output: extraction statistics (can be NULL)
110+
* @return RAC_SUCCESS on success, error code on failure
111+
*
112+
* Error codes:
113+
* - RAC_ERROR_EXTRACTION_FAILED: General extraction error
114+
* - RAC_ERROR_UNSUPPORTED_ARCHIVE: Unrecognized archive format
115+
* - RAC_ERROR_FILE_NOT_FOUND: Archive file does not exist
116+
* - RAC_ERROR_NULL_POINTER: archive_path or destination_dir is NULL
117+
*/
118+
RAC_API rac_result_t rac_extract_archive_native(const char* archive_path,
119+
const char* destination_dir,
120+
const rac_extraction_options_t* options,
121+
rac_extraction_progress_fn progress_callback,
122+
void* user_data,
123+
rac_extraction_result_t* out_result);
124+
125+
/**
126+
* @brief Detect archive type from file magic bytes.
127+
*
128+
* Reads the first few bytes of the file to determine the archive format.
129+
* More reliable than file extension detection.
130+
*
131+
* @param file_path Path to the file
132+
* @param out_type Output: detected archive type
133+
* @return RAC_TRUE if archive type detected, RAC_FALSE otherwise
134+
*/
135+
RAC_API rac_bool_t rac_detect_archive_type(const char* file_path, rac_archive_type_t* out_type);
136+
137+
#ifdef __cplusplus
138+
}
139+
#endif
140+
141+
#endif /* RAC_EXTRACTION_H */

0 commit comments

Comments
 (0)