Skip to content

Commit f5c49ff

Browse files
feat(web): implement streaming model import via WORKERFS mounting (Issue #372) (#394)
* feat(web): implement streaming model import via WORKERFS mounting (Issue #372) # Conflicts: # sdk/runanywhere-web/packages/core/src/Infrastructure/ModelDownloader.ts # sdk/runanywhere-web/packages/core/src/Infrastructure/ModelLoaderTypes.ts # sdk/runanywhere-web/packages/core/src/Infrastructure/ModelManager.ts # sdk/runanywhere-web/packages/core/src/Infrastructure/OPFSStorage.ts # sdk/runanywhere-web/packages/llamacpp/src/Extensions/RunAnywhere+TextGeneration.ts * fix(web): address PR #394 feedback (OOM fallback, local file streaming) # Conflicts: # sdk/runanywhere-web/packages/core/src/Infrastructure/LocalFileStorage.ts # sdk/runanywhere-web/packages/core/src/Infrastructure/ModelDownloader.ts # sdk/runanywhere-web/packages/llamacpp/src/Extensions/RunAnywhere+TextGeneration.ts * fix(web): add index signatures * fix(web): address PR #394 bot review comments
1 parent 9397aa2 commit f5c49ff

8 files changed

Lines changed: 213 additions & 25 deletions

File tree

sdk/runanywhere-web/packages/core/src/Infrastructure/LocalFileStorage.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,23 @@ export class LocalFileStorage {
349349
}
350350
}
351351

352+
/**
353+
* Get the File object for a model without reading into memory.
354+
* Enables streaming / mounting for locally stored files.
355+
* @param key - Model identifier
356+
*/
357+
async loadModelFile(key: string): Promise<File | null> {
358+
if (!this.dirHandle || !this._isReady) return null;
359+
360+
try {
361+
const filename = this.sanitizeFilename(key);
362+
const fileHandle = await this.dirHandle.getFileHandle(filename);
363+
return await fileHandle.getFile();
364+
} catch {
365+
return null;
366+
}
367+
}
368+
352369
/**
353370
* Check if a model file exists in local storage.
354371
* @param key - Model identifier

sdk/runanywhere-web/packages/core/src/Infrastructure/ModelDownloader.ts

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -555,9 +555,6 @@ export class ModelDownloader {
555555
return opfsStream;
556556
}
557557

558-
// Clean up corrupted 0-byte entries - we can't easily check length on the stream without consuming it,
559-
// so we skip the 0-byte check here for now and rely on loadFromOPFS to clean them up.
560-
561558
// Fall back to in-memory cache
562559
const cached = this.memoryCache.get(key);
563560
if (cached) {
@@ -574,6 +571,18 @@ export class ModelDownloader {
574571
return null;
575572
}
576573

574+
/** Load file object from storage (local FS or OPFS) without reading into memory. */
575+
async loadModelFile(key: string): Promise<File | null> {
576+
// Try local filesystem first
577+
if (this.localFileStorage?.isReady) {
578+
const file = await this.localFileStorage.loadModelFile(key);
579+
if (file) return file;
580+
}
581+
582+
// Try OPFS
583+
return this.storage.loadModelFile(key);
584+
}
585+
577586
/** Check existence in local storage, OPFS, or in-memory cache. */
578587
async existsInOPFS(key: string): Promise<boolean> {
579588
if (this.localFileStorage?.isReady) {

sdk/runanywhere-web/packages/core/src/Infrastructure/ModelLoaderTypes.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ export interface ModelLoadContext {
3434
* Primary model file data (read from storage).
3535
*
3636
* Note: This is optional. Backend loaders that support streaming
37-
* should prefer `dataStream` to avoid large memory allocations.
37+
* should prefer `dataStream` or `file` to avoid large memory allocations.
3838
*/
3939
data?: Uint8Array;
4040

@@ -46,6 +46,15 @@ export interface ModelLoadContext {
4646
*/
4747
dataStream?: ReadableStream<Uint8Array>;
4848

49+
/**
50+
* Primary model file object.
51+
*
52+
* When available, this allows backends to mount the file directly into
53+
* the WASM virtual filesystem (using WORKERFS) without loading it entirely
54+
* into JS memory, preventing OOM crashes with large models.
55+
*/
56+
file?: File;
57+
4958
/**
5059
* Download a file from a URL. Used for on-demand fetching of
5160
* companion files that aren't in storage yet.

sdk/runanywhere-web/packages/core/src/Infrastructure/ModelManager.ts

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -326,9 +326,24 @@ class ModelManagerImpl {
326326
if (!data) throw new Error('Model not downloaded — please download the model first.');
327327
await this.loadVADModel(model, data);
328328
} else {
329-
const dataStream = await this.downloader.loadStreamFromOPFS(modelId);
330-
if (!dataStream) throw new Error('Model not downloaded — please download the model first.');
331-
await this.loadLLMModel(model, modelId, undefined, dataStream);
329+
// Try to get the File object directly (WORKERFS path) to avoid loading into memory
330+
const file = await this.downloader.loadModelFile(modelId);
331+
let dataStream: ReadableStream<Uint8Array> | undefined;
332+
let data: Uint8Array | undefined;
333+
334+
if (!file) {
335+
// Try streaming
336+
dataStream = await this.downloader.loadStreamFromOPFS(modelId) ?? undefined;
337+
338+
if (!dataStream) {
339+
// Fallback to legacy buffering
340+
data = await this.downloader.loadFromOPFS(modelId) ?? undefined;
341+
}
342+
}
343+
344+
if (!file && !dataStream && !data) throw new Error('Model not downloaded — please download the model first.');
345+
346+
await this.loadLLMModel(model, modelId, data, dataStream, file ?? undefined);
332347
}
333348

334349
this.loadedByCategory.set(category, modelId);
@@ -461,11 +476,12 @@ class ModelManagerImpl {
461476
/**
462477
* Build a ModelLoadContext for passing to backend loaders.
463478
*/
464-
private buildLoadContext(model: ManagedModel, data?: Uint8Array, dataStream?: ReadableStream<Uint8Array>): ModelLoadContext {
479+
private buildLoadContext(model: ManagedModel, data?: Uint8Array, dataStream?: ReadableStream<Uint8Array>, file?: File): ModelLoadContext {
465480
return {
466481
model,
467482
data,
468483
dataStream,
484+
file,
469485
downloadFile: (url: string) => this.downloader.downloadFile(url),
470486
loadFile: (fileKey: string) => this.downloader.loadFromOPFS(fileKey),
471487
storeFile: (fileKey: string, fileData: Uint8Array) => this.downloader.storeInOPFS(fileKey, fileData),
@@ -478,9 +494,9 @@ class ModelManagerImpl {
478494
* The loader (in @runanywhere/web-llamacpp) handles writing to its own
479495
* Emscripten FS and calling the C API.
480496
*/
481-
private async loadLLMModel(model: ManagedModel, _modelId: string, data?: Uint8Array, dataStream?: ReadableStream<Uint8Array>): Promise<void> {
497+
private async loadLLMModel(model: ManagedModel, _modelId: string, data?: Uint8Array, dataStream?: ReadableStream<Uint8Array>, file?: File): Promise<void> {
482498
if (!this.llmLoader) throw new Error('No LLM loader registered. Register the @runanywhere/web-llamacpp package.');
483-
const ctx = this.buildLoadContext(model, data, dataStream);
499+
const ctx = this.buildLoadContext(model, data, dataStream, file);
484500
await this.llmLoader.loadModelFromData(ctx);
485501
logger.info(`LLM model loaded: ${model.id}`);
486502
}
@@ -498,9 +514,18 @@ class ModelManagerImpl {
498514
if (!mmprojFile) {
499515
// No mmproj — load as text-only LLM
500516
logger.warning(`No mmproj found, loading as text-only LLM: ${modelId}`);
501-
const dataStream = await this.downloader.loadStreamFromOPFS(modelId);
502-
if (!dataStream) throw new Error('Model not downloaded.');
503-
await this.loadLLMModel(model, modelId, undefined, dataStream);
517+
518+
const file = await this.downloader.loadModelFile(modelId);
519+
let dataStream: ReadableStream<Uint8Array> | undefined;
520+
let data: Uint8Array | undefined;
521+
522+
if (!file) {
523+
dataStream = await this.downloader.loadStreamFromOPFS(modelId) ?? undefined;
524+
if (!dataStream) data = await this.downloader.loadFromOPFS(modelId) ?? undefined;
525+
}
526+
527+
if (!file && !dataStream && !data) throw new Error('Model not downloaded.');
528+
await this.loadLLMModel(model, modelId, data, dataStream, file ?? undefined);
504529
return;
505530
}
506531

sdk/runanywhere-web/packages/core/src/Infrastructure/OPFSStorage.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,25 @@ export class OPFSStorage {
207207
}
208208
}
209209

210+
/**
211+
* Load model file object from OPFS without reading contents into memory.
212+
*
213+
* @param key - Model identifier or nested path
214+
* @returns File object, or null if not found
215+
*/
216+
async loadModelFile(key: string): Promise<File | null> {
217+
if (!this.modelsDir) return null;
218+
219+
try {
220+
const dir = await this.resolveParentDir(key, /* create */ false);
221+
const filename = this.resolveFilename(key);
222+
const fileHandle = await dir.getFileHandle(filename);
223+
return await fileHandle.getFile();
224+
} catch {
225+
return null;
226+
}
227+
}
228+
210229
/**
211230
* Check if a model exists in OPFS.
212231
*

sdk/runanywhere-web/packages/core/src/types/models.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ export interface STTResult {
102102
confidence: number;
103103
duration: number;
104104
alternatives: STTAlternative[];
105+
[key: string]: unknown;
105106
}
106107

107108
export interface STTSegment {

sdk/runanywhere-web/packages/llamacpp/src/Extensions/RunAnywhere+TextGeneration.ts

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ const logger = new SDKLogger('TextGeneration');
3232
class TextGenerationImpl {
3333
readonly extensionName = 'TextGeneration';
3434
private _llmComponentHandle = 0;
35+
private _mountedPath: string | null = null;
3536

3637
/** Ensure the SDK is initialized and return the bridge. */
3738
private requireBridge(): LlamaCppBridge {
@@ -75,15 +76,43 @@ class TextGenerationImpl {
7576
*/
7677
async loadModelFromData(ctx: ModelLoadContext): Promise<void> {
7778
const bridge = this.requireBridge();
78-
const modelPath = `/models/${ctx.model.id}.gguf`;
79-
if (ctx.dataStream) {
79+
let modelPath: string | null = null;
80+
let isMounted = false;
81+
82+
if (this._mountedPath) {
83+
try { bridge.unmount(this._mountedPath); } catch { /* ignore */ }
84+
this._mountedPath = null;
85+
}
86+
87+
if (ctx.file) {
88+
modelPath = bridge.mountFile(ctx.file);
89+
if (modelPath) {
90+
isMounted = true;
91+
this._mountedPath = modelPath;
92+
} else {
93+
logger.warning('Mounting failed (WORKERFS unavailable?), falling back to reading file as stream');
94+
modelPath = `/models/${ctx.model.id}.gguf`;
95+
await bridge.writeFileStream(modelPath, ctx.file.stream() as unknown as ReadableStream<Uint8Array>);
96+
}
97+
} else if (ctx.dataStream) {
98+
modelPath = `/models/${ctx.model.id}.gguf`;
8099
await bridge.writeFileStream(modelPath, ctx.dataStream);
81100
} else if (ctx.data) {
101+
modelPath = `/models/${ctx.model.id}.gguf`;
82102
bridge.writeFile(modelPath, ctx.data);
83103
} else {
84104
throw new Error('No data provided to loadModelFromData');
85105
}
86-
await this.loadModel(modelPath, ctx.model.id, ctx.model.name);
106+
107+
try {
108+
await this.loadModel(modelPath, ctx.model.id, ctx.model.name);
109+
} catch (err) {
110+
if (isMounted) {
111+
bridge.unmount(modelPath);
112+
this._mountedPath = null;
113+
}
114+
throw err;
115+
}
87116
}
88117

89118
/**
@@ -140,16 +169,24 @@ class TextGenerationImpl {
140169

141170
const bridge = this.requireBridge();
142171

143-
const result = await bridge.callFunction<number | Promise<number>>(
144-
'rac_llm_component_unload',
145-
'number',
146-
['number'],
147-
[this._llmComponentHandle],
148-
{ async: true },
149-
) as number;
150-
bridge.checkResult(result, 'rac_llm_component_unload');
172+
try {
173+
const result = await bridge.callFunction<number | Promise<number>>(
174+
'rac_llm_component_unload',
175+
'number',
176+
['number'],
177+
[this._llmComponentHandle],
178+
{ async: true },
179+
) as number;
180+
bridge.checkResult(result, 'rac_llm_component_unload');
151181

152-
logger.info('LLM model unloaded');
182+
logger.info('LLM model unloaded');
183+
} finally {
184+
// Clean up mounted file if applicable (always run cleanup even if unload fails)
185+
if (this._mountedPath) {
186+
bridge.unmount(this._mountedPath);
187+
this._mountedPath = null;
188+
}
189+
}
153190
}
154191

155192
/**

sdk/runanywhere-web/packages/llamacpp/src/Foundation/LlamaCppBridge.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,11 @@ export interface LlamaCppModule {
160160
FS_createPath?: (parent: string, path: string, canRead: boolean, canWrite: boolean) => void;
161161
FS_createDataFile?: (parent: string, name: string, data: Uint8Array, canRead: boolean, canWrite: boolean, canOwn: boolean) => void;
162162
FS_unlink?: (path: string) => void;
163+
FS_mkdir?: (path: string) => void;
164+
FS_rmdir?: (path: string) => void;
165+
FS_mount?: (type: any, opts: any, mountpoint: string) => void;
166+
FS_unmount?: (mountpoint: string) => void;
167+
WORKERFS?: any;
163168

164169
// Generic index access for dynamic function lookups
165170
[key: string]: unknown;
@@ -171,6 +176,7 @@ export interface LlamaCppModule {
171176

172177
export class LlamaCppBridge {
173178
private static _instance: LlamaCppBridge | null = null;
179+
private static _nextMountId = 0;
174180
private _module: LlamaCppModule | null = null;
175181
private _loaded = false;
176182
private _loading: Promise<void> | null = null;
@@ -438,6 +444,71 @@ export class LlamaCppBridge {
438444
try { this.module.FS_unlink?.(path); } catch { /* doesn't exist */ }
439445
}
440446

447+
/**
 * Mount a browser File into the WASM virtual filesystem via WORKERFS,
 * giving native code read access without copying the bytes into JS memory.
 *
 * @param file - The browser File object to expose inside the WASM FS
 * @returns Absolute in-FS path of the mounted file (e.g. /mnt-123/model.gguf),
 *          or null when WORKERFS is unavailable or mounting fails.
 */
mountFile(file: File): string | null {
  const m = this.module;
  if (!m.FS_mount || !m.WORKERFS) return null;

  // Each mount gets its own numbered directory so repeated mounts never collide.
  const mountDir = `/mnt-${LlamaCppBridge._nextMountId++}`;
  let createdMountDir = false;

  try {
    if (m.FS_mkdir) {
      m.FS_mkdir(mountDir);
      createdMountDir = true;
    }

    // Standard Emscripten WORKERFS behavior: each File in the `files` array
    // is mounted under its own `name` inside the mount point.
    m.FS_mount(m.WORKERFS, { files: [file] }, mountDir);

    logger.debug(`Mounted ${file.name} to ${mountDir}`);
    return `${mountDir}/${file.name}`;
  } catch (err) {
    // Best-effort rollback of the directory we created before failing.
    if (createdMountDir && m.FS_rmdir) {
      try {
        m.FS_rmdir(mountDir);
      } catch {
        logger.warning(`Failed to clean up mount dir ${mountDir}`);
      }
    }
    const msg = err instanceof Error ? err.message : String(err);
    logger.warning(`Failed to mount file (WORKERFS): ${msg}`);
    return null;
  }
}
486+
487+
/**
 * Unmount a WORKERFS mount created by {@link mountFile} and remove its
 * mount-point directory. Accepts either the mount directory itself or a
 * file path inside it; cleanup errors are ignored (best effort).
 *
 * @param mountPath - The mount directory (e.g. /mnt-123) or a file path
 *                    inside it (e.g. /mnt-123/model.gguf)
 */
unmount(mountPath: string): void {
  // Safety check: only touch paths produced by mountFile(); never unmount
  // arbitrary directories of the WASM FS.
  if (!mountPath.startsWith('/mnt-')) return;

  // Reduce "/mnt-123/model.gguf" to its mount dir "/mnt-123".
  const parts = mountPath.split('/'); // ["", "mnt-123", "filename"?]
  const dir = parts.length >= 3 ? `/${parts[1]}` : mountPath;

  try {
    const m = this.module;
    if (m.FS_unmount) m.FS_unmount(dir);
    if (m.FS_rmdir) m.FS_rmdir(dir);
    logger.debug(`Unmounted ${dir}`);
  } catch {
    /* ignore cleanup errors */
  }
}
511+
441512
// -----------------------------------------------------------------------
442513
// WebGPU Detection
443514
// -----------------------------------------------------------------------

0 commit comments

Comments (0)