Skip to content

Commit 4a2ea17

Browse files
Refactor audio and video infrastructure in the RunAnywhere Web SDK
- Updated imports in `speak.ts`, `transcribe.ts`, `vision.ts`, and `voice.ts` to use the new core package for audio and video functionality.
- Introduced new `AudioCapture`, `AudioPlayback`, and `VideoCapture` classes to handle audio and video processing more efficiently.
- Added backend-agnostic types for STT, TTS, LLM, and VLM in the core types module.
- Implemented streaming capabilities for model downloads in `ModelDownloader`.
- Enhanced the overall structure for better modularity and maintainability.
1 parent 61f6f1a commit 4a2ea17

39 files changed

Lines changed: 521 additions & 236 deletions

examples/web/RunAnywhereAI/src/views/speak.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@ const SURPRISE_TEXTS = [
2121
];
2222

2323
let ttsIsSpeaking = false;
24-
let ttsPlayback: InstanceType<
25-
typeof import('../../../../../sdk/runanywhere-web/packages/onnx/src/Infrastructure/AudioPlayback').AudioPlayback
26-
> | null = null;
24+
let ttsPlayback: import('../../../../../sdk/runanywhere-web/packages/core/src/Infrastructure/AudioPlayback').AudioPlayback | null = null;
2725

2826
// ---------------------------------------------------------------------------
2927
// Init
@@ -139,7 +137,10 @@ async function handleSpeak(): Promise<void> {
139137
statusEl.textContent = 'Synthesizing speech...';
140138
const speed = parseFloat(speedSlider.value);
141139

142-
const { TTS, AudioPlayback } = await import(
140+
const { AudioPlayback } = await import(
141+
'../../../../../sdk/runanywhere-web/packages/core/src/index'
142+
);
143+
const { TTS } = await import(
143144
'../../../../../sdk/runanywhere-web/packages/onnx/src/index'
144145
);
145146

examples/web/RunAnywhereAI/src/views/transcribe.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
*/
55

66
import type { TabLifecycle } from '../app';
7-
import { AudioCapture, VAD, SpeechActivity } from '../../../../../sdk/runanywhere-web/packages/onnx/src/index';
7+
import { AudioCapture, SpeechActivity } from '../../../../../sdk/runanywhere-web/packages/core/src/index';
8+
import { VAD } from '../../../../../sdk/runanywhere-web/packages/onnx/src/index';
89
import { ModelManager, ModelCategory, ensureVADLoaded, type ModelInfo } from '../services/model-manager';
910
import { showModelSelectionSheet } from '../components/model-selection';
1011

examples/web/RunAnywhereAI/src/views/vision.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
import type { TabLifecycle } from '../app';
1313
import { ModelManager, ModelCategory, type ModelInfo } from '../services/model-manager';
1414
import { showModelSelectionSheet } from '../components/model-selection';
15-
import { VLMWorkerBridge, VideoCapture, type CapturedFrame } from '../../../../../sdk/runanywhere-web/packages/llamacpp/src/index';
15+
import { VideoCapture, type CapturedFrame } from '../../../../../sdk/runanywhere-web/packages/core/src/index';
16+
import { VLMWorkerBridge } from '../../../../../sdk/runanywhere-web/packages/llamacpp/src/index';
1617

1718
// ---------------------------------------------------------------------------
1819
// Constants (matching iOS VLMViewModel defaults)

examples/web/RunAnywhereAI/src/views/voice.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@
88
import type { TabLifecycle } from '../app';
99
import { showModelSelectionSheet } from '../components/model-selection';
1010
import { ModelManager, ModelCategory, ensureVADLoaded } from '../services/model-manager';
11-
import { VoicePipeline, PipelineState } from '../../../../../sdk/runanywhere-web/packages/core/src/index';
12-
import {
13-
AudioCapture, AudioPlayback, VAD, SpeechActivity,
14-
} from '../../../../../sdk/runanywhere-web/packages/onnx/src/index';
11+
import { VoicePipeline, PipelineState, AudioCapture, AudioPlayback, SpeechActivity } from '../../../../../sdk/runanywhere-web/packages/core/src/index';
12+
import { VAD } from '../../../../../sdk/runanywhere-web/packages/onnx/src/index';
1513

1614
/** Shared AudioCapture instance for this view (replaces app-level MicCapture singleton). */
1715
const micCapture = new AudioCapture();

sdk/runanywhere-web/packages/onnx/src/Infrastructure/AudioCapture.ts renamed to sdk/runanywhere-web/packages/core/src/Infrastructure/AudioCapture.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* - Audio level monitoring via AnalyserNode (currentLevel getter)
1313
*/
1414

15-
import { SDKLogger } from '@runanywhere/web';
15+
import { SDKLogger } from '../Foundation/SDKLogger';
1616

1717
const logger = new SDKLogger('AudioCapture');
1818

sdk/runanywhere-web/packages/onnx/src/Infrastructure/AudioFileLoader.ts renamed to sdk/runanywhere-web/packages/core/src/Infrastructure/AudioFileLoader.ts

File renamed without changes.

sdk/runanywhere-web/packages/onnx/src/Infrastructure/AudioPlayback.ts renamed to sdk/runanywhere-web/packages/core/src/Infrastructure/AudioPlayback.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
* - Completion callbacks
1313
*/
1414

15-
import { SDKLogger, EventBus, SDKEventType } from '@runanywhere/web';
15+
import { SDKLogger } from '../Foundation/SDKLogger';
16+
import { EventBus } from '../Foundation/EventBus';
17+
import { SDKEventType } from '../types/enums';
1618

1719
const logger = new SDKLogger('AudioPlayback');
1820

sdk/runanywhere-web/packages/core/src/Infrastructure/ModelDownloader.ts

Lines changed: 70 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,11 @@ export class ModelDownloader {
224224
let totalBytesDownloaded = 0;
225225
let totalBytesExpected = 0;
226226

227-
// Download the primary file
228-
const primaryData = await this.downloadFile(model.url, (progress, bytesDown, bytesTotal) => {
227+
// Try streaming the primary file directly to storage (keeps memory constant).
228+
// Falls back to buffered download + store if streaming is not possible.
229+
const primaryProgressCb = (progress: number, bytesDown: number, bytesTotal: number) => {
229230
totalBytesDownloaded = bytesDown;
230-
totalBytesExpected = bytesTotal * totalFiles; // rough estimate
231+
totalBytesExpected = bytesTotal * totalFiles;
231232
const overallProgress = progress / totalFiles;
232233
this.registry.updateModel(modelId, { downloadProgress: overallProgress });
233234
this.emitDownloadProgress({
@@ -240,16 +241,21 @@ export class ModelDownloader {
240241
filesCompleted: 0,
241242
filesTotal: totalFiles,
242243
});
243-
});
244+
};
244245

245-
await this.storeInOPFS(modelId, primaryData);
246+
let primarySize = await this.downloadAndStoreStreaming(model.url, modelId, primaryProgressCb);
247+
if (primarySize === null) {
248+
const primaryData = await this.downloadFile(model.url, primaryProgressCb);
249+
await this.storeInOPFS(modelId, primaryData);
250+
primarySize = primaryData.length;
251+
}
246252

247253
// Download additional files (e.g., mmproj for VLM)
248254
if (model.additionalFiles && model.additionalFiles.length > 0) {
249255
for (let i = 0; i < model.additionalFiles.length; i++) {
250256
const file = model.additionalFiles[i];
251257
const fileKey = this.additionalFileKey(modelId, file.filename);
252-
const fileData = await this.downloadFile(file.url, (progress, bytesDown, bytesTotal) => {
258+
const fileProgressCb = (progress: number, bytesDown: number, bytesTotal: number) => {
253259
const baseProgress = (1 + i) / totalFiles;
254260
const fileProgress = progress / totalFiles;
255261
const overallProgress = baseProgress + fileProgress;
@@ -264,8 +270,13 @@ export class ModelDownloader {
264270
filesCompleted: 1 + i,
265271
filesTotal: totalFiles,
266272
});
267-
});
268-
await this.storeInOPFS(fileKey, fileData);
273+
};
274+
275+
const streamedSize = await this.downloadAndStoreStreaming(file.url, fileKey, fileProgressCb);
276+
if (streamedSize === null) {
277+
const fileData = await this.downloadFile(file.url, fileProgressCb);
278+
await this.storeInOPFS(fileKey, fileData);
279+
}
269280
}
270281
}
271282

@@ -280,7 +291,7 @@ export class ModelDownloader {
280291
filesTotal: totalFiles,
281292
});
282293

283-
let totalSize = primaryData.length;
294+
let totalSize = primarySize;
284295
if (model.additionalFiles) {
285296
for (const file of model.additionalFiles) {
286297
const fileKey = this.additionalFileKey(modelId, file.filename);
@@ -359,6 +370,56 @@ export class ModelDownloader {
359370
return data;
360371
}
361372

373+
/**
374+
* Download a file and stream it directly to persistent storage (OPFS or local FS)
375+
* without buffering the entire payload in memory.
376+
*
377+
* Returns the total bytes downloaded. Falls back to buffered download + store
378+
* if streaming write is not supported or fails.
379+
*
380+
* @returns Total bytes written, or null if streaming was not possible.
381+
*/
382+
async downloadAndStoreStreaming(
383+
url: string,
384+
storageKey: string,
385+
onProgress?: (progress: number, bytesDownloaded: number, totalBytes: number) => void,
386+
): Promise<number | null> {
387+
validateModelUrl(url);
388+
const response = await fetch(url);
389+
if (!response.ok) throw new Error(`HTTP ${response.status} for ${url}`);
390+
if (!response.body) return null;
391+
392+
const total = Number(response.headers.get('content-length') || 0);
393+
let received = 0;
394+
395+
// Build a progress-tracking pass-through stream
396+
const progressTransform = new TransformStream<Uint8Array, Uint8Array>({
397+
transform: (chunk, controller) => {
398+
received += chunk.length;
399+
onProgress?.(total > 0 ? received / total : 0, received, total);
400+
controller.enqueue(chunk);
401+
},
402+
});
403+
404+
const storageStream = response.body.pipeThrough(progressTransform);
405+
406+
try {
407+
if (this.localFileStorage?.isReady) {
408+
await this.localFileStorage.saveModelFromStream(storageKey, storageStream);
409+
logger.info(`Streamed ${storageKey} to local storage (${(received / 1024 / 1024).toFixed(1)} MB)`);
410+
return received;
411+
}
412+
413+
await this.storage.saveModelFromStream(storageKey, storageStream);
414+
logger.info(`Streamed ${storageKey} to OPFS (${(received / 1024 / 1024).toFixed(1)} MB)`);
415+
return received;
416+
} catch (err) {
417+
const msg = err instanceof Error ? err.message : String(err);
418+
logger.warning(`Streaming store failed for "${storageKey}": ${msg}, will fall back to buffered download`);
419+
return null;
420+
}
421+
}
422+
362423
/** Store data, preferring local filesystem when available, then OPFS, then memory cache. */
363424
async storeInOPFS(key: string, data: Uint8Array): Promise<void> {
364425
const sizeMB = (data.length / 1024 / 1024).toFixed(1);

sdk/runanywhere-web/packages/core/src/Infrastructure/ProviderTypes.ts

Lines changed: 11 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@
66
* Core code (e.g. VoicePipeline) retrieves them at runtime via
77
* `ExtensionPoint.getProvider()` with full compile-time type safety.
88
*
9-
* Replaces the previous implicit `globalThis.__runanywhere_*` contract.
10-
* See: https://github.com/RunanywhereAI/runanywhere-sdks/issues/371
9+
* All referenced types (LLMGenerationResult, STTTranscriptionResult, etc.)
10+
* are defined in core so providers return properly typed results.
1111
*/
1212

13+
import type { LLMGenerationResult } from '../types/LLMTypes';
14+
import type { STTTranscriptionResult, STTTranscribeOptions } from '../types/STTTypes';
15+
import type { TTSSynthesisResult, TTSSynthesizeOptions } from '../types/TTSTypes';
16+
1317
// ---------------------------------------------------------------------------
1418
// Provider Capability Keys
1519
// ---------------------------------------------------------------------------
@@ -26,10 +30,6 @@ export type ProviderCapability = 'llm' | 'stt' | 'tts';
2630

2731
/**
2832
* LLM (text generation) provider — implemented by @runanywhere/web-llamacpp.
29-
*
30-
* Only the subset of the TextGeneration API that cross-package consumers
31-
* (e.g. VoicePipeline) depend on. Backend packages may expose additional
32-
* methods beyond this interface.
3333
*/
3434
export interface LLMProvider {
3535
generateStream(
@@ -41,47 +41,29 @@ export interface LLMProvider {
4141
},
4242
): Promise<{
4343
stream: AsyncIterable<string>;
44-
result: Promise<{
45-
text: string;
46-
tokensUsed: number;
47-
tokensPerSecond: number;
48-
[key: string]: unknown;
49-
}>;
44+
result: Promise<LLMGenerationResult>;
5045
cancel: () => void;
5146
}>;
5247
}
5348

5449
/**
5550
* STT (speech-to-text) provider — implemented by @runanywhere/web-onnx.
56-
*
57-
* Only the subset of the STT API that cross-package consumers depend on.
5851
*/
5952
export interface STTProvider {
6053
transcribe(
6154
audio: Float32Array,
62-
options?: { sampleRate?: number },
63-
): Promise<{
64-
text: string;
65-
[key: string]: unknown;
66-
}>;
55+
options?: STTTranscribeOptions,
56+
): Promise<STTTranscriptionResult>;
6757
}
6858

6959
/**
7060
* TTS (text-to-speech) provider — implemented by @runanywhere/web-onnx.
71-
*
72-
* Only the subset of the TTS API that cross-package consumers depend on.
7361
*/
7462
export interface TTSProvider {
7563
synthesize(
7664
text: string,
77-
options?: { speed?: number },
78-
): Promise<{
79-
audioData: Float32Array;
80-
sampleRate: number;
81-
durationMs: number;
82-
processingTimeMs: number;
83-
[key: string]: unknown;
84-
}>;
65+
options?: TTSSynthesizeOptions,
66+
): Promise<TTSSynthesisResult>;
8567
}
8668

8769
// ---------------------------------------------------------------------------

sdk/runanywhere-web/packages/llamacpp/src/Infrastructure/VideoCapture.ts renamed to sdk/runanywhere-web/packages/core/src/Infrastructure/VideoCapture.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
* ```
2727
*/
2828

29-
import { SDKLogger } from '@runanywhere/web';
29+
import { SDKLogger } from '../Foundation/SDKLogger';
3030

3131
const logger = new SDKLogger('VideoCapture');
3232

0 commit comments

Comments
 (0)