
Commit 80211df

rui-ren, ruiren_microsoft, Copilot, and kunal-vaishnavi authored
Add live audio transcription streaming support to Foundry Local JS SDK (#486)
Adds real-time audio streaming support to the Foundry Local JS SDK, enabling live microphone-to-text transcription via ONNX Runtime GenAI ASR.

The existing AudioClient only supports file-based transcription. This PR introduces `LiveAudioTranscriptionSession`, which accepts continuous PCM audio chunks (e.g., from a microphone) and returns partial/final transcription results as an async iterable.

## What's included

### New files

- `src/openai/liveAudioTranscriptionClient.ts` — Streaming session with `start()`, `append()`, `getTranscriptionStream()`, `stop()`, `dispose()`
- `src/openai/liveAudioTranscriptionTypes.ts` — `LiveAudioTranscriptionResponse` and `CoreErrorResponse` interfaces, `tryParseCoreError()` helper
- `src/detail/coreInterop.ts` — Added `executeCommandWithBinary()` method and `StreamingRequestBuffer` struct for binary PCM data transport
- app.js — E2E example with microphone capture (naudiodon2) and synthetic audio fallback
- `test/openai/liveAudioTranscription.test.ts` — Unit tests for types/settings and an E2E test with synthetic PCM audio

### Modified files

- `src/imodel.ts` — Added `createLiveTranscriptionSession()` to the interface
- `src/model.ts` — Delegates to `selectedVariant.createLiveTranscriptionSession()`
- `src/modelVariant.ts` — Implementation (creates a new `LiveAudioTranscriptionSession(modelId, coreInterop)`)
- `src/index.ts` — Exports `LiveAudioTranscriptionSession`, `LiveAudioTranscriptionOptions`, `LiveAudioTranscriptionResponse`, `TranscriptionContentPart`

## API surface

```js
const session = model.createLiveTranscriptionSession();
session.settings.sampleRate = 16000;
session.settings.channels = 1;
session.settings.language = "en";

await session.start();

// Push audio from microphone callback
await session.append(pcmBytes);

// Read results as async iterable
for await (const result of session.getTranscriptionStream()) {
  console.log(result.content[0].text);
}

await session.stop();
```

## Design highlights

- **Internal async push queue** — A bounded `AsyncQueue<T>` serializes audio pushes from any context (safe for mic callbacks) and provides backpressure via a FIFO resolver queue. Mirrors C#'s `Channel<T>` pattern.
- **Binary data transport** — `executeCommandWithBinary()` sends PCM bytes alongside JSON params via `StreamingRequestBuffer`, with transcription results parsed from push responses.
- **Settings freeze** — Audio format settings are snapshot-copied and `Object.freeze()`d at `start()`, so they are immutable during the session.
- **Buffer copy** — `append()` copies the input `Uint8Array` before queueing, so the caller can safely reuse buffers.
- **Drain-on-stop** — `stop()` completes the push queue, waits for the push loop to drain, parses the final transcription from the stop response, then completes the output stream.
- **Error propagation** — `start()` failures are propagated to `outputQueue` so `getTranscriptionStream()` consumers see the error; `tryParseCoreError()` handles both raw JSON and CoreInterop-prefixed error messages.
- **Dispose safety** — `dispose()` wraps `stop()` in try/catch and never throws.

## Native core dependency

This PR adds the JS SDK surface. The three native commands (`audio_stream_start`, `audio_stream_push`, `audio_stream_stop`) are routed through the `execute_command` and new `execute_command_with_binary` exports. The code compiles with zero TypeScript errors without the native library.

## Testing

- ✅ TypeScript compilation — 0 errors across all source files
- ✅ Unit tests for `parseTranscriptionResult()`, `tryParseCoreError()`, `LiveAudioTranscriptionOptions`
- ✅ E2E test with synthetic PCM audio (skips gracefully if the native core is unavailable)

## Parity with C# SDK

This implementation mirrors the C# `LiveAudioTranscriptionSession` with identical logic:

- Same session lifecycle: `start` → `append` → `getStream` → `stop`
- Same push loop with error handling and binary data transport
- Same settings freeze and buffer copy semantics
- Same drain-before-stop ordering with final result parsing
- Same E2E test pattern (synthetic 440Hz sine wave, 100ms chunks, ConversationItem-shaped response validation)
- Same renamed types: `LiveAudioTranscription*` (matching the C# rename)

Changes from the original PR description:

| Old (incorrect) | New (matches code) |
|---|---|
| `LiveAudioTranscriptionClient` | `LiveAudioTranscriptionSession` |
| `LiveAudioTranscriptionSettings` | `LiveAudioTranscriptionOptions` |
| `LiveAudioTranscriptionResult` | `LiveAudioTranscriptionResponse` |
| `createLiveTranscriptionClient()` | `createLiveTranscriptionSession()` |

---

Co-authored-by: ruiren_microsoft <ruiren@microsoft.com>
Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Kunal Vaishnavi <kvaishnavi@microsoft.com>
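The bounded `AsyncQueue<T>` named in the design highlights is internal to the SDK. The sketch below is a hypothetical implementation of that pattern, not the SDK's actual class: producers await `push()` once `capacity` items are buffered (backpressure for mic callbacks), consumers drain via `for await`, and `complete()` ends iteration after buffered items flush.

```typescript
// Minimal bounded async queue, sketching the Channel<T>-like pattern
// described above. Hypothetical; the SDK's internal class may differ.
class AsyncQueue<T> {
  private items: T[] = [];
  private pullers: ((r: IteratorResult<T>) => void)[] = []; // waiting consumers (FIFO)
  private pushers: (() => void)[] = [];                     // waiting producers (FIFO)
  private done = false;

  constructor(private capacity = 16) {}

  async push(item: T): Promise<void> {
    if (this.done) throw new Error('queue completed');
    const puller = this.pullers.shift();
    if (puller) {
      // A consumer is already waiting: hand the item over directly.
      puller({ value: item, done: false });
      return;
    }
    while (this.items.length >= this.capacity) {
      // Backpressure: park this producer until a consumer frees a slot.
      await new Promise<void>((resolve) => this.pushers.push(resolve));
    }
    this.items.push(item);
  }

  complete(): void {
    this.done = true;
    // Release any consumers still waiting for data.
    for (const p of this.pullers.splice(0)) p({ value: undefined as any, done: true });
  }

  async *[Symbol.asyncIterator](): AsyncIterator<T> {
    for (;;) {
      if (this.items.length > 0) {
        const item = this.items.shift()!;
        this.pushers.shift()?.(); // wake one parked producer
        yield item;
      } else if (this.done) {
        return;
      } else {
        const r = await new Promise<IteratorResult<T>>((resolve) => this.pullers.push(resolve));
        if (r.done) return;
        yield r.value;
      }
    }
  }
}
```

The FIFO resolver arrays are what keep push order stable when multiple callbacks race, which is the property the PR's mic-callback safety claim relies on.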
1 parent a742610 commit 80211df

11 files changed: 1018 additions & 0 deletions

Lines changed: 58 additions & 0 deletions
# Live Audio Transcription Example

Real-time microphone-to-text transcription using the Foundry Local JS SDK with Nemotron ASR.

## Prerequisites

- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
- Node.js 18+
- A microphone (optional — falls back to synthetic audio)

## Setup

```bash
npm install foundry-local-sdk naudiodon2
```

> **Note:** `naudiodon2` is optional — it provides cross-platform microphone capture. Without it, the example falls back to synthetic audio for testing.

## Run

```bash
node app.js
```

Speak into your microphone. Transcription appears in real time. Press `Ctrl+C` to stop.

## How it works

1. Initializes the Foundry Local SDK and loads the Nemotron ASR model
2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
3. Captures microphone audio via `naudiodon2` (or generates synthetic audio as a fallback)
4. Pushes PCM chunks to the SDK via `session.append()`
5. Reads transcription results via `for await (const result of session.getTranscriptionStream())`
6. Accesses text via `result.content[0].text` (OpenAI Realtime ConversationItem pattern)

## API

```javascript
const audioClient = model.createAudioClient();
const session = audioClient.createLiveTranscriptionSession();
session.settings.sampleRate = 16000;
session.settings.channels = 1;
session.settings.language = 'en';

await session.start();

// Push audio
await session.append(pcmBytes);

// Read results
for await (const result of session.getTranscriptionStream()) {
  console.log(result.content[0].text);       // transcribed text
  console.log(result.content[0].transcript); // alias (OpenAI compat)
  console.log(result.is_final);              // true for final results
}

await session.stop();
```
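The session expects raw 16-bit signed little-endian PCM. If your capture source yields Float32 samples (WebAudio, for example), convert them before calling `session.append()`. The helper below is illustrative, not part of the SDK:

```javascript
// Convert Float32 samples in [-1, 1] (e.g., from WebAudio) to the
// 16-bit signed little-endian mono PCM bytes the session expects.
// Illustrative helper; not an SDK API.
function floatTo16BitPcm(float32Samples) {
  const bytes = new Uint8Array(float32Samples.length * 2);
  for (let i = 0; i < float32Samples.length; i++) {
    // Clamp to [-1, 1], then scale to the signed 16-bit range.
    const s = Math.max(-1, Math.min(1, float32Samples[i]));
    const v = Math.round(s < 0 ? s * 32768 : s * 32767);
    bytes[i * 2] = v & 0xff;            // low byte first (little-endian)
    bytes[i * 2 + 1] = (v >> 8) & 0xff; // high byte
  }
  return bytes;
}

// Example: silence, full-scale positive, full-scale negative
const pcm = floatTo16BitPcm(new Float32Array([0, 1, -1]));
// pcm is 6 bytes: 0x00 0x00, 0xff 0x7f, 0x00 0x80
```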
Lines changed: 157 additions & 0 deletions
```javascript
// Live Audio Transcription Example — Foundry Local JS SDK
//
// Demonstrates real-time microphone-to-text using the JS SDK.
// Requires: npm install foundry-local-sdk naudiodon2
//
// Usage: node app.js

import { FoundryLocalManager } from 'foundry-local-sdk';

console.log('╔══════════════════════════════════════════════════════════╗');
console.log('║ Foundry Local — Live Audio Transcription (JS SDK)        ║');
console.log('╚══════════════════════════════════════════════════════════╝');
console.log();

// Initialize the Foundry Local SDK
console.log('Initializing Foundry Local SDK...');
const manager = FoundryLocalManager.create({
  appName: 'foundry_local_live_audio',
  logLevel: 'info'
});
console.log('✓ SDK initialized');

// Get and load the nemotron model
const modelAlias = 'nemotron';
let model = await manager.catalog.getModel(modelAlias);
if (!model) {
  console.error(`ERROR: Model "${modelAlias}" not found in catalog.`);
  process.exit(1);
}

console.log(`Found model: ${model.id}`);
console.log('Downloading model (if needed)...');
await model.download((progress) => {
  process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
});
console.log('\n✓ Model downloaded');

console.log('Loading model...');
await model.load();
console.log('✓ Model loaded');

// Create live transcription session
const audioClient = model.createAudioClient();
const session = audioClient.createLiveTranscriptionSession();
session.settings.sampleRate = 16000; // Default is 16000; shown here for clarity
session.settings.channels = 1;
session.settings.bitsPerSample = 16;
session.settings.language = 'en';

console.log('Starting streaming session...');
await session.start();
console.log('✓ Session started');

// Read transcription results in background
const readPromise = (async () => {
  try {
    for await (const result of session.getTranscriptionStream()) {
      const text = result.content?.[0]?.text;
      if (result.is_final) {
        console.log();
        console.log(`  [FINAL] ${text}`);
      } else if (text) {
        process.stdout.write(text);
      }
    }
  } catch (err) {
    if (err.name !== 'AbortError') {
      console.error('Stream error:', err.message);
    }
  }
})();

// --- Microphone capture ---
// This example uses naudiodon2 for cross-platform audio capture.
// Install with: npm install naudiodon2
//
// If you prefer a different audio library, just push PCM bytes
// (16-bit signed LE, mono, 16kHz) via session.append().

let audioInput;
try {
  const { default: portAudio } = await import('naudiodon2');

  audioInput = portAudio.AudioIO({
    inOptions: {
      channelCount: session.settings.channels,
      sampleFormat: session.settings.bitsPerSample === 16
        ? portAudio.SampleFormat16Bit
        : portAudio.SampleFormat32Bit,
      sampleRate: session.settings.sampleRate,
      framesPerBuffer: 1600, // 100ms chunks
      maxQueue: 15 // buffer during event-loop blocks from sync FFI calls
    }
  });

  let appendPending = false;
  audioInput.on('data', (buffer) => {
    if (appendPending) return; // drop frame while backpressured
    const pcm = new Uint8Array(buffer);
    appendPending = true;
    session.append(pcm).then(() => {
      appendPending = false;
    }).catch((err) => {
      appendPending = false;
      console.error('append error:', err.message);
    });
  });

  console.log();
  console.log('════════════════════════════════════════════════════════════');
  console.log('  LIVE TRANSCRIPTION ACTIVE');
  console.log('  Speak into your microphone.');
  console.log('  Press Ctrl+C to stop.');
  console.log('════════════════════════════════════════════════════════════');
  console.log();

  audioInput.start();
} catch (err) {
  console.warn('⚠ Could not initialize microphone (naudiodon2 may not be installed).');
  console.warn('  Install with: npm install naudiodon2');
  console.warn('  Falling back to synthetic audio test...');
  console.warn();

  // Fallback: push 2 seconds of synthetic PCM (440Hz sine wave)
  const sampleRate = session.settings.sampleRate;
  const duration = 2;
  const totalSamples = sampleRate * duration;
  const pcmBytes = new Uint8Array(totalSamples * 2);
  for (let i = 0; i < totalSamples; i++) {
    const t = i / sampleRate;
    const sample = Math.round(32767 * 0.5 * Math.sin(2 * Math.PI * 440 * t));
    pcmBytes[i * 2] = sample & 0xFF;
    pcmBytes[i * 2 + 1] = (sample >> 8) & 0xFF;
  }

  // Push in 100ms chunks
  const chunkSize = (sampleRate / 10) * 2;
  for (let offset = 0; offset < pcmBytes.length; offset += chunkSize) {
    const len = Math.min(chunkSize, pcmBytes.length - offset);
    await session.append(pcmBytes.slice(offset, offset + len));
  }

  console.log('✓ Synthetic audio pushed');
}

// Handle graceful shutdown
process.on('SIGINT', async () => {
  console.log('\n\nStopping...');
  if (audioInput) {
    audioInput.quit();
  }
  await session.stop();
  await readPromise;
  await model.unload();
  console.log('✓ Done');
  process.exit(0);
});
```

sdk/js/src/detail/coreInterop.ts

Lines changed: 59 additions & 0 deletions
```diff
@@ -19,6 +19,16 @@ koffi.struct('ResponseBuffer', {
   ErrorLength: 'int32_t',
 });
 
+// Extended request struct for binary data (audio streaming)
+koffi.struct('StreamingRequestBuffer', {
+  Command: 'char*',
+  CommandLength: 'int32_t',
+  Data: 'char*',        // JSON params
+  DataLength: 'int32_t',
+  BinaryData: 'void*',  // raw PCM audio bytes
+  BinaryDataLength: 'int32_t',
+});
+
 const CallbackType = koffi.proto('void CallbackType(void *data, int32_t length, void *userData)');
 
 const __filename = fileURLToPath(import.meta.url);
@@ -28,6 +38,7 @@ export class CoreInterop {
   private lib: any;
   private execute_command: any;
   private execute_command_with_callback: any;
+  private execute_command_with_binary: any = null;
 
   private static _getLibraryExtension(): string {
     const platform = process.platform;
@@ -93,6 +104,7 @@
 
     this.execute_command = this.lib.func('void execute_command(RequestBuffer *request, _Inout_ ResponseBuffer *response)');
     this.execute_command_with_callback = this.lib.func('void execute_command_with_callback(RequestBuffer *request, _Inout_ ResponseBuffer *response, CallbackType *callback, void *userData)');
+    this.execute_command_with_binary = this.lib.func('void execute_command_with_binary(StreamingRequestBuffer *request, _Inout_ ResponseBuffer *response)');
   }
 
   public executeCommand(command: string, params?: any): string {
@@ -129,6 +141,53 @@
     }
   }
 
+  /**
+   * Execute a native command with binary data (e.g., audio PCM bytes).
+   * Uses the execute_command_with_binary native entry point which accepts
+   * both JSON params and raw binary data via StreamingRequestBuffer.
+   */
+  public executeCommandWithBinary(command: string, params: any, binaryData: Uint8Array): string {
+    const cmdBuf = koffi.alloc('char', command.length + 1);
+    koffi.encode(cmdBuf, 'char', command, command.length + 1);
+
+    const dataStr = params ? JSON.stringify(params) : '';
+    const dataBytes = this._toBytes(dataStr);
+    const dataBuf = koffi.alloc('char', dataBytes.length + 1);
+    koffi.encode(dataBuf, 'char', dataStr, dataBytes.length + 1);
+
+    // For binary data, use a Node.js Buffer which allocates stable external memory
+    // that won't be moved by V8's garbage collector during the FFI call.
+    const binLength = binaryData.length;
+    const binBuf = Buffer.from(binaryData);
+
+    // Use koffi.as to pass the Buffer directly as a typed pointer
+    const binTypedPtr = koffi.as(binBuf, 'void *');
+
+    const req = {
+      Command: koffi.address(cmdBuf),
+      CommandLength: command.length,
+      Data: koffi.address(dataBuf),
+      DataLength: dataBytes.length,
+      BinaryData: binTypedPtr,
+      BinaryDataLength: binLength
+    };
+    const res = { Data: 0, DataLength: 0, Error: 0, ErrorLength: 0 };
+
+    this.execute_command_with_binary(req, res);
+
+    try {
+      if (res.Error) {
+        const errorMsg = koffi.decode(res.Error, 'char', res.ErrorLength);
+        throw new Error(`Command '${command}' failed: ${errorMsg}`);
+      }
+
+      return res.Data ? koffi.decode(res.Data, 'char', res.DataLength) : "";
+    } finally {
+      if (res.Data) koffi.free(res.Data);
+      if (res.Error) koffi.free(res.Error);
+    }
+  }
+
   public executeCommandStreaming(command: string, params: any, callback: (chunk: string) => void): Promise<void> {
     const cmdBuf = koffi.alloc('char', command.length + 1);
     koffi.encode(cmdBuf, 'char', command, command.length + 1);
```
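For context, the three native audio commands from the PR description are routed through this new method. The sketch below shows the shape of such a call against a stand-in interop object; the `audio_stream_push` command name comes from the PR description, while the params shape, `pushChunk`, and `fake` are illustrative assumptions, not SDK code:

```typescript
// Sketch only: the { model } params shape and this interface are assumptions.
interface BinaryInterop {
  executeCommandWithBinary(cmd: string, params: unknown, bin: Uint8Array): string;
}

// Route one PCM chunk through the binary entry point and parse the JSON reply.
function pushChunk(interop: BinaryInterop, modelId: string, chunk: Uint8Array): unknown {
  const json = interop.executeCommandWithBinary('audio_stream_push', { model: modelId }, chunk);
  return json ? JSON.parse(json) : null;
}

// Stand-in interop that echoes what it received, for illustration only.
const fake: BinaryInterop = {
  executeCommandWithBinary: (cmd, params, bin) =>
    JSON.stringify({ cmd, model: (params as { model: string }).model, bytes: bin.length }),
};
```

The real call site lives inside `LiveAudioTranscriptionSession`'s push loop, with the response parsed into partial/final transcription results.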

sdk/js/src/imodel.ts

Lines changed: 8 additions & 0 deletions
```diff
@@ -1,5 +1,6 @@
 import { ChatClient } from './openai/chatClient.js';
 import { AudioClient } from './openai/audioClient.js';
+import { LiveAudioTranscriptionSession } from './openai/liveAudioTranscriptionClient.js';
 import { ResponsesClient } from './openai/responsesClient.js';
 
 export interface IModel {
@@ -22,6 +23,13 @@ export interface IModel {
 
   createChatClient(): ChatClient;
   createAudioClient(): AudioClient;
+
+  /**
+   * Creates a LiveAudioTranscriptionSession for real-time audio streaming ASR.
+   * The model must be loaded before calling this method.
+   * @returns A LiveAudioTranscriptionSession instance.
+   */
+  createLiveTranscriptionSession(): LiveAudioTranscriptionSession;
   /**
    * Creates a ResponsesClient for interacting with the model via the Responses API.
    * Unlike createChatClient/createAudioClient (which use FFI), the Responses API
```

sdk/js/src/index.ts

Lines changed: 2 additions & 0 deletions
```diff
@@ -6,6 +6,8 @@ export { ModelVariant } from './modelVariant.js';
 export type { IModel } from './imodel.js';
 export { ChatClient, ChatClientSettings } from './openai/chatClient.js';
 export { AudioClient, AudioClientSettings } from './openai/audioClient.js';
+export { LiveAudioTranscriptionSession, LiveAudioTranscriptionOptions } from './openai/liveAudioTranscriptionClient.js';
+export type { LiveAudioTranscriptionResponse, TranscriptionContentPart } from './openai/liveAudioTranscriptionTypes.js';
 export { ResponsesClient, ResponsesClientSettings, getOutputText } from './openai/responsesClient.js';
 export { ModelLoadManager } from './detail/modelLoadManager.js';
 /** @internal */
```

sdk/js/src/model.ts

Lines changed: 9 additions & 0 deletions
```diff
@@ -1,6 +1,7 @@
 import { ModelVariant } from './modelVariant.js';
 import { ChatClient } from './openai/chatClient.js';
 import { AudioClient } from './openai/audioClient.js';
+import { LiveAudioTranscriptionSession } from './openai/liveAudioTranscriptionClient.js';
 import { ResponsesClient } from './openai/responsesClient.js';
 import { IModel } from './imodel.js';
 
@@ -179,6 +180,14 @@ export class Model implements IModel {
     return this.selectedVariant.createAudioClient();
   }
 
+  /**
+   * Creates a LiveAudioTranscriptionSession for real-time audio streaming ASR.
+   * @returns A LiveAudioTranscriptionSession instance.
+   */
+  public createLiveTranscriptionSession(): LiveAudioTranscriptionSession {
+    return this.selectedVariant.createLiveTranscriptionSession();
+  }
+
   /**
    * Creates a ResponsesClient for interacting with the model via the Responses API.
    * @param baseUrl - The base URL of the Foundry Local web service.
```

sdk/js/src/modelVariant.ts

Lines changed: 9 additions & 0 deletions
```diff
@@ -3,6 +3,7 @@ import { ModelLoadManager } from './detail/modelLoadManager.js';
 import { ModelInfo } from './types.js';
 import { ChatClient } from './openai/chatClient.js';
 import { AudioClient } from './openai/audioClient.js';
+import { LiveAudioTranscriptionSession } from './openai/liveAudioTranscriptionClient.js';
 import { ResponsesClient } from './openai/responsesClient.js';
 import { IModel } from './imodel.js';
 
@@ -149,6 +150,14 @@ export class ModelVariant implements IModel {
     return new AudioClient(this._modelInfo.id, this.coreInterop);
   }
 
+  /**
+   * Creates a LiveAudioTranscriptionSession for real-time audio streaming ASR.
+   * @returns A LiveAudioTranscriptionSession instance.
+   */
+  public createLiveTranscriptionSession(): LiveAudioTranscriptionSession {
+    return new LiveAudioTranscriptionSession(this._modelInfo.id, this.coreInterop);
+  }
+
   /**
    * Creates a ResponsesClient for interacting with the model via the Responses API.
    * @param baseUrl - The base URL of the Foundry Local web service.
```
