@@ -77,6 +77,14 @@ export function initTranscribeTab(el: HTMLElement): TabLifecycle {
7777 </div>
7878 <h3 class="font-semibold">Ready to transcribe</h3>
7979 <p id="stt-mode-desc" class="helper-text text-center">Record first, then transcribe</p>
80+
81+ <!-- File drop zone — visible in batch mode only -->
82+ <div id="stt-drop-zone" class="stt-drop-zone">
83+ <div class="stt-drop-zone-icon">📂</div>
84+ <div class="stt-drop-zone-label">Drop audio file or click to browse</div>
85+ <div class="stt-drop-zone-hint">wav · mp3 · m4a · ogg · flac</div>
86+ </div>
87+ <input type="file" id="stt-file-input" accept="audio/*" style="display:none">
8088 </div>
8189
8290 <!-- Transcription result area -->
@@ -119,6 +127,9 @@ export function initTranscribeTab(el: HTMLElement): TabLifecycle {
119127 showModelSelectionSheet ( ModelCategory . SpeechRecognition ) ,
120128 ) ;
121129
130+ // File drop zone
131+ wireDropZone ( ) ;
132+
122133 // Subscribe to model changes so the pill label stays current
123134 ModelManager . onChange ( onSTTModelsChanged ) ;
124135 onSTTModelsChanged ( ModelManager . getModels ( ) ) ;
@@ -343,6 +354,67 @@ function stopLiveVAD(): void {
343354 if ( unsubscribeVAD ) { unsubscribeVAD ( ) ; unsubscribeVAD = null ; }
344355}
345356
357+ // ---------------------------------------------------------------------------
358+ // File Drop Zone (delegates all conversion + transcription to SDK)
359+ // ---------------------------------------------------------------------------
360+
/**
 * Wires up the audio-file drop zone and its hidden file input.
 *
 * - Clicking the zone opens the native file picker.
 * - Picking a file, or dropping one onto the zone, hands the first file to
 *   `transcribeFromFile`.
 * - While a drag hovers over the zone the `drag-over` class is applied.
 *
 * Does nothing if either element cannot be found inside `container`.
 */
function wireDropZone(): void {
  const dropZone = container.querySelector<HTMLElement>('#stt-drop-zone');
  const fileInput = container.querySelector<HTMLInputElement>('#stt-file-input');
  // querySelector returns null when the markup is absent; bail out instead of
  // throwing on the first addEventListener call.
  if (!dropZone || !fileInput) return;

  // Click → open file picker
  dropZone.addEventListener('click', () => fileInput.click());

  // File picker selection
  fileInput.addEventListener('change', () => {
    const file = fileInput.files?.[0];
    if (file) {
      // Reset the input; otherwise re-selecting the same file would not fire
      // another `change` event. The File reference above stays usable.
      fileInput.value = '';
      void transcribeFromFile(file);
    }
  });

  // Drag events
  dropZone.addEventListener('dragover', (e) => {
    // Cancelling dragover is what allows a subsequent `drop` on this element.
    e.preventDefault();
    dropZone.classList.add('drag-over');
  });
  dropZone.addEventListener('dragleave', (e) => {
    // dragleave also fires when the pointer moves onto one of the zone's own
    // children (icon, label, hint). Keep the highlight in that case so it does
    // not flicker. When relatedTarget is unavailable (null) we fall through
    // and remove the class, matching the previous behaviour.
    const enteredNode = e.relatedTarget;
    if (enteredNode instanceof Node && dropZone.contains(enteredNode)) return;
    dropZone.classList.remove('drag-over');
  });
  dropZone.addEventListener('drop', (e) => {
    // Prevent the browser's default handling of the dropped file.
    e.preventDefault();
    dropZone.classList.remove('drag-over');
    // Only the first dropped file is transcribed; any others are ignored.
    const file = e.dataTransfer?.files[0];
    if (file) void transcribeFromFile(file);
  });
}
390+
/**
 * Transcribes an audio file chosen or dropped by the user and reflects the
 * outcome in the STT UI.
 *
 * The call is ignored unless `sttState` is `'idle'`. Otherwise the previous
 * transcription and error are cleared, the state switches to `'transcribing'`,
 * and the file is passed unchanged to the SDK's `STT.transcribeFile` (no
 * decoding or resampling happens in this function). Any failure is captured
 * in `sttError` rather than thrown; the state always returns to `'idle'` and
 * the UI is re-rendered afterwards.
 *
 * @param file - The audio file to transcribe.
 * @returns A promise that resolves once the UI has been updated.
 */
async function transcribeFromFile(file: File): Promise<void> {
  // Only one operation at a time: bail out while recording/transcribing.
  if (sttState !== 'idle') {
    return;
  }

  // Reset previous output and show the in-progress state.
  sttTranscription = '';
  sttError = '';
  sttState = 'transcribing';
  renderSTTUI();

  try {
    const loadedModel = await ModelManager.ensureLoaded(ModelCategory.SpeechRecognition);
    if (!loadedModel) {
      throw new Error('No STT model loaded. Tap the model button to download one.');
    }

    // The SDK module is imported lazily, at the point of use.
    const onnx = await import('../../../../../sdk/runanywhere-web/packages/onnx/src/index');
    if (!onnx.STT.isModelLoaded) {
      throw new Error('STT model not loaded. Select a model first.');
    }

    // Hand the raw file to the SDK and keep only the trimmed text.
    const { text } = await onnx.STT.transcribeFile(file);
    const trimmedText = text.trim();
    sttTranscription = trimmedText;
    if (trimmedText === '') {
      sttError = 'No speech detected in the audio file.';
    }
  } catch (failure) {
    // Surface the failure in the UI instead of rejecting the promise.
    if (failure instanceof Error) {
      sttError = failure.message;
    } else {
      sttError = String(failure);
    }
  }

  // Success or failure, return to idle and redraw.
  sttState = 'idle';
  renderSTTUI();
}
417+
346418// ---------------------------------------------------------------------------
347419// UI Rendering
348420// ---------------------------------------------------------------------------
@@ -382,6 +454,12 @@ function renderSTTUI(): void {
382454 resultArea . style . display = hasResult ? 'flex' : 'none' ;
383455 if ( hasResult ) resultText . textContent = sttTranscription || 'Transcribing...' ;
384456
457+ // Drop zone: visible only in batch idle state
458+ const dropZone = container . querySelector ( '#stt-drop-zone' ) as HTMLElement | null ;
459+ if ( dropZone ) {
460+ dropZone . style . display = ( sttMode === 'batch' && sttState === 'idle' ) ? '' : 'none' ;
461+ }
462+
385463 // Level bars
386464 levelBars . style . display = sttState === 'recording' ? '' : 'none' ;
387465
0 commit comments