Skip to content

Commit 03d6416

Browse files
committed
fix(tts): scan for WAV data chunk instead of hardcoding 44-byte header offset
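WAV files with extra chunks (LIST, fact, bext) before the PCM payload had their metadata bytes fed into AudioTrack as PCM, causing distorted playback. The code now walks the RIFF chunk structure to find where the "data" chunk's payload actually starts, falling back to offset 44 if no "data" chunk is found.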
1 parent 8c39083 commit 03d6416

1 file changed

Lines changed: 22 additions & 5 deletions

File tree

examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/presentation/tts/TextToSpeechViewModel.kt

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -509,25 +509,42 @@ class TextToSpeechViewModel(
509509
audioData[3] == 'F'.code.toByte()
510510

511511
val sampleRate: Int
512-
val headerSize: Int
512+
val pcmOffset: Int
513513

514514
if (isWav) {
515515
// WAV header: bytes 24-27 = sample rate (little-endian uint32)
516516
sampleRate = (audioData[24].toInt() and 0xFF) or
517517
((audioData[25].toInt() and 0xFF) shl 8) or
518518
((audioData[26].toInt() and 0xFF) shl 16) or
519519
((audioData[27].toInt() and 0xFF) shl 24)
520-
headerSize = 44
521-
Timber.i("WAV header: sampleRate=$sampleRate")
520+
521+
// Scan for the "data" chunk — WAV files can have extra
522+
// chunks (LIST, fact, bext, …) before the PCM payload.
523+
var offset = 12 // skip RIFF header (12 bytes)
524+
var dataStart = -1
525+
while (offset + 8 <= audioData.size) {
526+
val chunkId = String(audioData, offset, 4, Charsets.US_ASCII)
527+
val chunkSize = (audioData[offset + 4].toInt() and 0xFF) or
528+
((audioData[offset + 5].toInt() and 0xFF) shl 8) or
529+
((audioData[offset + 6].toInt() and 0xFF) shl 16) or
530+
((audioData[offset + 7].toInt() and 0xFF) shl 24)
531+
if (chunkId == "data") {
532+
dataStart = offset + 8
533+
break
534+
}
535+
offset += 8 + chunkSize
536+
}
537+
pcmOffset = if (dataStart > 0) dataStart else 44 // fallback for malformed files
538+
Timber.i("WAV header: sampleRate=$sampleRate, pcmOffset=$pcmOffset")
522539
} else {
523540
sampleRate = _uiState.value.sampleRate ?: 22050
524-
headerSize = 0
541+
pcmOffset = 0
525542
}
526543

527544
val channelConfig = AudioFormat.CHANNEL_OUT_MONO
528545
val audioFormat = AudioFormat.ENCODING_PCM_16BIT
529546

530-
val pcmData = audioData.copyOfRange(headerSize, audioData.size)
547+
val pcmData = audioData.copyOfRange(pcmOffset, audioData.size)
531548

532549
val bufferSize = AudioTrack.getMinBufferSize(sampleRate, channelConfig, audioFormat)
533550

0 commit comments

Comments
 (0)