Skip to content

Commit ff52534

Browse files
fix stt
1 parent ffcfc1b commit ff52534

1 file changed

Lines changed: 15 additions & 5 deletions

File tree

sdk/runanywhere-commons/src/backends/metalrt/rac_stt_metalrt.cpp

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,10 @@
55

66
#include "rac_stt_metalrt.h"
77

8+
#include <cstdint>
89
#include <cstdlib>
910
#include <cstring>
11+
#include <vector>
1012

1113
#include "metalrt_c_api.h"
1214

@@ -65,13 +67,21 @@ rac_result_t rac_stt_metalrt_transcribe(rac_handle_t handle, const void* audio_d
6567
auto* impl = static_cast<rac_stt_metalrt_impl*>(handle);
6668
if (!impl->loaded) return RAC_ERROR_BACKEND_NOT_READY;
6769

68-
// MetalRT expects float32 samples + sample count + sample rate
69-
// RAC STT passes raw audio bytes — assume float32 PCM at 16kHz
70-
const auto* samples = static_cast<const float*>(audio_data);
71-
int n_samples = static_cast<int>(audio_size / sizeof(float));
70+
// SDK audio capture sends Int16 PCM at 16 kHz.
71+
// Convert to Float32 normalized [-1.0, 1.0] for metalrt_whisper_transcribe.
72+
const auto* int16_samples = static_cast<const int16_t*>(audio_data);
73+
int n_samples = static_cast<int>(audio_size / sizeof(int16_t));
7274
int sample_rate = 16000;
7375

74-
const char* text = metalrt_whisper_transcribe(impl->handle, samples, n_samples, sample_rate);
76+
std::vector<float> float_samples(n_samples);
77+
for (int i = 0; i < n_samples; i++) {
78+
float_samples[i] = static_cast<float>(int16_samples[i]) / 32768.0f;
79+
}
80+
81+
RAC_LOG_INFO(LOG_CAT, "Transcribing %d samples (%.1fs) at %d Hz",
82+
n_samples, static_cast<float>(n_samples) / sample_rate, sample_rate);
83+
84+
const char* text = metalrt_whisper_transcribe(impl->handle, float_samples.data(), n_samples, sample_rate);
7585
if (!text) {
7686
rac_error_set_details("metalrt_whisper_transcribe returned null");
7787
return RAC_ERROR_INFERENCE_FAILED;

0 commit comments

Comments (0)