@@ -608,23 +608,29 @@ rac_result_t rac_voice_agent_process_voice_turn(rac_voice_agent_handle_t handle,
608608 }
609609
610610 // Step 4: Convert Float32 PCM to WAV format for playback
611- // TTS returns raw Float32 samples, but audio players need WAV format
611+ // Platform TTS (e.g. System TTS) plays audio directly and returns no PCM data.
612+ // Only convert when actual audio data is returned (e.g. Piper/ONNX TTS).
612613 void * wav_data = nullptr ;
613614 size_t wav_size = 0 ;
614- result = rac_audio_float32_to_wav (tts_result.audio_data , tts_result.audio_size ,
615- tts_result.sample_rate > 0 ? tts_result.sample_rate
616- : RAC_TTS_DEFAULT_SAMPLE_RATE,
617- &wav_data, &wav_size);
618615
619- if (result != RAC_SUCCESS) {
620- RAC_LOG_ERROR (" VoiceAgent" , " Failed to convert audio to WAV format" );
621- rac_stt_result_free (&stt_result);
622- rac_llm_result_free (&llm_result);
623- rac_tts_result_free (&tts_result);
624- return result;
625- }
616+ if (tts_result.audio_data != nullptr && tts_result.audio_size > 0 ) {
617+ result = rac_audio_float32_to_wav (tts_result.audio_data , tts_result.audio_size ,
618+ tts_result.sample_rate > 0 ? tts_result.sample_rate
619+ : RAC_TTS_DEFAULT_SAMPLE_RATE,
620+ &wav_data, &wav_size);
626621
627- RAC_LOG_DEBUG (" VoiceAgent" , " Converted PCM to WAV format" );
622+ if (result != RAC_SUCCESS) {
623+ RAC_LOG_ERROR (" VoiceAgent" , " Failed to convert audio to WAV format" );
624+ rac_stt_result_free (&stt_result);
625+ rac_llm_result_free (&llm_result);
626+ rac_tts_result_free (&tts_result);
627+ return result;
628+ }
629+
630+ RAC_LOG_DEBUG (" VoiceAgent" , " Converted PCM to WAV format" );
631+ } else {
632+ RAC_LOG_DEBUG (" VoiceAgent" , " Platform TTS played audio directly — no PCM data to convert" );
633+ }
628634
629635 // Build result (mirrors Swift's VoiceAgentResult)
630636 out_result->speech_detected = RAC_TRUE;
@@ -726,25 +732,29 @@ rac_result_t rac_voice_agent_process_stream(rac_voice_agent_handle_t handle, con
726732 }
727733
728734 // Step 4: Convert Float32 PCM to WAV format for playback
735+ // Platform TTS plays audio directly and returns no PCM data — skip conversion.
729736 void * wav_data = nullptr ;
730737 size_t wav_size = 0 ;
731- result = rac_audio_float32_to_wav (tts_result.audio_data , tts_result.audio_size ,
732- tts_result.sample_rate > 0 ? tts_result.sample_rate
733- : RAC_TTS_DEFAULT_SAMPLE_RATE,
734- &wav_data, &wav_size);
735738
736- if (result != RAC_SUCCESS) {
737- rac_stt_result_free (&stt_result);
738- rac_llm_result_free (&llm_result);
739- rac_tts_result_free (&tts_result);
740- rac_voice_agent_event_t error_event = {};
741- error_event.type = RAC_VOICE_AGENT_EVENT_ERROR;
742- error_event.data .error_code = result;
743- callback (&error_event, user_data);
744- return result;
739+ if (tts_result.audio_data != nullptr && tts_result.audio_size > 0 ) {
740+ result = rac_audio_float32_to_wav (tts_result.audio_data , tts_result.audio_size ,
741+ tts_result.sample_rate > 0 ? tts_result.sample_rate
742+ : RAC_TTS_DEFAULT_SAMPLE_RATE,
743+ &wav_data, &wav_size);
744+
745+ if (result != RAC_SUCCESS) {
746+ rac_stt_result_free (&stt_result);
747+ rac_llm_result_free (&llm_result);
748+ rac_tts_result_free (&tts_result);
749+ rac_voice_agent_event_t error_event = {};
750+ error_event.type = RAC_VOICE_AGENT_EVENT_ERROR;
751+ error_event.data .error_code = result;
752+ callback (&error_event, user_data);
753+ return result;
754+ }
745755 }
746756
747- // Emit audio synthesized event (with WAV data)
757+ // Emit audio synthesized event (with WAV data, or empty for platform TTS )
748758 rac_voice_agent_event_t audio_event = {};
749759 audio_event.type = RAC_VOICE_AGENT_EVENT_AUDIO_SYNTHESIZED;
750760 audio_event.data .audio .audio_data = wav_data;
@@ -845,23 +855,27 @@ rac_result_t rac_voice_agent_synthesize_speech(rac_voice_agent_handle_t handle,
845855 return result;
846856 }
847857
848- // Convert Float32 PCM to WAV format for playback
849- void * wav_data = nullptr ;
850- size_t wav_size = 0 ;
851- result = rac_audio_float32_to_wav (tts_result.audio_data , tts_result.audio_size ,
852- tts_result.sample_rate > 0 ? tts_result.sample_rate
853- : RAC_TTS_DEFAULT_SAMPLE_RATE,
854- &wav_data, &wav_size);
858+ // Platform TTS plays audio directly and returns no PCM data — skip conversion.
859+ if (tts_result.audio_data != nullptr && tts_result.audio_size > 0 ) {
860+ void * wav_data = nullptr ;
861+ size_t wav_size = 0 ;
862+ result = rac_audio_float32_to_wav (tts_result.audio_data , tts_result.audio_size ,
863+ tts_result.sample_rate > 0 ? tts_result.sample_rate
864+ : RAC_TTS_DEFAULT_SAMPLE_RATE,
865+ &wav_data, &wav_size);
855866
856- if (result != RAC_SUCCESS) {
857- rac_tts_result_free (&tts_result);
858- return result;
859- }
867+ if (result != RAC_SUCCESS) {
868+ rac_tts_result_free (&tts_result);
869+ return result;
870+ }
860871
861- *out_audio = wav_data;
862- *out_audio_size = wav_size;
872+ *out_audio = wav_data;
873+ *out_audio_size = wav_size;
874+ } else {
875+ *out_audio = nullptr ;
876+ *out_audio_size = 0 ;
877+ }
863878
864- // Free the original PCM data
865879 rac_tts_result_free (&tts_result);
866880
867881 return RAC_SUCCESS;
0 commit comments