Skip to content

Commit 7894b5f

Browse files
abhisekupadhyaya authored and sanchitmonga22 committed
Fixed status field semantic mismatch and timing fields left partially populated on error
1 parent 56a4f65 commit 7894b5f

4 files changed

Lines changed: 24 additions & 5 deletions

File tree

sdk/runanywhere-commons/include/rac/backends/rac_llm_llamacpp.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,12 @@ RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_generate_stream(
177177
* @param options Generation options
178178
* @param callback Callback for each token
179179
* @param user_data User context passed to callback
180-
* @param timing_out Output: Benchmark timing (can be NULL for no timing)
180+
* @param timing_out Output: Benchmark timing struct, caller-allocated.
181+
* Must remain valid for the duration of the call.
182+
* Caller should initialize via rac_benchmark_timing_init() before passing.
183+
* On success, all t2/t3/t5 fields are populated.
184+
* On failure, status is set but timing fields may be partial.
185+
* Pass NULL to skip timing (zero overhead).
181186
* @return RAC_SUCCESS or error code
182187
*/
183188
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_generate_stream_with_timing(

sdk/runanywhere-commons/include/rac/core/rac_benchmark.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,12 @@ typedef struct rac_benchmark_timing {
6767
int32_t output_tokens;
6868

6969
/**
70-
* Status of the request:
71-
* - 0: Success
72-
* - Non-zero: Error code (from rac_result_t)
70+
* Status of the benchmark request.
71+
* Uses RAC_BENCHMARK_STATUS_* codes:
72+
* - RAC_BENCHMARK_STATUS_SUCCESS (0): Completed successfully
73+
* - RAC_BENCHMARK_STATUS_ERROR (1): Failed
74+
* - RAC_BENCHMARK_STATUS_TIMEOUT (2): Timed out
75+
* - RAC_BENCHMARK_STATUS_CANCELLED (3): Cancelled
7376
*/
7477
int32_t status;
7578

sdk/runanywhere-commons/include/rac/features/llm/rac_llm_component.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,13 @@ RAC_API rac_result_t rac_llm_component_generate_stream(
217217
* @param complete_callback Called when generation completes
218218
* @param error_callback Called on error
219219
* @param user_data User context passed to callbacks
220-
* @param timing_out Output: Benchmark timing (can be NULL for no timing)
220+
* @param timing_out Output: Benchmark timing struct, caller-allocated.
221+
* Must remain valid for the duration of the call.
222+
* Caller should initialize via rac_benchmark_timing_init() before passing.
223+
* Component fills t0/t4/t6, backend fills t2/t3/t5.
224+
* On success, all timing fields are populated.
225+
* On failure, status is set but timing fields may be partial.
226+
* Pass NULL to skip timing (zero overhead).
221227
* @return RAC_SUCCESS or error code
222228
*/
223229
RAC_API rac_result_t rac_llm_component_generate_stream_with_timing(

sdk/runanywhere-commons/src/backends/llamacpp/llamacpp_backend.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,11 @@ bool LlamaCppTextGeneration::generate_stream_with_timing(const TextGenerationReq
883883

884884
if (llama_decode(context_, batch) != 0) {
885885
LOGE("llama_decode failed for prompt");
886+
if (timing_out != nullptr) {
887+
int64_t now = rac_monotonic_now_ms();
888+
timing_out->t3_prefill_end_ms = now;
889+
timing_out->t5_last_token_ms = now;
890+
}
886891
llama_batch_free(batch);
887892
return false;
888893
}

0 commit comments

Comments
 (0)