Skip to content

Commit c2acc46

Browse files
AmanSwar authored and sanchitmonga22 committed
feat: complete benchmark metrics — bug fixes, extended metrics, logging, stats, and tests
Fix reviewer-flagged bugs: add an error_code field to the timing struct, set t6/error_code on all error paths, capture prompt tokens from the backend, and fix a JNI local ref leak. Add an extended metrics provider interface, JSON/CSV/log serialization, and statistical analysis with percentiles and outlier detection. All 29 unit tests passing.
1 parent 7894b5f commit c2acc46

17 files changed

Lines changed: 1592 additions & 4 deletions

sdk/runanywhere-commons/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,9 @@ set(RAC_CORE_SOURCES
276276
src/core/rac_error.cpp
277277
src/core/rac_time.cpp
278278
src/core/rac_benchmark.cpp
279+
src/core/rac_benchmark_metrics.cpp
280+
src/core/rac_benchmark_log.cpp
281+
src/core/rac_benchmark_stats.cpp
279282
src/core/rac_memory.cpp
280283
src/core/rac_logger.cpp
281284
src/core/rac_audio_utils.cpp

sdk/runanywhere-commons/include/rac/core/rac_benchmark.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,13 @@ typedef struct rac_benchmark_timing {
7676
*/
7777
int32_t status;
7878

79+
/**
80+
* Specific error code when status is not RAC_BENCHMARK_STATUS_SUCCESS.
81+
* Uses rac_result_t error codes (e.g., RAC_ERROR_NOT_SUPPORTED).
82+
* Set to RAC_SUCCESS (0) when status is RAC_BENCHMARK_STATUS_SUCCESS.
83+
*/
84+
rac_result_t error_code;
85+
7986
} rac_benchmark_timing_t;
8087

8188
// =============================================================================
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
2+
* @file rac_benchmark_log.h
3+
* @brief RunAnywhere Commons - Benchmark Logging and Serialization
4+
*
5+
* Provides functions to serialize benchmark timing data as JSON or CSV,
6+
* and to log benchmark results via the RAC logging system.
7+
*
8+
* Usage:
9+
* // Log timing summary
10+
* rac_benchmark_timing_log(&timing, "inference_run_1");
11+
*
12+
* // Export as JSON
13+
* char* json = rac_benchmark_timing_to_json(&timing);
14+
* // ... use json ...
15+
* free(json);
16+
*
17+
* // Export as CSV
18+
* char* header = rac_benchmark_timing_to_csv(NULL, RAC_TRUE);
19+
* char* row = rac_benchmark_timing_to_csv(&timing, RAC_FALSE);
20+
* free(header);
21+
* free(row);
22+
*/
23+
24+
#ifndef RAC_BENCHMARK_LOG_H
25+
#define RAC_BENCHMARK_LOG_H
26+
27+
#include "rac/core/rac_benchmark.h"
28+
#include "rac/core/rac_types.h"
29+
30+
#ifdef __cplusplus
31+
extern "C" {
32+
#endif
33+
34+
// =============================================================================
35+
// JSON SERIALIZATION
36+
// =============================================================================
37+
38+
/**
39+
* Serializes a benchmark timing struct into a newly allocated JSON string.
40+
*
41+
* The output includes all timing fields plus these derived metrics:
42+
* - ttft_ms: Time to first token (t4 - t0)
43+
* - prefill_ms: Prefill duration (t3 - t2)
44+
* - decode_ms: Decode duration (t5 - t3)
45+
* - e2e_ms: End-to-end latency (t6 - t0)
46+
* - decode_tps: Decode throughput in tokens/second (output_tokens / decode_ms * 1000)
47+
*
48+
* @param timing Timing struct to serialize; passing NULL yields a NULL result
49+
* @return Heap-allocated JSON string owned by the caller (release with free()),
* or NULL if timing is NULL or on error
50+
*/
51+
RAC_API char* rac_benchmark_timing_to_json(const rac_benchmark_timing_t* timing);
52+
53+
// =============================================================================
54+
// CSV SERIALIZATION
55+
// =============================================================================
56+
57+
/**
58+
* Serializes a benchmark timing struct as a CSV data row, or returns the CSV header row.
59+
*
60+
* @param timing Timing struct to serialize; ignored when header is RAC_TRUE
* (the usage example in this file passes NULL in that case)
61+
* @param header RAC_TRUE to return the CSV header row, RAC_FALSE to return a data row
62+
* @return Heap-allocated CSV string owned by the caller (release with free()),
* or NULL on error
63+
*/
64+
RAC_API char* rac_benchmark_timing_to_csv(const rac_benchmark_timing_t* timing, rac_bool_t header);
65+
66+
// =============================================================================
67+
// LOGGING
68+
// =============================================================================
69+
70+
/**
71+
* Logs a summary of a benchmark timing struct through the RAC logging system.
72+
*
73+
* Outputs key metrics at INFO level under the "Benchmark" category:
74+
* - TTFT, prefill time, decode time, E2E latency
75+
* - Token counts and throughput
76+
* - Status and error code
77+
*
78+
* @param timing Timing struct to log; when NULL the call is a no-op
79+
* @param label Optional label for this benchmark run (may be NULL)
80+
*/
81+
RAC_API void rac_benchmark_timing_log(const rac_benchmark_timing_t* timing, const char* label);
82+
83+
#ifdef __cplusplus
84+
}
85+
#endif
86+
87+
#endif /* RAC_BENCHMARK_LOG_H */
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/**
2+
* @file rac_benchmark_metrics.h
3+
* @brief RunAnywhere Commons - Extended Benchmark Metrics
4+
*
5+
* Defines extended device/platform metrics captured alongside benchmark timing.
6+
* Actual metric collection is platform-specific (iOS/Android) and provided
7+
* via a callback provider pattern. The C++ layer defines interfaces only.
8+
*
9+
* Usage:
10+
* // Platform SDK registers a provider during init:
11+
* rac_benchmark_set_metrics_provider(my_provider_fn, my_context);
12+
*
13+
* // Commons layer captures metrics at t0 and t6:
14+
* rac_benchmark_extended_metrics_t metrics;
15+
* rac_benchmark_capture_metrics(&metrics);
16+
*/
17+
18+
#ifndef RAC_BENCHMARK_METRICS_H
19+
#define RAC_BENCHMARK_METRICS_H
20+
21+
#include "rac/core/rac_types.h"
22+
23+
#ifdef __cplusplus
24+
extern "C" {
25+
#endif
26+
27+
// =============================================================================
28+
// EXTENDED METRICS STRUCT
29+
// =============================================================================
30+
31+
/**
32+
* Extended device/platform metrics captured during a benchmark run.
33+
*
34+
* All fields default to -1 (unavailable) unless the platform provider
35+
* populates them, so each platform may report only the subset it supports.
36+
*/
37+
typedef struct rac_benchmark_extended_metrics {
38+
/** Resident memory usage at capture time, in bytes (-1 if unavailable) */
39+
int64_t memory_usage_bytes;
40+
41+
/** Peak memory usage during the request, in bytes (-1 if unavailable) */
42+
int64_t memory_peak_bytes;
43+
44+
/** CPU temperature, in degrees Celsius (-1.0 if unavailable) */
45+
float cpu_temperature_celsius;
46+
47+
/** Battery level in the range 0.0-1.0 (-1.0 if unavailable) */
48+
float battery_level;
49+
50+
/** GPU utilization as a percentage in the range 0-100 (-1.0 if unavailable) */
51+
float gpu_utilization_percent;
52+
53+
/**
54+
* Thermal state of the device, encoded as an integer:
55+
* 0 = nominal
56+
* 1 = fair
57+
* 2 = serious
58+
* 3 = critical
59+
* -1 = unavailable
60+
*/
61+
int32_t thermal_state;
62+
63+
} rac_benchmark_extended_metrics_t;
64+
65+
// =============================================================================
66+
// METRICS PROVIDER CALLBACK
67+
// =============================================================================
68+
69+
/**
70+
* Callback signature for platform-specific metrics collection.
71+
*
72+
* The platform SDK (Swift/Kotlin) implements this to fill in
73+
* whatever device metrics are available on that platform. Fields the
* platform cannot measure can be left untouched, because out arrives
* pre-initialized to the unavailable values.
*
* NOTE(review): rac_benchmark_capture_metrics() is documented as callable
* from any thread, so implementations presumably need to be thread-safe — confirm.
74+
*
75+
* @param out Metrics struct to populate (pre-initialized to unavailable values)
76+
* @param user_data Platform context that was supplied at registration time
77+
*/
78+
typedef void (*rac_benchmark_metrics_provider_fn)(rac_benchmark_extended_metrics_t* out,
79+
void* user_data);
80+
81+
// =============================================================================
82+
// METRICS API
83+
// =============================================================================
84+
85+
/**
86+
* Registers a platform-specific metrics provider.
87+
*
88+
* Call this during SDK initialization. Only one provider can be active
89+
* at a time; setting a new provider replaces the previous one.
90+
* Pass NULL to unregister.
*
* NOTE(review): the required lifetime of user_data is not stated; presumably it
* must stay valid until the provider is replaced or unregistered — confirm.
91+
*
92+
* @param provider Metrics provider callback (NULL to unregister)
93+
* @param user_data Platform context forwarded to every provider call
94+
*/
95+
RAC_API void rac_benchmark_set_metrics_provider(rac_benchmark_metrics_provider_fn provider,
96+
void* user_data);
97+
98+
/**
99+
* Captures current device metrics using the registered provider.
100+
*
101+
* If no provider is registered, all fields are set to unavailable (-1).
102+
* Thread-safe: can be called from any thread.
*
* NOTE(review): the contract says out must not be NULL; whether a NULL
* argument is checked or is undefined behavior is not stated — confirm.
103+
*
104+
* @param out Metrics struct to populate (must not be NULL)
105+
*/
106+
RAC_API void rac_benchmark_capture_metrics(rac_benchmark_extended_metrics_t* out);
107+
108+
/**
109+
* Initializes an extended metrics struct to unavailable values
* (-1 for the integer fields and -1.0 for the float fields, per the
* field documentation of rac_benchmark_extended_metrics_t).
110+
*
111+
* @param metrics Metrics struct to initialize (must not be NULL)
112+
*/
113+
RAC_API void rac_benchmark_extended_metrics_init(rac_benchmark_extended_metrics_t* metrics);
114+
115+
#ifdef __cplusplus
116+
}
117+
#endif
118+
119+
#endif /* RAC_BENCHMARK_METRICS_H */
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/**
2+
* @file rac_benchmark_stats.h
3+
* @brief RunAnywhere Commons - Benchmark Statistical Analysis
4+
*
5+
* Collects benchmark timing observations and computes statistical summaries
6+
* including percentiles (P50/P95/P99), mean, stddev, and outlier detection.
7+
*
8+
* Usage:
9+
* rac_benchmark_stats_handle_t stats;
10+
* rac_benchmark_stats_create(&stats);
11+
*
12+
* // Record observations
13+
* rac_benchmark_stats_record(stats, &timing1);
14+
* rac_benchmark_stats_record(stats, &timing2);
15+
*
16+
* // Get summary
17+
* rac_benchmark_summary_t summary;
18+
* rac_benchmark_stats_get_summary(stats, &summary);
19+
*
20+
* // Export as JSON
21+
* char* json = rac_benchmark_stats_summary_to_json(&summary);
22+
* free(json);
23+
*
24+
* rac_benchmark_stats_destroy(stats);
25+
*/
26+
27+
#ifndef RAC_BENCHMARK_STATS_H
28+
#define RAC_BENCHMARK_STATS_H
29+
30+
#include "rac/core/rac_benchmark.h"
31+
#include "rac/core/rac_types.h"
32+
33+
#ifdef __cplusplus
34+
extern "C" {
35+
#endif
36+
37+
// =============================================================================
38+
// STATS HANDLE (OPAQUE)
39+
// =============================================================================
40+
41+
/**
* Opaque handle for a benchmark stats collector.
* Obtained from rac_benchmark_stats_create() and released with
* rac_benchmark_stats_destroy().
*/
42+
typedef void* rac_benchmark_stats_handle_t;
43+
44+
// =============================================================================
45+
// SUMMARY STRUCT
46+
// =============================================================================
47+
48+
/**
49+
* Statistical summary of collected benchmark observations.
50+
*
51+
* All time values are in milliseconds. Throughput is in tokens/second.
52+
* Fields are 0 if no valid observations were recorded for that metric.
* Only TTFT carries min/max/mean/stddev; the other metrics expose
* percentiles (P50/P95/P99) only.
53+
*/
54+
typedef struct rac_benchmark_summary {
55+
/** Number of observations recorded */
56+
int32_t count;
57+
58+
// Time to First Token (t4 - t0): percentiles, range, mean, and standard deviation
59+
double ttft_p50_ms;
60+
double ttft_p95_ms;
61+
double ttft_p99_ms;
62+
double ttft_min_ms;
63+
double ttft_max_ms;
64+
double ttft_mean_ms;
65+
double ttft_stddev_ms;
66+
67+
// Prefill duration (t3 - t2): percentiles only
68+
double prefill_p50_ms;
69+
double prefill_p95_ms;
70+
double prefill_p99_ms;
71+
72+
// Decode throughput in tokens/second (output_tokens / (t5 - t3) * 1000): percentiles only
73+
double decode_tps_p50;
74+
double decode_tps_p95;
75+
double decode_tps_p99;
76+
77+
// End-to-end latency (t6 - t0): percentiles only
78+
double e2e_p50_ms;
79+
double e2e_p95_ms;
80+
double e2e_p99_ms;
81+
82+
/**
* Number of observations where E2E > mean + 2*stddev.
* NOTE(review): presumably the mean/stddev of the E2E latency, which this
* struct does not expose — confirm against the implementation.
*/
83+
int32_t outlier_count;
84+
85+
} rac_benchmark_summary_t;
86+
87+
// =============================================================================
88+
// STATS COLLECTOR API
89+
// =============================================================================
90+
91+
/**
92+
* Creates a new benchmark stats collector.
93+
*
94+
* @param out_handle Output: receives the new collector handle; release it with
* rac_benchmark_stats_destroy()
95+
* @return RAC_SUCCESS on success, or RAC_ERROR_NULL_POINTER
* (presumably when out_handle is NULL — confirm)
96+
*/
97+
RAC_API rac_result_t rac_benchmark_stats_create(rac_benchmark_stats_handle_t* out_handle);
98+
99+
/**
100+
* Destroys a stats collector and frees all memory associated with it.
101+
*
102+
* @param handle Collector handle to destroy; passing NULL is a no-op
103+
*/
104+
RAC_API void rac_benchmark_stats_destroy(rac_benchmark_stats_handle_t handle);
105+
106+
/**
107+
* Records a benchmark timing observation.
108+
*
109+
* Only observations with status == RAC_BENCHMARK_STATUS_SUCCESS are recorded.
110+
* Derived metrics (TTFT, prefill, decode TPS, E2E) are extracted and stored.
111+
*
112+
* Thread-safe: can be called from any thread.
*
* NOTE(review): handling of a NULL handle or NULL timing is not documented
* here — confirm against the implementation.
113+
*
114+
* @param handle Collector handle that receives the observation
115+
* @param timing Timing struct to record
116+
*/
117+
RAC_API void rac_benchmark_stats_record(rac_benchmark_stats_handle_t handle,
118+
const rac_benchmark_timing_t* timing);
119+
120+
/**
121+
* Resets the collector, discarding all recorded observations.
* NOTE(review): the handle presumably stays valid for further recording
* after a reset — confirm.
122+
*
123+
* @param handle Collector handle to reset
124+
*/
125+
RAC_API void rac_benchmark_stats_reset(rac_benchmark_stats_handle_t handle);
126+
127+
/**
128+
* Returns the number of recorded observations.
* rac_benchmark_stats_record() only records observations whose status is
* RAC_BENCHMARK_STATUS_SUCCESS, so others presumably do not count — confirm.
129+
*
130+
* @param handle Collector handle to query
131+
* @return Observation count (0 if handle is NULL)
132+
*/
133+
RAC_API int32_t rac_benchmark_stats_count(rac_benchmark_stats_handle_t handle);
134+
135+
/**
136+
* Computes a statistical summary of all recorded observations.
137+
*
138+
* @param handle Collector handle to summarize
139+
* @param out_summary Output: summary struct filled in by this call
140+
* @return RAC_SUCCESS on success, RAC_ERROR_NULL_POINTER, or
* RAC_ERROR_INVALID_STATE when there is no data to summarize
141+
*/
142+
RAC_API rac_result_t rac_benchmark_stats_get_summary(rac_benchmark_stats_handle_t handle,
143+
rac_benchmark_summary_t* out_summary);
144+
145+
/**
146+
* Serializes a summary struct into a newly allocated JSON string.
147+
*
148+
* @param summary Summary struct to serialize; passing NULL yields a NULL result
149+
* @return Heap-allocated JSON string owned by the caller (release with free()),
* or NULL if summary is NULL or on error
150+
*/
151+
RAC_API char* rac_benchmark_stats_summary_to_json(const rac_benchmark_summary_t* summary);
152+
153+
#ifdef __cplusplus
154+
}
155+
#endif
156+
157+
#endif /* RAC_BENCHMARK_STATS_H */

0 commit comments

Comments
 (0)