Skip to content

Commit feeee0a

Browse files
sanchitmonga22 authored and shubhammalhotra28 committed
RAG rewrite
1 parent e2f8e22 commit feeee0a

64 files changed

Lines changed: 2592 additions & 5395 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

Package.swift

Lines changed: 10 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -43,12 +43,6 @@ let useLocalBinaries = false // Toggle: true for local dev, false for release
4343
// Updated automatically by CI/CD during releases
4444
let sdkVersion = "0.19.6"
4545

46-
// RAG binary is only available in local dev mode until the release artifact is published.
47-
// In remote mode, the RAG xcframework zip + checksum don't exist yet, so including the
48-
// binary target would block ALL SPM package resolution (not just RAG).
49-
// Set to true once RABackendRAG-v<version>.zip is published to GitHub releases.
50-
let ragRemoteBinaryAvailable = false
51-
5246
let package = Package(
5347
name: "runanywhere-sdks",
5448
platforms: [
@@ -87,7 +81,7 @@ let package = Package(
8781
name: "RunAnywhereWhisperKit",
8882
targets: ["WhisperKitRuntime"]
8983
),
90-
] + ragProducts(),
84+
],
9185
dependencies: [
9286
.package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"),
9387
.package(url: "https://github.com/Alamofire/Alamofire.git", from: "5.9.0"),
@@ -152,7 +146,8 @@ let package = Package(
152146
.product(name: "StableDiffusion", package: "ml-stable-diffusion"),
153147
"CRACommons",
154148
"RACommonsBinary",
155-
] + ragCoreDependencies(),
149+
"RABackendRAGBinary",
150+
],
156151
path: "sdk/runanywhere-swift/Sources/RunAnywhere",
157152
exclude: ["CRACommons"],
158153
swiftSettings: [
@@ -231,62 +226,9 @@ let package = Package(
231226
path: "sdk/runanywhere-swift/Tests/RunAnywhereTests"
232227
),
233228

234-
] + ragTargets() + binaryTargets()
229+
] + binaryTargets()
235230
)
236231

237-
// =============================================================================
238-
// RAG TARGET HELPERS
239-
// =============================================================================
240-
// RAG targets are gated because the remote binary artifact doesn't exist yet.
241-
// Including a binary target with a placeholder checksum blocks ALL SPM resolution.
242-
243-
/// RAG product (library) — only included when the binary is available
244-
func ragProducts() -> [Product] {
245-
guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
246-
return [
247-
.library(
248-
name: "RunAnywhereRAG",
249-
targets: ["RAGRuntime"]
250-
),
251-
]
252-
}
253-
254-
/// RAG dependency for the RunAnywhere core target
255-
/// NOTE: Core already accesses RAG C headers via CRACommons umbrella (rac_rag.h, rac_rag_pipeline.h).
256-
/// No additional dependency needed — RAGBackend is only used by RAGRuntime.
257-
func ragCoreDependencies() -> [Target.Dependency] {
258-
return []
259-
}
260-
261-
/// RAG-related targets (C bridge + Swift runtime)
262-
func ragTargets() -> [Target] {
263-
guard useLocalBinaries || ragRemoteBinaryAvailable else { return [] }
264-
return [
265-
// C Bridge Module - RAG Backend Headers
266-
.target(
267-
name: "RAGBackend",
268-
dependencies: ["RABackendRAGBinary"],
269-
path: "sdk/runanywhere-swift/Sources/RAGRuntime/include",
270-
publicHeadersPath: "."
271-
),
272-
// RAG Runtime Backend
273-
.target(
274-
name: "RAGRuntime",
275-
dependencies: [
276-
"RunAnywhere",
277-
"RAGBackend",
278-
"ONNXRuntime",
279-
"LlamaCPPRuntime",
280-
],
281-
path: "sdk/runanywhere-swift/Sources/RAGRuntime",
282-
exclude: ["include"],
283-
linkerSettings: [
284-
.linkedLibrary("c++"),
285-
]
286-
),
287-
]
288-
}
289-
290232
// =============================================================================
291233
// BINARY TARGET SELECTION
292234
// =============================================================================
@@ -369,16 +311,13 @@ func binaryTargets() -> [Target] {
369311
),
370312
]
371313

372-
// Only include RAG binary when the release artifact is available
373-
if ragRemoteBinaryAvailable {
374-
targets.append(
375-
.binaryTarget(
376-
name: "RABackendRAGBinary",
377-
url: "https://github.com/RunanywhereAI/runanywhere-sdks/releases/download/v\(sdkVersion)/RABackendRAG-v\(sdkVersion).zip",
378-
checksum: "0000000000000000000000000000000000000000000000000000000000000000" // Replace with actual checksum
379-
)
314+
targets.append(
315+
.binaryTarget(
316+
name: "RABackendRAGBinary",
317+
url: "https://github.com/RunanywhereAI/runanywhere-sdks/releases/download/v\(sdkVersion)/RABackendRAG-v\(sdkVersion).zip",
318+
checksum: "0000000000000000000000000000000000000000000000000000000000000000" // TODO: Replace with actual checksum on release
380319
)
381-
}
320+
)
382321

383322
return targets
384323
}

sdk/runanywhere-commons/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ option(RAC_BUILD_PLATFORM "Build platform backend (Apple Foundation Models, Syst
3232
option(RAC_BUILD_BACKENDS "Build ML backends (LlamaCPP, ONNX, WhisperCPP, RAG)" OFF)
3333
option(RAC_BACKEND_LLAMACPP "Build LlamaCPP backend" ON)
3434
option(RAC_BACKEND_ONNX "Build ONNX backend" ON)
35-
option(RAC_BACKEND_RAG "Build RAG backend (USearch)" ON)
35+
option(RAC_BACKEND_RAG "Build RAG pipeline (USearch vector search)" ON)
3636
# WhisperCPP OFF by default - Sherpa-ONNX (NeMo CTC / Parakeet) is now the primary STT backend
3737
option(RAC_BACKEND_WHISPERCPP "Build WhisperCPP backend" OFF)
3838
if(APPLE)
@@ -411,9 +411,9 @@ if(RAC_BUILD_BACKENDS)
411411
add_subdirectory(src/backends/whispercpp)
412412
endif()
413413

414-
if(RAC_BACKEND_RAG AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/backends/rag/CMakeLists.txt")
415-
message(STATUS " - RAG backend (USearch)")
416-
add_subdirectory(src/backends/rag)
414+
if(RAC_BACKEND_RAG AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/features/rag/CMakeLists.txt")
415+
message(STATUS " - RAG pipeline (USearch)")
416+
add_subdirectory(src/features/rag)
417417
endif()
418418
endif()
419419

rac_embeddings_onnx.h (new file; full path not shown in this capture)

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
/**
2+
* @file rac_embeddings_onnx.h
3+
* @brief RunAnywhere Commons - ONNX Embeddings Backend Public API
4+
*
5+
* Registration for the ONNX-based embedding provider (sentence-transformer models).
6+
* Registers with RAC_CAPABILITY_EMBEDDINGS via the service registry.
7+
*/
8+
9+
#ifndef RAC_EMBEDDINGS_ONNX_H
10+
#define RAC_EMBEDDINGS_ONNX_H
11+
12+
#include "rac/core/rac_types.h"
13+
#include "rac/core/rac_error.h"
14+
15+
#ifdef __cplusplus
16+
extern "C" {
17+
#endif
18+
19+
/**
20+
* @brief Register the ONNX embeddings backend
21+
*
22+
* Registers a service provider for RAC_CAPABILITY_EMBEDDINGS.
23+
* Handles .onnx model files with sentence-transformer architecture.
24+
*
25+
* @return RAC_SUCCESS or error code
26+
*/
27+
RAC_API rac_result_t rac_backend_onnx_embeddings_register(void);
28+
29+
/**
30+
* @brief Unregister the ONNX embeddings backend
31+
*
32+
* @return RAC_SUCCESS or error code
33+
*/
34+
RAC_API rac_result_t rac_backend_onnx_embeddings_unregister(void);
35+
36+
#ifdef __cplusplus
37+
}
38+
#endif
39+
40+
#endif /* RAC_EMBEDDINGS_ONNX_H */

sdk/runanywhere-commons/include/rac/backends/rac_llm_llamacpp.h

Lines changed: 63 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -240,6 +240,69 @@ RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_clear_lora(rac_handle_t handle);
240240
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_get_lora_info(rac_handle_t handle,
241241
char** out_json);
242242

243+
// =============================================================================
244+
// ADAPTIVE CONTEXT API (for RAG pipelines)
245+
// =============================================================================
246+
247+
/**
248+
* Inject a system prompt into the KV cache at position 0.
249+
* Clears existing KV cache first, then decodes the prompt tokens.
250+
*
251+
* @param handle Service handle (from rac_llm_llamacpp_create)
252+
* @param prompt System prompt text
253+
* @return RAC_SUCCESS or error code
254+
*/
255+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_inject_system_prompt(rac_handle_t handle,
256+
const char* prompt);
257+
258+
/**
259+
* Append text to the KV cache after current content.
260+
* Does not clear existing KV cache — adds at current position.
261+
*
262+
* @param handle Service handle
263+
* @param text Text to append
264+
* @return RAC_SUCCESS or error code
265+
*/
266+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_append_context(rac_handle_t handle,
267+
const char* text);
268+
269+
/**
270+
* Probe confidence that accumulated context answers a query.
271+
* Non-destructive to KV cache — probe tokens are cleaned up.
272+
*
273+
* @param handle Service handle
274+
* @param context Context passage (can be empty if context is in KV cache)
275+
* @param query The user question
276+
* @param out_confidence Output: confidence in [0.0, 1.0]
277+
* @return RAC_SUCCESS or error code
278+
*/
279+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_probe_confidence(rac_handle_t handle,
280+
const char* context,
281+
const char* query,
282+
float* out_confidence);
283+
284+
/**
285+
* Generate response from accumulated KV cache state.
286+
* Unlike rac_llm_llamacpp_generate(), does NOT clear the KV cache first.
287+
*
288+
* @param handle Service handle
289+
* @param query Query/suffix to append before generation
290+
* @param options Generation options (can be NULL for defaults)
291+
* @param out_result Output: Generation result
292+
* @return RAC_SUCCESS or error code
293+
*/
294+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_generate_from_context(
295+
rac_handle_t handle, const char* query, const rac_llm_options_t* options,
296+
rac_llm_result_t* out_result);
297+
298+
/**
299+
* Clear all KV cache state.
300+
*
301+
* @param handle Service handle
302+
* @return RAC_SUCCESS or error code
303+
*/
304+
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_clear_context(rac_handle_t handle);
305+
243306
// =============================================================================
244307
// BACKEND REGISTRATION
245308
// =============================================================================

sdk/runanywhere-commons/include/rac/features/llm/rac_llm_service.h

Lines changed: 96 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,30 @@ typedef struct rac_llm_service_ops {
6161

6262
/** Get loaded LoRA adapters info as JSON (optional, NULL if not supported) */
6363
rac_result_t (*get_lora_info)(void* impl, char** out_json);
64+
65+
/** Inject system prompt into KV cache at position 0 (optional, NULL if not supported) */
66+
rac_result_t (*inject_system_prompt)(void* impl, const char* prompt);
67+
68+
/** Append text to KV cache after current content (optional, NULL if not supported) */
69+
rac_result_t (*append_context)(void* impl, const char* text);
70+
71+
/**
72+
* Probe confidence that accumulated context answers query (optional, NULL if not supported).
73+
* Returns confidence in [0.0, 1.0] via out_confidence. Non-destructive to KV cache.
74+
*/
75+
rac_result_t (*probe_confidence)(void* impl, const char* context, const char* query,
76+
float* out_confidence);
77+
78+
/**
79+
* Generate response from accumulated KV cache state (optional, NULL if not supported).
80+
* Unlike generate(), does NOT clear KV cache first.
81+
*/
82+
rac_result_t (*generate_from_context)(void* impl, const char* query,
83+
const rac_llm_options_t* options,
84+
rac_llm_result_t* out_result);
85+
86+
/** Clear all KV cache state (optional, NULL if not supported) */
87+
rac_result_t (*clear_context)(void* impl);
6488
} rac_llm_service_ops_t;
6589

6690
/**
@@ -168,6 +192,78 @@ RAC_API void rac_llm_destroy(rac_handle_t handle);
168192
*/
169193
RAC_API void rac_llm_result_free(rac_llm_result_t* result);
170194

195+
// =============================================================================
196+
// ADAPTIVE CONTEXT API - For RAG and similar pipelines
197+
// =============================================================================
198+
199+
/**
200+
* @brief Inject a system prompt into the LLM's KV cache at position 0
201+
*
202+
* Clears existing KV cache, then seeds with the given prompt.
203+
* Optional — returns RAC_ERROR_NOT_SUPPORTED if backend doesn't support it.
204+
*
205+
* @param handle Service handle
206+
* @param prompt System prompt text
207+
* @return RAC_SUCCESS or error code
208+
*/
209+
RAC_API rac_result_t rac_llm_inject_system_prompt(rac_handle_t handle, const char* prompt);
210+
211+
/**
212+
* @brief Append text to the LLM's KV cache after current content
213+
*
214+
* Does not clear existing KV state — accumulates context incrementally.
215+
* Optional — returns RAC_ERROR_NOT_SUPPORTED if backend doesn't support it.
216+
*
217+
* @param handle Service handle
218+
* @param text Text to append
219+
* @return RAC_SUCCESS or error code
220+
*/
221+
RAC_API rac_result_t rac_llm_append_context(rac_handle_t handle, const char* text);
222+
223+
/**
224+
* @brief Probe whether accumulated context answers a query
225+
*
226+
* Uses logit probing (Yes/No softmax) to estimate confidence.
227+
* Non-destructive — probe tokens are removed from KV cache after probing.
228+
* Optional — returns RAC_ERROR_NOT_SUPPORTED if backend doesn't support it.
229+
*
230+
* @param handle Service handle
231+
* @param context Context passage (can be empty string if context is already in KV cache)
232+
* @param query The user question
233+
* @param out_confidence Output: confidence in [0.0, 1.0]
234+
* @return RAC_SUCCESS or error code
235+
*/
236+
RAC_API rac_result_t rac_llm_probe_confidence(rac_handle_t handle, const char* context,
237+
const char* query, float* out_confidence);
238+
239+
/**
240+
* @brief Generate a response from accumulated KV cache state
241+
*
242+
* Unlike rac_llm_generate(), this does NOT clear the KV cache first.
243+
* Use after inject_system_prompt + append_context to generate from accumulated state.
244+
* Optional — returns RAC_ERROR_NOT_SUPPORTED if backend doesn't support it.
245+
*
246+
* @param handle Service handle
247+
* @param query Query/suffix text to append before generation
248+
* @param options Generation options (can be NULL for defaults)
249+
* @param out_result Output: Generation result
250+
* @return RAC_SUCCESS or error code
251+
*/
252+
RAC_API rac_result_t rac_llm_generate_from_context(rac_handle_t handle, const char* query,
253+
const rac_llm_options_t* options,
254+
rac_llm_result_t* out_result);
255+
256+
/**
257+
* @brief Clear all KV cache state
258+
*
259+
* Resets the LLM's context for a fresh adaptive query cycle.
260+
* Optional — returns RAC_ERROR_NOT_SUPPORTED if backend doesn't support it.
261+
*
262+
* @param handle Service handle
263+
* @return RAC_SUCCESS or error code
264+
*/
265+
RAC_API rac_result_t rac_llm_clear_context(rac_handle_t handle);
266+
171267
#ifdef __cplusplus
172268
}
173269
#endif

sdk/runanywhere-commons/include/backends/rag/ort_guards.h renamed to sdk/runanywhere-commons/include/rac/features/rag/ort_guards.h

File renamed without changes.

sdk/runanywhere-commons/include/rac/backends/rac_rag.h renamed to sdk/runanywhere-commons/include/rac/features/rag/rac_rag.h

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
/**
22
* @file rac_rag.h
3-
* @brief RunAnywhere Commons - RAG Backend Public API
3+
* @brief RunAnywhere Commons - RAG Pipeline Public API
44
*
5-
* Registration and control functions for the RAG backend.
5+
* Registration and control functions for the RAG pipeline module.
66
*/
77

88
#ifndef RAC_RAG_H
@@ -16,16 +16,17 @@ extern "C" {
1616
#endif
1717

1818
/**
19-
* @brief Register the RAG backend module
19+
* @brief Register the RAG pipeline module
2020
*
2121
* Must be called before using RAG functionality.
22+
* Also registers the ONNX embeddings service provider if available.
2223
*
2324
* @return RAC_SUCCESS on success, error code otherwise
2425
*/
2526
RAC_API rac_result_t rac_backend_rag_register(void);
2627

2728
/**
28-
* @brief Unregister the RAG backend module
29+
* @brief Unregister the RAG pipeline module
2930
*
3031
* @return RAC_SUCCESS on success, error code otherwise
3132
*/

0 commit comments

Comments (0)