Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sdk/runanywhere-commons/exports/RACommons.exports
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,13 @@ _rac_llm_component_load_model
_rac_llm_component_supports_streaming
_rac_llm_component_unload

# LLM Component - Structured Output
_rac_llm_component_generate_structured
_rac_llm_component_generate_structured_stream

# LLM Service - Grammar
_rac_llm_json_schema_to_grammar

# LLM Component - LoRA
_rac_llm_component_load_lora
_rac_llm_component_remove_lora
Expand Down
20 changes: 20 additions & 0 deletions sdk/runanywhere-commons/include/rac/backends/rac_llm_llamacpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,26 @@ RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_generate_from_context(
*/
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_clear_context(rac_handle_t handle);

// =============================================================================
// JSON SCHEMA → GBNF GRAMMAR CONVERSION
// =============================================================================

/**
* Convert a JSON Schema string to a GBNF grammar string for constrained decoding.
*
* Uses llama.cpp's built-in json-schema-to-grammar converter. The resulting
* grammar can be passed via rac_llm_options_t.grammar for grammar-constrained
* token generation.
*
* @param handle Service handle (from rac_llm_llamacpp_create)
* @param json_schema JSON Schema string
* @param out_grammar Output: GBNF grammar string (caller must free with rac_free)
* @return RAC_SUCCESS or error code
*/
RAC_LLAMACPP_API rac_result_t rac_llm_llamacpp_json_schema_to_grammar(rac_handle_t handle,
const char* json_schema,
char** out_grammar);

// =============================================================================
// BACKEND REGISTRATION
// =============================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,51 @@ RAC_API rac_result_t rac_llm_component_generate(rac_handle_t handle, const char*
const rac_llm_options_t* options,
rac_llm_result_t* out_result);

// =============================================================================
// STRUCTURED OUTPUT - Grammar-constrained generation
// =============================================================================

/**
* @brief Generate structured output with grammar-constrained decoding
*
* Converts JSON schema to GBNF grammar, applies grammar constraint during
* token generation so the LLM can only produce valid JSON matching the schema.
* Falls back to prompt-only mode if grammar conversion is not supported.
*
* @param handle Component handle
* @param prompt Input prompt
* @param options Generation options (can be NULL for defaults)
* @param so_config Structured output config with JSON schema and fallback settings
* @param out_result Output: Generation result (text will be valid JSON)
* @return RAC_SUCCESS or error code
*/
RAC_API rac_result_t rac_llm_component_generate_structured(
rac_handle_t handle, const char* prompt, const rac_llm_options_t* options,
const rac_structured_output_config_t* so_config, rac_llm_result_t* out_result);

/**
* @brief Generate structured output with streaming and grammar constraints
*
* Same as generate_structured but with token-by-token streaming callbacks.
* Each emitted token is guaranteed to conform to the grammar.
*
* @param handle Component handle
* @param prompt Input prompt
* @param options Generation options (can be NULL for defaults)
* @param so_config Structured output config with JSON schema
* @param token_callback Called for each generated token
* @param complete_callback Called when generation completes
* @param error_callback Called on error
* @param user_data User context passed to callbacks
* @return RAC_SUCCESS or error code
*/
RAC_API rac_result_t rac_llm_component_generate_structured_stream(
rac_handle_t handle, const char* prompt, const rac_llm_options_t* options,
const rac_structured_output_config_t* so_config,
rac_llm_component_token_callback_fn token_callback,
rac_llm_component_complete_callback_fn complete_callback,
rac_llm_component_error_callback_fn error_callback, void* user_data);

/**
* @brief Check if streaming is supported
*
Expand Down
21 changes: 21 additions & 0 deletions sdk/runanywhere-commons/include/rac/features/llm/rac_llm_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ typedef struct rac_llm_service_ops {

/** Clear all KV cache state (optional, NULL if not supported) */
rac_result_t (*clear_context)(void* impl);

/**
* Convert JSON Schema to GBNF grammar string (optional, NULL if not supported).
* Caller must free out_grammar with rac_free().
*/
rac_result_t (*json_schema_to_grammar)(void* impl, const char* json_schema, char** out_grammar);
} rac_llm_service_ops_t;

/**
Expand Down Expand Up @@ -185,6 +191,21 @@ RAC_API void rac_llm_destroy(rac_handle_t handle);
*/
RAC_API void rac_llm_result_free(rac_llm_result_t* result);

/**
* @brief Convert JSON Schema to GBNF grammar string
*
* Routes through service registry to the backend's json_schema_to_grammar op.
* The resulting GBNF grammar can be passed in rac_llm_options_t.grammar
* for grammar-constrained decoding.
*
* @param handle Service handle
* @param json_schema JSON Schema string
* @param out_grammar Output: GBNF grammar string (caller must free with rac_free)
* @return RAC_SUCCESS or RAC_ERROR_NOT_SUPPORTED if backend doesn't support grammar
*/
RAC_API rac_result_t rac_llm_json_schema_to_grammar(rac_handle_t handle, const char* json_schema,
char** out_grammar);

// =============================================================================
// ADAPTIVE CONTEXT API - For RAG and similar pipelines
// =============================================================================
Expand Down
33 changes: 31 additions & 2 deletions sdk/runanywhere-commons/include/rac/features/llm/rac_llm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ typedef struct rac_llm_options {

/** System prompt (can be NULL) */
const char* system_prompt;

/** GBNF grammar string for constrained decoding (can be NULL for unconstrained) */
const char* grammar;
} rac_llm_options_t;

/**
Expand All @@ -104,7 +107,8 @@ static const rac_llm_options_t RAC_LLM_OPTIONS_DEFAULT = {.max_tokens = 100,
.stop_sequences = RAC_NULL,
.num_stop_sequences = 0,
.streaming_enabled = RAC_FALSE,
.system_prompt = RAC_NULL};
.system_prompt = RAC_NULL,
.grammar = RAC_NULL};

// =============================================================================
// RESULT - Mirrors Swift's LLMGenerationResult
Expand Down Expand Up @@ -209,6 +213,18 @@ static const rac_thinking_tag_pattern_t RAC_THINKING_TAG_FULL = {.opening_tag =
// STRUCTURED OUTPUT - Mirrors Swift's StructuredOutputConfig
// =============================================================================

/**
 * @brief Fallback strategy when grammar-constrained structured output fails
 *
 * Stored in rac_structured_output_config_t.fallback. Controls what the
 * generation path does when the constrained run does not yield usable output
 * (or the backend reports the grammar op as unsupported).
 */
typedef enum rac_structured_output_fallback {
/** Return raw text output (no parsing attempt) — caller handles validation */
RAC_STRUCTURED_OUTPUT_FALLBACK_RAW = 0,
/** Retry generation with grammar constraint (default); bounded by max_retries */
RAC_STRUCTURED_OUTPUT_FALLBACK_RETRY = 1,
/** Fall back to prompt-only mode (no grammar constraint); relies on the schema embedded in the prompt */
RAC_STRUCTURED_OUTPUT_FALLBACK_PROMPT_ONLY = 2
} rac_structured_output_fallback_t;

/**
* @brief Structured output configuration
*
Expand All @@ -223,13 +239,26 @@ typedef struct rac_structured_output_config {

/** Whether to include the schema in the prompt */
rac_bool_t include_schema_in_prompt;

/** Enable GBNF grammar-constrained decoding (default: true when json_schema is set) */
rac_bool_t use_grammar;

/** Maximum retry attempts on failure (default: 3) */
int32_t max_retries;

/** Fallback strategy on failure (default: RETRY) */
rac_structured_output_fallback_t fallback;
} rac_structured_output_config_t;

/**
 * @brief Default structured output configuration
 *
 * No schema attached (json_schema must be set by the caller); schema is
 * echoed into the prompt, GBNF grammar-constrained decoding is enabled,
 * and failures retry up to 3 times with the grammar constraint kept on
 * (RAC_STRUCTURED_OUTPUT_FALLBACK_RETRY).
 */
static const rac_structured_output_config_t RAC_STRUCTURED_OUTPUT_DEFAULT = {
.json_schema = RAC_NULL,
.include_schema_in_prompt = RAC_TRUE,
.use_grammar = RAC_TRUE,
.max_retries = 3,
.fallback = RAC_STRUCTURED_OUTPUT_FALLBACK_RETRY};

/**
* @brief Structured output validation result
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ Java_com_runanywhere_sdk_llm_llamacpp_LlamaCPPBridge_nativeDestroy(
JNIEXPORT jstring JNICALL
Java_com_runanywhere_sdk_llm_llamacpp_LlamaCPPBridge_nativeGenerate(
JNIEnv* env, jclass clazz,
jlong handle, jstring prompt, jint maxTokens, jfloat temperature) {
jlong handle, jstring prompt, jint maxTokens, jfloat temperature,
jstring grammar) {
(void)clazz;

if (handle == 0) {
Expand All @@ -240,12 +241,24 @@ Java_com_runanywhere_sdk_llm_llamacpp_LlamaCPPBridge_nativeGenerate(
options.max_tokens = maxTokens;
options.temperature = temperature;

// Wire grammar field for constrained decoding
const char* grammarStr = nullptr;
if (grammar != nullptr) {
grammarStr = env->GetStringUTFChars(grammar, nullptr);
if (grammarStr && grammarStr[0] != '\0') {
options.grammar = grammarStr;
}
}

rac_llm_result_t result = {};
rac_result_t status = rac_llm_llamacpp_generate(
reinterpret_cast<rac_handle_t>(handle),
promptStr, &options, &result);

env->ReleaseStringUTFChars(prompt, promptStr);
if (grammarStr) {
env->ReleaseStringUTFChars(grammar, grammarStr);
}
Comment on lines +246 to +261
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify all JNI string acquisitions in this file and inspect local null-handling branches.
rg -n -C3 'GetStringUTFChars\(' sdk/runanywhere-commons/src/backends/llamacpp/jni/rac_backend_llamacpp_jni.cpp

Repository: RunanywhereAI/runanywhere-sdks

Length of output: 1146


Fail fast when GetStringUTFChars for grammar returns null.

At Line 247, GetStringUTFChars can fail and leave a pending Java exception. The current flow skips grammar assignment but continues native work; it should return immediately after releasing promptStr, consistent with how modelPath (line 174), prompt (line 231), and jsonSchema (line 293) are handled elsewhere in the file.

🔧 Suggested fix
     const char* grammarStr = nullptr;
     if (grammar != nullptr) {
         grammarStr = env->GetStringUTFChars(grammar, nullptr);
+        if (!grammarStr) {
+            env->ReleaseStringUTFChars(prompt, promptStr);
+            LOGe("nativeGenerate: Failed to get grammar");
+            return nullptr;
+        }
         if (grammarStr[0] != '\0') {
             options.grammar = grammarStr;
         }
     }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@sdk/runanywhere-commons/src/backends/llamacpp/jni/rac_backend_llamacpp_jni.cpp`
around lines 246 - 261, The code must fail fast if
env->GetStringUTFChars(grammar, ...) returns null: after calling
env->GetStringUTFChars for grammar (grammarStr), detect a null return, release
the previously acquired promptStr via env->ReleaseStringUTFChars(prompt,
promptStr), and immediately return (or propagate an error) instead of continuing
to call rac_llm_llamacpp_generate; update the block around grammar/grammarStr
and the call to rac_llm_llamacpp_generate to mirror the existing null-handling
pattern used for modelPath, prompt, and jsonSchema so a pending Java exception
is respected and native work is not performed when grammar conversion fails.


if (status != RAC_SUCCESS) {
LOGe("nativeGenerate: Failed with status %d", status);
Expand All @@ -263,6 +276,43 @@ Java_com_runanywhere_sdk_llm_llamacpp_LlamaCPPBridge_nativeGenerate(
return output;
}

/**
* Convert JSON Schema to GBNF grammar string
*/
JNIEXPORT jstring JNICALL
Java_com_runanywhere_sdk_llm_llamacpp_LlamaCPPBridge_nativeJsonSchemaToGrammar(
JNIEnv* env, jclass clazz,
jlong handle, jstring jsonSchema) {
(void)clazz;

if (handle == 0) {
LOGe("nativeJsonSchemaToGrammar: Invalid handle");
return nullptr;
}

const char* schemaStr = env->GetStringUTFChars(jsonSchema, nullptr);
if (!schemaStr) {
LOGe("nativeJsonSchemaToGrammar: Failed to get schema");
return nullptr;
}

char* grammarOut = nullptr;
rac_result_t status = rac_llm_llamacpp_json_schema_to_grammar(
reinterpret_cast<rac_handle_t>(handle),
schemaStr, &grammarOut);

env->ReleaseStringUTFChars(jsonSchema, schemaStr);

if (status != RAC_SUCCESS || !grammarOut) {
LOGe("nativeJsonSchemaToGrammar: Failed with status %d", status);
return nullptr;
}

jstring result = env->NewStringUTF(grammarOut);
free(grammarOut);
Comment on lines +311 to +312
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 free instead of rac_free violates API contract

Both rac_llm_llamacpp_json_schema_to_grammar and rac_llm_json_schema_to_grammar document their output pointer as "caller must free with rac_free()". Here and in llm_component.cpp lines 822 and 878, the raw free() is called on memory owned by the RAC API. If the allocator strategy ever changes (e.g., a custom arena), this will silently corrupt memory.

Suggested change
jstring result = env->NewStringUTF(grammarOut);
free(grammarOut);
jstring result = env->NewStringUTF(grammarOut);
rac_free(grammarOut);
return result;
Prompt To Fix With AI
This is a comment left during a code review.
Path: sdk/runanywhere-commons/src/backends/llamacpp/jni/rac_backend_llamacpp_jni.cpp
Line: 311-312

Comment:
**`free` instead of `rac_free` violates API contract**

Both `rac_llm_llamacpp_json_schema_to_grammar` and `rac_llm_json_schema_to_grammar` document their output pointer as "caller must free with `rac_free()`". Here and in `llm_component.cpp` lines 822 and 878, the raw `free()` is called on memory owned by the RAC API. If the allocator strategy ever changes (e.g., a custom arena), this will silently corrupt memory.

```suggestion
    jstring result = env->NewStringUTF(grammarOut);
    rac_free(grammarOut);
    return result;
```

How can I resolve this? If you propose a fix, please make it concise.

return result;
}

/**
* Cancel ongoing generation
*/
Expand Down
Loading
Loading