Skip to content

Commit c12269d

Browse files
committed
Add Genie NPU models (Qwen3 4B, Llama 3.2 1B) with new HuggingFace URLs
- Update NPUChip: new base URL (runanywhere/genie-npu-models) and new naming convention ({slug}-genie-w4a16-{chipSuffix}.tar.gz)
- Add Llama 3.2 1B Instruct (both 8 Elite and 8 Elite Gen 5)
- Add Qwen3 4B (8 Elite Gen 5 only)
- Add an extensible GenieModelDef pattern for easy addition of future models
1 parent 5a6ccc1 commit c12269d

3 files changed

Lines changed: 52 additions & 22 deletions

File tree

  • examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/data
  • sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk

examples/android/RunAnywhereAI/app/src/main/java/com/runanywhere/runanywhereai/data/ModelList.kt

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import timber.log.Timber
44
import com.runanywhere.runanywhereai.data.models.AppModel
55
import com.runanywhere.sdk.core.onnx.ONNX
66
import com.runanywhere.sdk.core.types.InferenceFramework
7+
import com.runanywhere.sdk.core.types.NPUChip
78
import com.runanywhere.sdk.llm.llamacpp.LlamaCPP
89
import com.runanywhere.sdk.llm.genie.Genie
910
import com.runanywhere.sdk.public.RunAnywhere
@@ -129,18 +130,44 @@ object ModelList {
129130

130131
// Genie NPU Models — URLs are built dynamically based on detected chipset.
131132
// getChip() returns the NPUChip for this device, or null if unsupported.
133+
// Each entry specifies which chips it supports; only matching models are shown.
134+
135+
private data class GenieModelDef(
136+
val slug: String,
137+
val name: String,
138+
val memoryRequirement: Long,
139+
val supportedChips: Set<NPUChip>,
140+
)
141+
142+
private val genieModelDefs = listOf(
143+
GenieModelDef(
144+
slug = "qwen3-4b",
145+
name = "Qwen3 4B",
146+
memoryRequirement = 2_800_000_000,
147+
supportedChips = setOf(NPUChip.SNAPDRAGON_8_ELITE_GEN5),
148+
),
149+
GenieModelDef(
150+
slug = "llama-v3.2-1b-instruct",
151+
name = "Llama 3.2 1B Instruct",
152+
memoryRequirement = 1_200_000_000,
153+
supportedChips = setOf(NPUChip.SNAPDRAGON_8_ELITE, NPUChip.SNAPDRAGON_8_ELITE_GEN5),
154+
),
155+
)
156+
132157
private fun genieModels(): List<AppModel> {
133158
val chip = RunAnywhere.getChip() ?: return emptyList()
134-
return listOf(
135-
AppModel(
136-
id = "qwen-npu-${chip.identifier}",
137-
name = "Qwen3 4B (NPU - ${chip.displayName})",
138-
url = chip.downloadUrl("qwen"),
139-
framework = InferenceFramework.GENIE,
140-
category = ModelCategory.LANGUAGE,
141-
memoryRequirement = 2_800_000_000,
142-
),
143-
)
159+
return genieModelDefs
160+
.filter { chip in it.supportedChips }
161+
.map { def ->
162+
AppModel(
163+
id = "${def.slug}-npu-${chip.identifier}",
164+
name = "${def.name} (NPU - ${chip.displayName})",
165+
url = chip.downloadUrl(def.slug),
166+
framework = InferenceFramework.GENIE,
167+
category = ModelCategory.LANGUAGE,
168+
memoryRequirement = def.memoryRequirement,
169+
)
170+
}
144171
}
145172

146173
// VLM

sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/core/types/NPUChip.kt

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,37 @@ package com.runanywhere.sdk.core.types
33
/**
44
* Supported NPU chipsets for on-device Genie model inference.
55
*
6-
* Each chip has an [identifier] used to construct dynamic download URLs
7-
* for chipset-specific NPU model binaries.
6+
* Each chip has an [identifier] used in model IDs and an [npuSuffix] used
7+
* to construct download URLs from the HuggingFace model repository.
88
*
99
* Example URL construction:
1010
* ```
1111
* val chip = RunAnywhere.getChip()
12-
* val url = "${NPUChip.BASE_URL}qwen-${chip.identifier}.zip?download=true"
12+
* val url = chip.downloadUrl("qwen3-4b")
13+
* // → on SNAPDRAGON_8_ELITE_GEN5: "https://huggingface.co/runanywhere/genie-npu-models/resolve/main/qwen3-4b-genie-w4a16-8elite-gen5.tar.gz"
1314
* ```
1415
*/
1516
enum class NPUChip(
1617
val identifier: String,
1718
val displayName: String,
1819
val socModel: String,
20+
val npuSuffix: String,
1921
) {
20-
SNAPDRAGON_8_ELITE("gen1", "Snapdragon 8 Elite", "SM8750"),
21-
SNAPDRAGON_8_ELITE_GEN5("gen2", "Snapdragon 8 Elite Gen 5", "SM8850"),
22+
SNAPDRAGON_8_ELITE("8elite", "Snapdragon 8 Elite", "SM8750", "8elite"),
23+
SNAPDRAGON_8_ELITE_GEN5("8elite-gen5", "Snapdragon 8 Elite Gen 5", "SM8850", "8elite-gen5"),
2224
;
2325

2426
/**
2527
* Build a HuggingFace download URL for this chip.
26-
* @param modelName Model prefix (e.g. "qwen") → produces "qwen-gen1.zip"
28+
* @param modelSlug Model slug (e.g. "qwen3-4b") → produces
29+
* "qwen3-4b-genie-w4a16-8elite-gen5.tar.gz" for SNAPDRAGON_8_ELITE_GEN5 (the trailing part is this chip's npuSuffix)
2730
*/
28-
fun downloadUrl(modelName: String): String =
29-
"${BASE_URL}${modelName}-${identifier}.zip?download=true"
31+
fun downloadUrl(modelSlug: String): String =
32+
"${BASE_URL}${modelSlug}-genie-w4a16-${npuSuffix}.tar.gz"
3033

3134
companion object {
3235
/** Base URL for NPU model downloads on HuggingFace. */
33-
const val BASE_URL = "https://huggingface.co/Void2377/npu-models/resolve/main/"
36+
const val BASE_URL = "https://huggingface.co/runanywhere/genie-npu-models/resolve/main/"
3437

3538
/**
3639
* Match an NPU chip from a SoC model string (e.g. "SM8750").

sdk/runanywhere-kotlin/src/commonMain/kotlin/com/runanywhere/sdk/public/extensions/RunAnywhere+Device.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ import com.runanywhere.sdk.public.RunAnywhere
99
* Returns the [NPUChip] if the device has a supported Qualcomm SoC,
1010
* or null if the device does not support NPU inference.
1111
*
12-
* Use [NPUChip.identifier] to construct chipset-specific download URLs:
12+
* Use [NPUChip.downloadUrl] to construct chipset-specific download URLs:
1313
* ```kotlin
1414
* val chip = RunAnywhere.getChip()
1515
* if (chip != null) {
16-
* val url = "https://example.com/models/qwen-${chip.identifier}.zip"
17-
* RunAnywhere.registerModel(id = "qwen-npu", name = "Qwen NPU", url = url, ...)
16+
* val url = chip.downloadUrl("qwen3-4b")
17+
* RunAnywhere.registerModel(id = "qwen3-4b-npu", name = "Qwen3 4B NPU", url = url, ...)
1818
* }
1919
* ```
2020
*/

0 commit comments

Comments
 (0)