Skip to content

Commit 85fa955

Browse files
vertex-mg-bot and copybara-github
authored and committed
Update vLLM container version in QwQ deployment notebook.
PiperOrigin-RevId: 778559325
1 parent bbed90a commit 85fa955

1 file changed

Lines changed: 4 additions & 3 deletions

File tree

notebooks/community/model_garden/model_garden_pytorch_qwq_deployment.ipynb

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -211,14 +211,14 @@
211211
"source": [
212212
"# @title Deploy\n",
213213
"\n",
214-
"# @markdown This section uploads prebuilt Qwen2/Qwen2.5 models to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 to 30 minutes to finish depending on the size of the model.\n",
214+
"# @markdown This section uploads prebuilt QwQ models to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 to 30 minutes to finish depending on the size of the model.\n",
215215
"\n",
216216
"MODEL_ID = \"QwQ-32B\" # @param [\"QwQ-32B\"] {isTemplate: true}\n",
217217
"model_path_prefix = \"Qwen\"\n",
218218
"model_id = os.path.join(model_path_prefix, MODEL_ID)\n",
219219
"\n",
220220
"# The pre-built serving docker image for vLLM.\n",
221-
"VLLM_DOCKER_URI = \"us-docker.pkg.dev/deeplearning-platform-release/vertex-model-garden/vllm-inference.cu121.0-6.ubuntu2204.py310\"\n",
221+
"VLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20250506_0916_RC01\"\n",
222222
"\n",
223223
"# @markdown Set `use_dedicated_endpoint` to False if you don't want to use [dedicated endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#create-dedicated-endpoint).\n",
224224
"use_dedicated_endpoint = True # @param {type:\"boolean\"}\n",
@@ -333,9 +333,10 @@
333333
" vllm_args.append(\n",
334334
" f\"--host-prefix-kv-cache-utilization-target={host_prefix_kv_cache_utilization_target}\"\n",
335335
" )\n",
336+
"\n",
336337
" if enable_yarn_scaling:\n",
337338
" vllm_args.append(\n",
338-
" '--rope-scaling=\\'{\"factor\":4.0,\"original_max_position_embeddings\":32768,\"type\":\"yarn\"}\\''\n",
339+
" '--rope-scaling=\\'{\"factor\": 4.0, \"original_max_position_embeddings\": 32768, \"rope_type\": \"yarn\"}\\''\n",
339340
" )\n",
340341
"\n",
341342
" if model_type:\n",

0 commit comments

Comments
 (0)