Skip to content

Commit bab9c39

Browse files
vertex-mg-bot and copybara-github
authored and committed
Add new variants to qwen3-vl
PiperOrigin-RevId: 823383168
1 parent 447affc commit bab9c39

1 file changed

Lines changed: 6 additions & 8 deletions

File tree

notebooks/community/model_garden/model_garden_pytorch_qwen3_vl.ipynb

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@
237237
},
238238
"outputs": [],
239239
"source": [
240-
"model_version = \"qwen3-vl-30b-a3b-instruct\" # @param [\"qwen3-vl-235b-a22b-instruct\", \"qwen3-vl-235b-a22b-instruct-fp8\", \"qwen3-vl-235b-a22b-thinking\", \"qwen3-vl-235b-a22b-thinking-fp8\", \"qwen3-vl-30b-a3b-instruct\", \"qwen3-vl-30b-a3b-instruct-fp8\", \"qwen3-vl-30b-a3b-thinking\", \"qwen3-vl-30b-a3b-thinking-fp8\"] {isTemplate:true}\n",
240+
"model_version = \"qwen3-vl-8b-instruct\" # @param [\"qwen3-vl-235b-a22b-instruct\", \"qwen3-vl-235b-a22b-instruct-fp8\", \"qwen3-vl-235b-a22b-thinking\", \"qwen3-vl-235b-a22b-thinking-fp8\", \"qwen3-vl-30b-a3b-instruct\", \"qwen3-vl-30b-a3b-instruct-fp8\", \"qwen3-vl-30b-a3b-thinking\", \"qwen3-vl-30b-a3b-thinking-fp8\", \"qwen3-vl-32b-instruct\", \"qwen3-vl-32b-instruct-fp8\", \"qwen3-vl-32b-thinking\", \"qwen3-vl-32b-thinking-fp8\", \"qwen3-vl-4b-instruct\", \"qwen3-vl-4b-instruct-fp8\", \"qwen3-vl-4b-thinking\", \"qwen3-vl-4b-thinking-fp8\", \"qwen3-vl-8b-instruct\", \"qwen3-vl-8b-instruct-fp8\", \"qwen3-vl-8b-thinking\", \"qwen3-vl-8b-thinking-fp8\"] {isTemplate:true}\n",
241241
"MODEL_NAME = f\"qwen/qwen3-vl@{model_version}\""
242242
]
243243
},
@@ -383,9 +383,9 @@
383383
" accept_eula=True,\n",
384384
" use_dedicated_endpoint=use_dedicated_endpoint,\n",
385385
" serving_container_image_uri=\"us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20251003_0916_RC01\",\n",
386-
" machine_type=\"a3-highgpu-2g\",\n",
386+
" machine_type=\"a3-highgpu-1g\",\n",
387387
" accelerator_type=\"NVIDIA_H100_80GB\",\n",
388-
" accelerator_count=2,\n",
388+
" accelerator_count=1,\n",
389389
")"
390390
]
391391
},
@@ -424,7 +424,7 @@
424424
{
425425
"cell_type": "markdown",
426426
"metadata": {
427-
"id": "P9rp6hTLgif2"
427+
"id": "scQowXXcD8Fe"
428428
},
429429
"source": [
430430
"## Inference"
@@ -439,16 +439,14 @@
439439
},
440440
"outputs": [],
441441
"source": [
442-
"# @title Chat completion\n",
443-
"\n",
442+
"# @title Inference\n",
444443
"if use_dedicated_endpoint:\n",
445444
" DEDICATED_ENDPOINT_DNS = endpoint.gca_resource.dedicated_endpoint_dns\n",
446445
"ENDPOINT_RESOURCE_NAME = endpoint.resource_name\n",
447446
"\n",
448447
"# @markdown Because the Qwen3 models generate detailed reasoning steps, the output is expected to be long. We recommend using streaming for a better generation experience.\n",
449-
"# @title Chat Completions Inference\n",
450448
"\n",
451-
"# @title Chat Completions Inference\n",
449+
"# @title Inference\n",
452450
"\n",
453451
"# @markdown Once deployment succeeds, you can send requests to the endpoint using the OpenAI SDK.\n",
454452
"\n",

0 commit comments

Comments (0)