Skip to content

Commit e3320d2

Browse files
vertex-mg-bot authored and copybara-github committed
Update vLLM docker URI in model co-hosting notebook.
PiperOrigin-RevId: 836250727
1 parent 5fc0e03 commit e3320d2

1 file changed

Lines changed: 4 additions & 28 deletions

File tree

notebooks/community/model_garden/model_garden_model_cohost.ipynb

Lines changed: 4 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -3435,7 +3435,7 @@
34353435
"outputs": [],
34363436
"source": [
34373437
"# The MG vLLM model co-hosting serving container.\n",
3438-
"VLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-imageplatform/vertex-model-garden/vllm-inference-restricted-ubuntu22.04-py3.12:model-garden.vllm-restricted-x86-release_20251028.02_p0\" # @param {type:\"string\"}"
3438+
"VLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/vllm-serve-gpu:20251028_0916_RC01\" # @param {type:\"string\"}"
34393439
]
34403440
},
34413441
{
@@ -3506,30 +3506,6 @@
35063506
"\n",
35073507
"Sample command:\n",
35083508
"\n",
3509-
"# Local\n",
3510-
"python benchmark_util_multi_model.py \\\n",
3511-
" --model /path/to/modelA,/path/to/modelB \\\n",
3512-
" --gpu-memory-partition 0.45,0.45 \\\n",
3513-
" --total-gpus 8 \\\n",
3514-
" --input-length 1200 \\\n",
3515-
" --output-length 250 \\\n",
3516-
" --num-prompts 200 \\\n",
3517-
" --sonnet-prefix-len 49 \\\n",
3518-
" --concurrencies 1 8 16 \\\n",
3519-
" --max-median-ttft-ms 1000 \\\n",
3520-
" --max-p99-ttft-ms 10000 \\\n",
3521-
" --max-median-tpot-ms 100 \\\n",
3522-
" --max-p99-tpot-ms 1000 \\\n",
3523-
" --docker-uri us-docker.pkg.dev/vertex-imageplatform/vertex-model-garden/vllm-inference-restricted-ubuntu22.04-py3.12:model-garden.vllm-restricted-x86-release_20251028.02_p0 \\\n",
3524-
" --server-init-timeout 300 \\\n",
3525-
" --benchmark-script-path /path/to/benchmark_serving.py \\\n",
3526-
" --dataset-path /path/to/sonnet.txt \\\n",
3527-
" --results-output-path /path/to/benchmark_results.csv \\\n",
3528-
" --server-stats-output-path /path/to/server_stats.csv \\\n",
3529-
" --figure-output-dir /path/to/benchmark_figures/ \\\n",
3530-
" --no-skip-concurrencies-given-latency \\\n",
3531-
" --backend local\n",
3532-
"\n",
35333509
"# Vertex\n",
35343510
"# Authenticate credentials\n",
35353511
"gcloud auth application-default login\n",
@@ -3546,7 +3522,7 @@
35463522
" --max-p99-ttft-ms 10000 \\\n",
35473523
" --max-median-tpot-ms 100 \\\n",
35483524
" --max-p99-tpot-ms 1000 \\\n",
3549-
" --docker-uri us-docker.pkg.dev/vertex-imageplatform/vertex-model-garden/vllm-inference-restricted-ubuntu22.04-py3.12:model-garden.vllm-restricted-x86-release_20251028.02_p0 \\\n",
3525+
" --docker-uri us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/vllm-serve-gpu:20251028_0916_RC01 \\\n",
35503526
" --server-init-timeout 300 \\\n",
35513527
" --benchmark-script-path /path/to/benchmark_serving.py \\\n",
35523528
" --dataset-path /path/to/sonnet.txt \\\n",
@@ -3683,7 +3659,7 @@
36833659
" \"--docker-uri\",\n",
36843660
" type=str,\n",
36853661
" default=(\n",
3686-
" \"us-docker.pkg.dev/vertex-imageplatform/vertex-model-garden/vllm-inference-restricted-ubuntu22.04-py3.12:model-garden.vllm-restricted-x86-release_20251028.02_p0\",\n",
3662+
" \"us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/vllm-serve-gpu:20251028_0916_RC01\",\n",
36873663
" ),\n",
36883664
" help=\"Docker image URI for the vLLM server.\",\n",
36893665
")\n",
@@ -5852,7 +5828,7 @@
58525828
"HF_TOKEN = \"\" # @param {type:\"string\"}\n",
58535829
"\n",
58545830
"# The MG vLLM serving container supporting model replicas.\n",
5855-
"VLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-imageplatform/vertex-model-garden/vllm-inference-restricted-ubuntu22.04-py3.12:model-garden.vllm-restricted-x86-release_20251028.02_p0\" # @param {type:\"string\"}\n",
5831+
"VLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/vllm-serve-gpu:20251028_0916_RC01\" # @param {type:\"string\"}\n",
58565832
"SERVER_INIT_TIMEOUT = 300 # @param {type:\"integer\"}\n",
58575833
"\n",
58585834
"# The total number of GPUs available on the machine.\n",

0 commit comments

Comments
 (0)