Skip to content

Commit a6e69a4

Browse files
vertex-mg-bot authored and copybara-github committed
Formatting and refactoring of llama3_1 deployment
PiperOrigin-RevId: 767043085
1 parent fdaa5a6 commit a6e69a4

1 file changed

Lines changed: 22 additions & 10 deletions

File tree

notebooks/community/model_garden/model_garden_pytorch_llama3_1_deployment.ipynb

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,6 @@
138138
"\n",
139139
"# Upgrade Vertex AI SDK.\n",
140140
"! pip3 install --upgrade --quiet 'google-cloud-aiplatform==1.93.1'\n",
141-
"! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git\n",
142141
"\n",
143142
"import importlib\n",
144143
"import os\n",
@@ -157,7 +156,6 @@
157156
" \"vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.common_util\"\n",
158157
")\n",
159158
"\n",
160-
"LABEL = \"vllm_gpu\"\n",
161159
"models, endpoints = {}, {}\n",
162160
"\n",
163161
"\n",
@@ -395,7 +393,8 @@
395393
" return model, endpoint\n",
396394
"\n",
397395
"\n",
398-
"models[\"hexllm_tpu\"], endpoints[\"hexllm_tpu\"] = deploy_model_hexllm(\n",
396+
"LABEL = \"hexllm_tpu\"\n",
397+
"models[LABEL], endpoints[LABEL] = deploy_model_hexllm(\n",
399398
" model_name=common_util.get_job_name_with_datetime(prefix=MODEL_ID),\n",
400399
" model_id=model_id,\n",
401400
" publisher=\"meta\",\n",
@@ -412,7 +411,10 @@
412411
" min_replica_count=min_replica_count,\n",
413412
" max_replica_count=max_replica_count,\n",
414413
" use_dedicated_endpoint=use_dedicated_endpoint,\n",
415-
")"
414+
")\n",
415+
"\n",
416+
"model = models[LABEL]\n",
417+
"endpoint = endpoints[LABEL]"
416418
]
417419
},
418420
{
@@ -692,9 +694,13 @@
692694
"\n",
693695
"\n",
694696
"use_dedicated_endpoint = True # Fast Deployment only supports dedicated endpoints.\n",
695-
"models[\"vllm_fast\"], endpoints[\"vllm_fast\"] = fast_deploy(\n",
697+
"LABEL = \"vllm_fast\"\n",
698+
"models[LABEL], endpoints[LABEL] = fast_deploy(\n",
696699
" \"meta\", \"llama3_1\", \"llama-3.1-8b-instruct\"\n",
697-
")"
700+
")\n",
701+
"\n",
702+
"model = models[LABEL]\n",
703+
"endpoint = endpoints[LABEL]"
698704
]
699705
},
700706
{
@@ -925,6 +931,7 @@
925931
"source": [
926932
"# @title [Option 1] Deploy with Model Garden SDK\n",
927933
"\n",
934+
"LABEL = \"sdk-deploy\"\n",
928935
"# @markdown Deploy with Gen AI model-centric SDK. This section uploads the prebuilt model to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model. See [use open models with Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/open-models/use-open-models) for documentation on other use cases.\n",
929936
"from vertexai.preview import model_garden\n",
930937
"\n",
@@ -1167,6 +1174,7 @@
11671174
" return model, endpoint\n",
11681175
"\n",
11691176
"\n",
1177+
"LABEL = \"vllm_gpu\"\n",
11701178
"models[LABEL], endpoints[LABEL] = deploy_model_vllm(\n",
11711179
" model_name=common_util.get_job_name_with_datetime(prefix=\"llama3_1-serve\"),\n",
11721180
" model_id=model_id,\n",
@@ -1194,6 +1202,8 @@
11941202
" is_spot=is_spot,\n",
11951203
")\n",
11961204
"\n",
1205+
"model = models[LABEL]\n",
1206+
"endpoint = endpoints[LABEL]\n",
11971207
"# @markdown Click \"Show Code\" to see more details."
11981208
]
11991209
},
@@ -1473,10 +1483,8 @@
14731483
" return model, endpoint\n",
14741484
"\n",
14751485
"\n",
1476-
"(\n",
1477-
" models[\"optimized_vllm_gpu\"],\n",
1478-
" endpoints[\"optimized_vllm_gpu\"],\n",
1479-
") = deploy_model_optimized_vllm(\n",
1486+
"LABEL = \"optimized_vllm_gpu\"\n",
1487+
"(models[LABEL], endpoints[LABEL],) = deploy_model_optimized_vllm(\n",
14801488
" model_name=common_util.get_job_name_with_datetime(prefix=\"llama3_1-serve\"),\n",
14811489
" model_id=model_id,\n",
14821490
" publisher=\"meta\",\n",
@@ -1488,6 +1496,10 @@
14881496
" max_model_len=max_model_len,\n",
14891497
" use_dedicated_endpoint=use_dedicated_endpoint,\n",
14901498
")\n",
1499+
"\n",
1500+
"model = models[LABEL]\n",
1501+
"endpoint = endpoints[LABEL]\n",
1502+
"\n",
14911503
"# @markdown Click \"Show Code\" to see more details."
14921504
]
14931505
},

0 commit comments

Comments
 (0)