|
138 | 138 | "\n", |
139 | 139 | "# Upgrade Vertex AI SDK.\n", |
140 | 140 | "! pip3 install --upgrade --quiet 'google-cloud-aiplatform==1.93.1'\n", |
141 | | - "! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git\n", |
142 | 141 | "\n", |
143 | 142 | "import importlib\n", |
144 | 143 | "import os\n", |
|
157 | 156 | " \"vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.common_util\"\n", |
158 | 157 | ")\n", |
159 | 158 | "\n", |
160 | | - "LABEL = \"vllm_gpu\"\n", |
161 | 159 | "models, endpoints = {}, {}\n", |
162 | 160 | "\n", |
163 | 161 | "\n", |
|
395 | 393 | " return model, endpoint\n", |
396 | 394 | "\n", |
397 | 395 | "\n", |
398 | | - "models[\"hexllm_tpu\"], endpoints[\"hexllm_tpu\"] = deploy_model_hexllm(\n", |
| 396 | + "LABEL = \"hexllm_tpu\"\n", |
| 397 | + "models[LABEL], endpoints[LABEL] = deploy_model_hexllm(\n", |
399 | 398 | " model_name=common_util.get_job_name_with_datetime(prefix=MODEL_ID),\n", |
400 | 399 | " model_id=model_id,\n", |
401 | 400 | " publisher=\"meta\",\n", |
|
412 | 411 | " min_replica_count=min_replica_count,\n", |
413 | 412 | " max_replica_count=max_replica_count,\n", |
414 | 413 | " use_dedicated_endpoint=use_dedicated_endpoint,\n", |
415 | | - ")" |
| 414 | + ")\n", |
| 415 | + "\n", |
| 416 | + "model = models[LABEL]\n", |
| 417 | + "endpoint = endpoints[LABEL]" |
416 | 418 | ] |
417 | 419 | }, |
418 | 420 | { |
|
692 | 694 | "\n", |
693 | 695 | "\n", |
694 | 696 | "use_dedicated_endpoint = True # Fast Deployment only supports dedicated endpoints.\n", |
695 | | - "models[\"vllm_fast\"], endpoints[\"vllm_fast\"] = fast_deploy(\n", |
| 697 | + "LABEL = \"vllm_fast\"\n", |
| 698 | + "models[LABEL], endpoints[LABEL] = fast_deploy(\n", |
696 | 699 | " \"meta\", \"llama3_1\", \"llama-3.1-8b-instruct\"\n", |
697 | | - ")" |
| 700 | + ")\n", |
| 701 | + "\n", |
| 702 | + "model = models[LABEL]\n", |
| 703 | + "endpoint = endpoints[LABEL]" |
698 | 704 | ] |
699 | 705 | }, |
700 | 706 | { |
|
925 | 931 | "source": [ |
926 | 932 | "# @title [Option 1] Deploy with Model Garden SDK\n", |
927 | 933 | "\n", |
| 934 | + "LABEL = \"sdk-deploy\"\n", |
928 | 935 | "# @markdown Deploy with Gen AI model-centric SDK. This section uploads the prebuilt model to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model. See [use open models with Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/open-models/use-open-models) for documentation on other use cases.\n", |
929 | 936 | "from vertexai.preview import model_garden\n", |
930 | 937 | "\n", |
|
1167 | 1174 | " return model, endpoint\n", |
1168 | 1175 | "\n", |
1169 | 1176 | "\n", |
| 1177 | + "LABEL = \"vllm_gpu\"\n", |
1170 | 1178 | "models[LABEL], endpoints[LABEL] = deploy_model_vllm(\n", |
1171 | 1179 | " model_name=common_util.get_job_name_with_datetime(prefix=\"llama3_1-serve\"),\n", |
1172 | 1180 | " model_id=model_id,\n", |
|
1194 | 1202 | " is_spot=is_spot,\n", |
1195 | 1203 | ")\n", |
1196 | 1204 | "\n", |
| 1205 | + "model = models[LABEL]\n", |
| 1206 | + "endpoint = endpoints[LABEL]\n", |
1197 | 1207 | "# @markdown Click \"Show Code\" to see more details." |
1198 | 1208 | ] |
1199 | 1209 | }, |
|
1473 | 1483 | " return model, endpoint\n", |
1474 | 1484 | "\n", |
1475 | 1485 | "\n", |
1476 | | - "(\n", |
1477 | | - " models[\"optimized_vllm_gpu\"],\n", |
1478 | | - " endpoints[\"optimized_vllm_gpu\"],\n", |
1479 | | - ") = deploy_model_optimized_vllm(\n", |
| 1486 | + "LABEL = \"optimized_vllm_gpu\"\n", |
| 1487 | + "(models[LABEL], endpoints[LABEL],) = deploy_model_optimized_vllm(\n", |
1480 | 1488 | " model_name=common_util.get_job_name_with_datetime(prefix=\"llama3_1-serve\"),\n", |
1481 | 1489 | " model_id=model_id,\n", |
1482 | 1490 | " publisher=\"meta\",\n", |
|
1488 | 1496 | " max_model_len=max_model_len,\n", |
1489 | 1497 | " use_dedicated_endpoint=use_dedicated_endpoint,\n", |
1490 | 1498 | ")\n", |
| 1499 | + "\n", |
| 1500 | + "model = models[LABEL]\n", |
| 1501 | + "endpoint = endpoints[LABEL]\n", |
| 1502 | + "\n", |
1491 | 1503 | "# @markdown Click \"Show Code\" to see more details." |
1492 | 1504 | ] |
1493 | 1505 | }, |
|
0 commit comments