|
104 | 104 | "\n", |
105 | 105 | "REGION = \"\" # @param {type:\"string\"}\n", |
106 | 106 | "\n", |
| 107 | + "# @markdown 4. If you want to run predictions with A100 80GB or H100 GPUs, we recommend using the regions listed below. **NOTE:** Make sure you have associated quota in selected regions. Click the links to see your current quota for each GPU type: [Nvidia A100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_a100_80gb_gpus), [Nvidia H100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_h100_gpus). You can request quota by following the instructions at [\"Request a higher quota\"](https://cloud.google.com/docs/quota/view-manage#requesting_higher_quota).\n", |
| 108 | + "\n", |
| 109 | + "# @markdown > | Machine Type | Accelerator Type | Recommended Regions |\n", |
| 110 | + "# @markdown | ----------- | ----------- | ----------- |\n", |
| 111 | + "# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1 |\n", |
| 112 | + "# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n", |
| 113 | + "# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n", |
| 114 | + "# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, europe-west4, us-west1, asia-southeast1 |\n", |
| 115 | + "\n", |
107 | 116 | "! pip3 install --upgrade gradio==4.29.0 opencv-python\n", |
108 | 117 | "# Uninstall nest-asyncio and uvloop as a workaround to https://github.com/gradio-app/gradio/issues/8238#issuecomment-2101066984\n", |
109 | 118 | "! pip3 uninstall --yes nest-asyncio uvloop\n", |
|
190 | 199 | "! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/storage.admin\"\n", |
191 | 200 | "! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/aiplatform.user\"\n", |
192 | 201 | "\n", |
| 202 | + "# @markdown Set use_dedicated_endpoint to False if you don't want to use a [dedicated endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#create-dedicated-endpoint). Note that [dedicated endpoints do not support VPC Service Controls](https://cloud.google.com/vertex-ai/docs/predictions/choose-endpoint-type); uncheck the box if you are using VPC-SC.\n", |
| 203 | + "use_dedicated_endpoint = True # @param {type:\"boolean\"}\n", |
| 204 | + "\n", |
193 | 205 | "aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)" |
194 | 206 | ] |
195 | 207 | }, |
196 | 208 | { |
197 | 209 | "cell_type": "code", |
198 | 210 | "execution_count": null, |
199 | | - "language": "python", |
200 | 211 | "metadata": { |
201 | 212 | "cellView": "form", |
202 | 213 | "id": "1cc26e68d7b0" |
|
477 | 488 | " publisher_model_id = get_publisher_model_id(model_id)\n", |
478 | 489 | " task_name = get_task_name(model_id)\n", |
479 | 490 | "\n", |
480 | | - " return deploy_model_vertex(model_id, publisher_model_id, task_name)\n", |
| 491 | + " return deploy_model_vertex(\n", |
| 492 | + " model_id, publisher_model_id, task_name, lora_id, use_dedicated_endpoint\n", |
| 493 | + " )\n", |
481 | 494 | "\n", |
482 | 495 | "\n", |
483 | 496 | "def deploy_model_vertex(\n", |
484 | | - " model_id: str, publisher_model_id: str | None, task_name: str, lora_id: str = \"\"\n", |
| 497 | + " model_id: str,\n", |
| 498 | + " publisher_model_id: str | None,\n", |
| 499 | + " task_name: str,\n", |
| 500 | + " lora_id: str = \"\",\n", |
| 501 | + " use_dedicated_endpoint: bool = False,\n", |
485 | 502 | ") -> aiplatform.Endpoint:\n", |
486 | 503 | " \"\"\"\n", |
487 | 504 | " Creates a new Vertex prediction endpoint and deploys a model to it.\n", |
|
501 | 518 | " ):\n", |
502 | 519 | " deploy_model_id = \"stabilityai/stable-diffusion-xl-base-1.0\"\n", |
503 | 520 | "\n", |
504 | | - " endpoint = aiplatform.Endpoint.create(display_name=display_name)\n", |
| 521 | + " endpoint = aiplatform.Endpoint.create(\n", |
| 522 | + " display_name=display_name,\n", |
| 523 | + " dedicated_endpoint_enabled=use_dedicated_endpoint,\n", |
| 524 | + " )\n", |
505 | 525 | " serving_env = {\n", |
506 | 526 | " \"MODEL_ID\": deploy_model_id,\n", |
507 | 527 | " \"TASK\": task_name,\n", |
|
545 | 565 | " accelerator_type=accelerator_type,\n", |
546 | 566 | " accelerator_count=accelerator_count,\n", |
547 | 567 | " deploy_request_timeout=1800,\n", |
548 | | - " service_account=SERVICE_ACCOUNT,\n", |
549 | 568 | " sync=False,\n", |
550 | 569 | " system_labels={\n", |
551 | 570 | " \"NOTEBOOK_NAME\": \"model_garden_pytorch_stable_diffusion_gradio.ipynb\"\n", |
|
1317 | 1336 | " model_id=\"stabilityai/stable-diffusion-xl-base-1.0\",\n", |
1318 | 1337 | " lora_id=output_dir,\n", |
1319 | 1338 | " task_name=\"text-to-image-sdxl\",\n", |
| 1339 | + " use_dedicated_endpoint=True,\n", |
1320 | 1340 | " )\n", |
1321 | 1341 | "\n", |
1322 | | - " return deploy_model_vertex(model_id=output_dir, task_name=\"text-to-image\")\n", |
| 1342 | + " return deploy_model_vertex(\n", |
| 1343 | + " model_id=output_dir,\n", |
| 1344 | + " task_name=\"text-to-image\",\n", |
| 1345 | + " use_dedicated_endpoint=use_dedicated_endpoint,\n", |
| 1346 | + " )\n", |
1323 | 1347 | "\n", |
1324 | 1348 | " def dreambooth_start_training(*inputs):\n", |
1325 | 1349 | " [\n", |
|
0 commit comments