Skip to content

Commit afacbe8

Browse files
vertex-mg-bot authored and copybara-github committed
Remove the usage of Service Account and support VPC-SC and refactoring
PiperOrigin-RevId: 769558936
1 parent 74376f2 commit afacbe8

1 file changed

Lines changed: 30 additions & 6 deletions

File tree

notebooks/community/model_garden/model_garden_pytorch_stable_diffusion_gradio.ipynb

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,15 @@
104104
"\n",
105105
"REGION = \"\" # @param {type:\"string\"}\n",
106106
"\n",
107+
"# @markdown 4. If you want to run predictions with A100 80GB or H100 GPUs, we recommend using the regions listed below. **NOTE:** Make sure you have the associated quota in the selected regions. Click the links to see your current quota for each GPU type: [Nvidia A100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_a100_80gb_gpus), [Nvidia H100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_h100_gpus). You can request quota by following the instructions at [\"Request a higher quota\"](https://cloud.google.com/docs/quota/view-manage#requesting_higher_quota).\n",
108+
"\n",
109+
"# @markdown > | Machine Type | Accelerator Type | Recommended Regions |\n",
110+
"# @markdown | ----------- | ----------- | ----------- |\n",
111+
"# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1 |\n",
112+
"# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n",
113+
"# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n",
114+
"# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, europe-west4, us-west1, asia-southeast1 |\n",
115+
"\n",
107116
"! pip3 install --upgrade gradio==4.29.0 opencv-python\n",
108117
"# Uninstall nest-asyncio and uvloop as a workaround to https://github.com/gradio-app/gradio/issues/8238#issuecomment-2101066984\n",
109118
"! pip3 uninstall --yes nest-asyncio uvloop\n",
@@ -190,13 +199,15 @@
190199
"! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/storage.admin\"\n",
191200
"! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/aiplatform.user\"\n",
192201
"\n",
202+
"# @markdown Set use_dedicated_endpoint to False if you don't want to use a [dedicated endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#create-dedicated-endpoint). Note that [dedicated endpoints do not support VPC Service Controls](https://cloud.google.com/vertex-ai/docs/predictions/choose-endpoint-type), so uncheck the box if you are using VPC-SC.\n",
203+
"use_dedicated_endpoint = True # @param {type:\"boolean\"}\n",
204+
"\n",
193205
"aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)"
194206
]
195207
},
196208
{
197209
"cell_type": "code",
198210
"execution_count": null,
199-
"language": "python",
200211
"metadata": {
201212
"cellView": "form",
202213
"id": "1cc26e68d7b0"
@@ -477,11 +488,17 @@
477488
" publisher_model_id = get_publisher_model_id(model_id)\n",
478489
" task_name = get_task_name(model_id)\n",
479490
"\n",
480-
" return deploy_model_vertex(model_id, publisher_model_id, task_name)\n",
491+
" return deploy_model_vertex(\n",
492+
" model_id, publisher_model_id, task_name, lora_id, use_dedicated_endpoint\n",
493+
" )\n",
481494
"\n",
482495
"\n",
483496
"def deploy_model_vertex(\n",
484-
" model_id: str, publisher_model_id: str | None, task_name: str, lora_id: str = \"\"\n",
497+
" model_id: str,\n",
498+
" publisher_model_id: str | None,\n",
499+
" task_name: str,\n",
500+
" lora_id: str = \"\",\n",
501+
" use_dedicated_endpoint: bool = False,\n",
485502
") -> aiplatform.Endpoint:\n",
486503
" \"\"\"\n",
487504
" Creates a new Vertex prediction endpoint and deploys a model to it.\n",
@@ -501,7 +518,10 @@
501518
" ):\n",
502519
" deploy_model_id = \"stabilityai/stable-diffusion-xl-base-1.0\"\n",
503520
"\n",
504-
" endpoint = aiplatform.Endpoint.create(display_name=display_name)\n",
521+
" endpoint = aiplatform.Endpoint.create(\n",
522+
" display_name=display_name,\n",
523+
" dedicated_endpoint_enabled=use_dedicated_endpoint,\n",
524+
" )\n",
505525
" serving_env = {\n",
506526
" \"MODEL_ID\": deploy_model_id,\n",
507527
" \"TASK\": task_name,\n",
@@ -545,7 +565,6 @@
545565
" accelerator_type=accelerator_type,\n",
546566
" accelerator_count=accelerator_count,\n",
547567
" deploy_request_timeout=1800,\n",
548-
" service_account=SERVICE_ACCOUNT,\n",
549568
" sync=False,\n",
550569
" system_labels={\n",
551570
" \"NOTEBOOK_NAME\": \"model_garden_pytorch_stable_diffusion_gradio.ipynb\"\n",
@@ -1317,9 +1336,14 @@
13171336
" model_id=\"stabilityai/stable-diffusion-xl-base-1.0\",\n",
13181337
" lora_id=output_dir,\n",
13191338
" task_name=\"text-to-image-sdxl\",\n",
1339+
" use_dedicated_endpoint=True,\n",
13201340
" )\n",
13211341
"\n",
1322-
" return deploy_model_vertex(model_id=output_dir, task_name=\"text-to-image\")\n",
1342+
" return deploy_model_vertex(\n",
1343+
" model_id=output_dir,\n",
1344+
" task_name=\"text-to-image\",\n",
1345+
" use_dedicated_endpoint=use_dedicated_endpoint,\n",
1346+
" )\n",
13231347
"\n",
13241348
" def dreambooth_start_training(*inputs):\n",
13251349
" [\n",

0 commit comments

Comments
 (0)