|
104 | 104 | "\n", |
105 | 105 | "REGION = \"\" # @param {type:\"string\"}\n", |
106 | 106 | "\n", |
| 107 | + "# @markdown 4. If you want to run predictions with A100 80GB or H100 GPUs, we recommend using the regions listed below. **NOTE:** Make sure you have associated quota in selected regions. Click the links to see your current quota for each GPU type: [Nvidia A100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_a100_80gb_gpus), [Nvidia H100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_h100_gpus). You can request quota by following the instructions at [\"Request a higher quota\"](https://cloud.google.com/docs/quota/view-manage#requesting_higher_quota).\n", |
| 108 | + "\n", |
| 109 | + "# @markdown > | Machine Type | Accelerator Type | Recommended Regions |\n", |
| 110 | + "# @markdown | ----------- | ----------- | ----------- |\n", |
| 111 | + "# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1 |\n", |
| 112 | + "# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n", |
| 113 | + "# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n", |
| 114 | + "# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, europe-west4, us-west1, asia-southeast1 |\n", |
| 115 | + "\n", |
107 | 116 | "! pip3 install --upgrade gradio==4.29.0 opencv-python\n", |
108 | 117 | "# Uninstall nest-asyncio and uvloop as a workaround to https://github.com/gradio-app/gradio/issues/8238#issuecomment-2101066984\n", |
109 | 118 | "! pip3 uninstall --yes nest-asyncio uvloop\n", |
|
190 | 199 | "! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/storage.admin\"\n", |
191 | 200 | "! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/aiplatform.user\"\n", |
192 | 201 | "\n", |
| 202 | + "# @markdown Set use_dedicated_endpoint to False if you don't want to use a [dedicated endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#create-dedicated-endpoint). Note that [dedicated endpoints do not support VPC Service Controls](https://cloud.google.com/vertex-ai/docs/predictions/choose-endpoint-type); uncheck the box if you are using VPC-SC.\n", |
| 203 | + "use_dedicated_endpoint = True # @param {type:\"boolean\"}\n", |
| 204 | + "\n", |
193 | 205 | "aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)" |
194 | 206 | ] |
195 | 207 | }, |
196 | 208 | { |
197 | 209 | "cell_type": "code", |
198 | 210 | "execution_count": null, |
199 | | - "language": "python", |
200 | 211 | "metadata": { |
201 | 212 | "cellView": "form", |
202 | 213 | "id": "1cc26e68d7b0" |
|
477 | 488 | " publisher_model_id = get_publisher_model_id(model_id)\n", |
478 | 489 | " task_name = get_task_name(model_id)\n", |
479 | 490 | "\n", |
480 | | - " return deploy_model_vertex(model_id, publisher_model_id, task_name)\n", |
| 491 | + " return deploy_model_vertex(\n", |
| 492 | + " model_id, publisher_model_id, task_name, lora_id, use_dedicated_endpoint\n", |
| 493 | + " )\n", |
481 | 494 | "\n", |
482 | 495 | "\n", |
483 | 496 | "def deploy_model_vertex(\n", |
484 | | - " model_id: str, publisher_model_id: str | None, task_name: str, lora_id: str = \"\"\n", |
| 497 | + " model_id: str,\n", |
| 498 | + " publisher_model_id: str | None,\n", |
| 499 | + " task_name: str,\n", |
| 500 | + " lora_id: str = \"\",\n", |
| 501 | + " use_dedicated_endpoint: bool = False,\n", |
485 | 502 | ") -> aiplatform.Endpoint:\n", |
486 | 503 | " \"\"\"\n", |
487 | 504 | " Creates a new Vertex prediction endpoint and deploys a model to it.\n", |
|
501 | 518 | " ):\n", |
502 | 519 | " deploy_model_id = \"stabilityai/stable-diffusion-xl-base-1.0\"\n", |
503 | 520 | "\n", |
504 | | - " endpoint = aiplatform.Endpoint.create(display_name=display_name)\n", |
| 521 | + " endpoint = aiplatform.Endpoint.create(\n", |
| 522 | + " display_name=display_name,\n", |
| 523 | + " dedicated_endpoint_enabled=use_dedicated_endpoint,\n", |
| 524 | + " )\n", |
505 | 525 | " serving_env = {\n", |
506 | 526 | " \"MODEL_ID\": deploy_model_id,\n", |
507 | 527 | " \"TASK\": task_name,\n", |
|
545 | 565 | " accelerator_type=accelerator_type,\n", |
546 | 566 | " accelerator_count=accelerator_count,\n", |
547 | 567 | " deploy_request_timeout=1800,\n", |
548 | | - " service_account=SERVICE_ACCOUNT,\n", |
549 | 568 | " sync=False,\n", |
550 | 569 | " system_labels={\n", |
551 | 570 | " \"NOTEBOOK_NAME\": \"model_garden_pytorch_stable_diffusion_gradio.ipynb\"\n", |
|
1317 | 1336 | " model_id=\"stabilityai/stable-diffusion-xl-base-1.0\",\n", |
1318 | 1337 | " lora_id=output_dir,\n", |
1319 | 1338 | " task_name=\"text-to-image-sdxl\",\n", |
| 1339 | + " use_dedicated_endpoint=True,\n", |
1320 | 1340 | " )\n", |
1321 | 1341 | "\n", |
1322 | | - " return deploy_model_vertex(model_id=output_dir, task_name=\"text-to-image\")\n", |
| 1342 | + " return deploy_model_vertex(\n", |
| 1343 | + " model_id=output_dir,\n", |
| 1344 | + " task_name=\"text-to-image\",\n", |
| 1345 | + " use_dedicated_endpoint=use_dedicated_endpoint,\n", |
| 1346 | + " )\n", |
1323 | 1347 | "\n", |
1324 | 1348 | " def dreambooth_start_training(*inputs):\n", |
1325 | 1349 | " [\n", |
|
0 commit comments