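Format and refactor the llama3_1 deployment notebook: set LABEL in each deployment cell, bind model/endpoint after each deploy, and drop the vertex-ai-samples git clone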

vertex-mg-bot · copybara-github · commit a6e69a4561cb · 2025-06-04T02:05:07.000-07:00
PiperOrigin-RevId: 767043085
diff --git a/notebooks/community/model_garden/model_garden_pytorch_llama3_1_deployment.ipynb b/notebooks/community/model_garden/model_garden_pytorch_llama3_1_deployment.ipynb
@@ -138,7 +138,6 @@
         "\n",
         "# Upgrade Vertex AI SDK.\n",
         "! pip3 install --upgrade --quiet 'google-cloud-aiplatform==1.93.1'\n",
-        "! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git\n",
         "\n",
         "import importlib\n",
         "import os\n",
@@ -157,7 +156,6 @@
         "    \"vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.common_util\"\n",
         ")\n",
         "\n",
-        "LABEL = \"vllm_gpu\"\n",
         "models, endpoints = {}, {}\n",
         "\n",
         "\n",
@@ -395,7 +393,8 @@
         "    return model, endpoint\n",
         "\n",
         "\n",
-        "models[\"hexllm_tpu\"], endpoints[\"hexllm_tpu\"] = deploy_model_hexllm(\n",
+        "LABEL = \"hexllm_tpu\"\n",
+        "models[LABEL], endpoints[LABEL] = deploy_model_hexllm(\n",
         "    model_name=common_util.get_job_name_with_datetime(prefix=MODEL_ID),\n",
         "    model_id=model_id,\n",
         "    publisher=\"meta\",\n",
@@ -412,7 +411,10 @@
         "    min_replica_count=min_replica_count,\n",
         "    max_replica_count=max_replica_count,\n",
         "    use_dedicated_endpoint=use_dedicated_endpoint,\n",
-        ")"
+        ")\n",
+        "\n",
+        "model = models[LABEL]\n",
+        "endpoint = endpoints[LABEL]"
       ]
     },
     {
@@ -692,9 +694,13 @@
         "\n",
         "\n",
         "use_dedicated_endpoint = True  # Fast Deployment only supports dedicated endpoints.\n",
-        "models[\"vllm_fast\"], endpoints[\"vllm_fast\"] = fast_deploy(\n",
+        "LABEL = \"vllm_fast\"\n",
+        "models[LABEL], endpoints[LABEL] = fast_deploy(\n",
         "    \"meta\", \"llama3_1\", \"llama-3.1-8b-instruct\"\n",
-        ")"
+        ")\n",
+        "\n",
+        "model = models[LABEL]\n",
+        "endpoint = endpoints[LABEL]"
       ]
     },
     {
@@ -925,6 +931,7 @@
       "source": [
         "# @title [Option 1] Deploy with Model Garden SDK\n",
         "\n",
+        "LABEL = \"sdk-deploy\"\n",
         "# @markdown Deploy with Gen AI model-centric SDK. This section uploads the prebuilt model to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model. See [use open models with Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/open-models/use-open-models) for documentation on other use cases.\n",
         "from vertexai.preview import model_garden\n",
         "\n",
@@ -1167,6 +1174,7 @@
         "    return model, endpoint\n",
         "\n",
         "\n",
+        "LABEL = \"vllm_gpu\"\n",
         "models[LABEL], endpoints[LABEL] = deploy_model_vllm(\n",
         "    model_name=common_util.get_job_name_with_datetime(prefix=\"llama3_1-serve\"),\n",
         "    model_id=model_id,\n",
@@ -1194,6 +1202,8 @@
         "    is_spot=is_spot,\n",
         ")\n",
         "\n",
+        "model = models[LABEL]\n",
+        "endpoint = endpoints[LABEL]\n",
         "# @markdown Click \"Show Code\" to see more details."
       ]
     },
@@ -1473,10 +1483,8 @@
         "    return model, endpoint\n",
         "\n",
         "\n",
-        "(\n",
-        "    models[\"optimized_vllm_gpu\"],\n",
-        "    endpoints[\"optimized_vllm_gpu\"],\n",
-        ") = deploy_model_optimized_vllm(\n",
+        "LABEL = \"optimized_vllm_gpu\"\n",
+        "(models[LABEL], endpoints[LABEL],) = deploy_model_optimized_vllm(\n",
         "    model_name=common_util.get_job_name_with_datetime(prefix=\"llama3_1-serve\"),\n",
         "    model_id=model_id,\n",
         "    publisher=\"meta\",\n",
@@ -1488,6 +1496,10 @@
         "    max_model_len=max_model_len,\n",
         "    use_dedicated_endpoint=use_dedicated_endpoint,\n",
         ")\n",
+        "\n",
+        "model = models[LABEL]\n",
+        "endpoint = endpoints[LABEL]\n",
+        "\n",
         "# @markdown Click \"Show Code\" to see more details."
       ]
     },