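Format and refactor the Qwen2 deployment notebook

Add a "Run in Workbench" link to the header table, drop the "language" key
from a code cell, and assign LABEL in each deployment cell (replacing the
single global LABEL and the hard-coded endpoint/model dictionary keys).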

vertex-mg-bot · copybara-github · commit fdaa5a6b90ce · 2025-06-04T02:03:46.000-07:00
PiperOrigin-RevId: 767042986
diff --git a/notebooks/community/model_garden/model_garden_pytorch_qwen2_deployment.ipynb b/notebooks/community/model_garden/model_garden_pytorch_qwen2_deployment.ipynb
@@ -34,6 +34,11 @@
         "\n",
         "<table><tbody><tr>\n",
         "  <td style=\"text-align: center\">\n",
+        "    <a href=\"https://console.cloud.google.com/vertex-ai/workbench/instances\">\n",
+        "      <img alt=\"Workbench logo\" src=\"https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32\" width=\"32px\"><br> Run in Workbench\n",
+        "    </a>\n",
+        "  </td>\n",
+        "  <td style=\"text-align: center\">\n",
         "    <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_pytorch_qwen2_deployment.ipynb\">\n",
         "      <img alt=\"Google Cloud Colab Enterprise logo\" src=\"https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN\" width=\"32px\"><br> Run in Colab Enterprise\n",
         "    </a>\n",
@@ -123,7 +128,6 @@
         "from google.cloud import aiplatform\n",
         "\n",
         "models, endpoints = {}, {}\n",
-        "LABEL = \"vllm_gpu\"\n",
         "\n",
         "if os.environ.get(\"VERTEX_PRODUCT\") != \"COLAB_ENTERPRISE\":\n",
         "    ! pip install --upgrade tensorflow\n",
@@ -167,7 +171,6 @@
     {
       "cell_type": "code",
       "execution_count": null,
-      "language": "python",
       "metadata": {
         "cellView": "form",
         "id": "USB7dvYqvNdu"
@@ -273,6 +276,8 @@
       "source": [
         "# @title [Option 1] Deploy with Model Garden SDK\n",
         "\n",
+        "LABEL = \"sdk-deploy\"\n",
+        "\n",
         "# @markdown Deploy with Gen AI model-centric SDK. This section uploads the prebuilt model to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model. See [use open models with Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/open-models/use-open-models) for documentation on other use cases.\n",
         "from vertexai.preview import model_garden\n",
         "\n",
@@ -435,7 +440,8 @@
         "    return model, endpoint\n",
         "\n",
         "\n",
-        "models[\"vllm_gpu\"], endpoints[\"vllm_gpu\"] = deploy_model_vllm(\n",
+        "LABEL = \"custom-deploy\"\n",
+        "models[LABEL], endpoints[LABEL] = deploy_model_vllm(\n",
         "    model_name=common_util.get_job_name_with_datetime(prefix=MODEL_ID),\n",
         "    model_id=model_id,\n",
         "    publisher=\"qwen\",\n",
@@ -505,7 +511,7 @@
         "        \"raw_response\": raw_response,\n",
         "    },\n",
         "]\n",
-        "response = endpoints[\"vllm_gpu\"].predict(\n",
+        "response = endpoints[LABEL].predict(\n",
         "    instances=instances, use_dedicated_endpoint=use_dedicated_endpoint\n",
         ")\n",
         "\n",
@@ -699,7 +705,8 @@
         "    return model, endpoint\n",
         "\n",
         "\n",
-        "models[\"hexllm_tpu\"], endpoints[\"hexllm_tpu\"] = deploy_model_hexllm(\n",
+        "LABEL = \"hexllm_tpu\"\n",
+        "models[LABEL], endpoints[LABEL] = deploy_model_hexllm(\n",
         "    model_name=common_util.get_job_name_with_datetime(prefix=MODEL_ID),\n",
         "    model_id=model_id,\n",
         "    publisher=\"qwen\",\n",