|
34 | 34 | "\n", |
35 | 35 | "<table><tbody><tr>\n", |
36 | 36 | " <td style=\"text-align: center\">\n", |
| 37 | + " <a href=\"https://console.cloud.google.com/vertex-ai/workbench/instances\">\n", |
| 38 | + " <img alt=\"Workbench logo\" src=\"https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32\" width=\"32px\"><br> Run in Workbench\n", |
| 39 | + " </a>\n", |
| 40 | + " </td>\n", |
| 41 | + " <td style=\"text-align: center\">\n", |
37 | 42 | " <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_pytorch_qwen2_deployment.ipynb\">\n", |
38 | 43 | " <img alt=\"Google Cloud Colab Enterprise logo\" src=\"https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN\" width=\"32px\"><br> Run in Colab Enterprise\n", |
39 | 44 | " </a>\n", |
|
123 | 128 | "from google.cloud import aiplatform\n", |
124 | 129 | "\n", |
125 | 130 | "models, endpoints = {}, {}\n", |
126 | | - "LABEL = \"vllm_gpu\"\n", |
127 | 131 | "\n", |
128 | 132 | "if os.environ.get(\"VERTEX_PRODUCT\") != \"COLAB_ENTERPRISE\":\n", |
129 | 133 | " ! pip install --upgrade tensorflow\n", |
|
167 | 171 | { |
168 | 172 | "cell_type": "code", |
169 | 173 | "execution_count": null, |
170 | | - "language": "python", |
171 | 174 | "metadata": { |
172 | 175 | "cellView": "form", |
173 | 176 | "id": "USB7dvYqvNdu" |
|
273 | 276 | "source": [ |
274 | 277 | "# @title [Option 1] Deploy with Model Garden SDK\n", |
275 | 278 | "\n", |
| 279 | + "LABEL = \"sdk-deploy\"\n", |
| 280 | + "\n", |
276 | 281 | "# @markdown Deploy with Gen AI model-centric SDK. This section uploads the prebuilt model to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model. See [use open models with Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/open-models/use-open-models) for documentation on other use cases.\n", |
277 | 282 | "from vertexai.preview import model_garden\n", |
278 | 283 | "\n", |
|
435 | 440 | " return model, endpoint\n", |
436 | 441 | "\n", |
437 | 442 | "\n", |
438 | | - "models[\"vllm_gpu\"], endpoints[\"vllm_gpu\"] = deploy_model_vllm(\n", |
| 443 | + "LABEL = \"custom-deploy\"\n", |
| 444 | + "models[LABEL], endpoints[LABEL] = deploy_model_vllm(\n", |
439 | 445 | " model_name=common_util.get_job_name_with_datetime(prefix=MODEL_ID),\n", |
440 | 446 | " model_id=model_id,\n", |
441 | 447 | " publisher=\"qwen\",\n", |
|
505 | 511 | " \"raw_response\": raw_response,\n", |
506 | 512 | " },\n", |
507 | 513 | "]\n", |
508 | | - "response = endpoints[\"vllm_gpu\"].predict(\n", |
| 514 | + "response = endpoints[LABEL].predict(\n", |
509 | 515 | " instances=instances, use_dedicated_endpoint=use_dedicated_endpoint\n", |
510 | 516 | ")\n", |
511 | 517 | "\n", |
|
699 | 705 | " return model, endpoint\n", |
700 | 706 | "\n", |
701 | 707 | "\n", |
702 | | - "models[\"hexllm_tpu\"], endpoints[\"hexllm_tpu\"] = deploy_model_hexllm(\n", |
| 708 | + "LABEL = \"hexllm_tpu\"\n", |
| 709 | + "models[LABEL], endpoints[LABEL] = deploy_model_hexllm(\n", |
703 | 710 | " model_name=common_util.get_job_name_with_datetime(prefix=MODEL_ID),\n", |
704 | 711 | " model_id=model_id,\n", |
705 | 712 | " publisher=\"qwen\",\n", |
|
0 commit comments