|
139 | 139 | " \"vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.common_util\"\n", |
140 | 140 | ")\n", |
141 | 141 | "\n", |
142 | | - "LABEL = \"biomedclip_serve\"\n", |
143 | 142 | "models, endpoints = {}, {}\n", |
144 | 143 | "\n", |
145 | 144 | "# Get the default cloud project id.\n", |
|
216 | 215 | "source": [ |
217 | 216 | "# @title [Option 1] Deploy with Model Garden SDK\n", |
218 | 217 | "\n", |
| 218 | + "LABEL = \"sdk-deploy\"\n", |
219 | 219 | "# @markdown Deploy with Gen AI model-centric SDK. This section uploads the prebuilt model to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model. See [use open models with Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/open-models/use-open-models) for documentation on other use cases.\n", |
220 | 220 | "from vertexai.preview import model_garden\n", |
221 | 221 | "\n", |
|
258 | 258 | " machine_type: str,\n", |
259 | 259 | " accelerator_type: str,\n", |
260 | 260 | " accelerator_count: int,\n", |
| 261 | + " use_dedicated_endpoint: bool = False,\n", |
261 | 262 | ") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n", |
262 | 263 | " \"\"\"Deploys trained models into Vertex AI.\"\"\"\n", |
263 | | - " endpoint = aiplatform.Endpoint.create(display_name=f\"{model_name}-endpoint\")\n", |
| 264 | + " endpoint = aiplatform.Endpoint.create(\n", |
| 265 | + " display_name=f\"{model_name}-endpoint\",\n", |
| 266 | + " dedicated_endpoint_enabled=use_dedicated_endpoint,\n", |
| 267 | + " )\n", |
264 | 268 | " serving_env = {\n", |
265 | 269 | " \"MODEL\": model_id,\n", |
266 | 270 | " \"TASK\": task,\n", |
|
293 | 297 | " return model, endpoint\n", |
294 | 298 | "\n", |
295 | 299 | "\n", |
| 300 | + "LABEL = \"open-clip-deploy\"\n", |
| 301 | + "\n", |
296 | 302 | "models[LABEL], endpoints[LABEL] = deploy_model(\n", |
297 | 303 | " model_name=common_util.get_job_name_with_datetime(prefix=\"biomedclip-serve\"),\n", |
298 | 304 | " model_id=model_id,\n", |
|
301 | 307 | " machine_type=machine_type,\n", |
302 | 308 | " accelerator_type=accelerator_type,\n", |
303 | 309 | " accelerator_count=1,\n", |
304 | | - ")" |
| 310 | + " use_dedicated_endpoint=use_dedicated_endpoint,\n", |
| 311 | + ")\n", |
| 312 | + "\n", |
| 313 | + "model = models[LABEL]\n", |
| 314 | + "endpoint = endpoints[LABEL]" |
305 | 315 | ] |
306 | 316 | }, |
307 | 317 | { |
|
364 | 374 | " {\"text\": \"This is a photo of hematoxylin and eosin histopathology\"},\n", |
365 | 375 | " {\"text\": \"This is a photo of pie chart\"},\n", |
366 | 376 | "]\n", |
367 | | - "response = endpoints[LABEL].predict(instances=instances)\n", |
| 377 | + "response = endpoint.predict(\n", |
| 378 | + " instances=instances, use_dedicated_endpoint=use_dedicated_endpoint\n", |
| 379 | + ")\n", |
368 | 380 | "\n", |
369 | 381 | "print(response.predictions)\n", |
370 | 382 | "\n", |
|
0 commit comments