Skip to content

Commit d0fb60f

Browse files
rayandasoriya (Rayan Dasoriya)
and co-author authored
Add optional support for global quota check (#4096)
Co-authored-by: Rayan Dasoriya <dasoriya@google.com>
1 parent 7d4fb0f commit d0fb60f

1 file changed

Lines changed: 44 additions & 7 deletions

File tree

  • community-content/vertex_model_garden/model_oss/notebook_util

community-content/vertex_model_garden/model_oss/notebook_util/common_util.py

Lines changed: 44 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,14 @@
11
"""Common util functions for notebook."""
22

33
import base64
4+
from collections.abc import Sequence
45
import datetime
56
import io
67
import json
78
import os
89
import subprocess
910
import time
10-
from typing import Any, Dict, Sequence
11+
from typing import Any
1112

1213
from google import auth
1314
from google.cloud import storage
@@ -283,7 +284,7 @@ def decode_image(
283284
return image
284285

285286

286-
def get_label_map(label_map_yaml_filepath: str) -> Dict[int, str]:
287+
def get_label_map(label_map_yaml_filepath: str) -> dict[int, str]:
287288
"""Returns class id to label mapping given a filepath to the label map.
288289
289290
Args:
@@ -509,6 +510,17 @@ def get_quota(project_id: str, region: str, resource_id: str) -> int:
509510
):
510511
return -1
511512
all_regions_data = quota_data[0]["consumerQuotaLimits"][0]["quotaBuckets"]
513+
514+
# If the quota data does not have dimensions, it is global quota. However,
515+
# global quota may be overridden by regional quota. So we need to check the
516+
# global quota first.
517+
global_quota = -1
518+
if (
519+
all_regions_data
520+
and "dimensions" not in all_regions_data[0]
521+
and "effectiveLimit" in all_regions_data[0]
522+
):
523+
global_quota = int(all_regions_data[0]["effectiveLimit"])
512524
for region_data in all_regions_data:
513525
if (
514526
region_data.get("dimensions")
@@ -518,12 +530,13 @@ def get_quota(project_id: str, region: str, resource_id: str) -> int:
518530
return int(region_data["effectiveLimit"])
519531
else:
520532
return 0
521-
return -1
533+
return global_quota
522534

523535

524536
def get_resource_id(
525537
accelerator_type: str,
526538
is_for_training: bool,
539+
is_spot: bool = False,
527540
is_restricted_image: bool = False,
528541
is_dynamic_workload_scheduler: bool = False,
529542
) -> str:
@@ -533,6 +546,7 @@ def get_resource_id(
533546
accelerator_type: The accelerator type.
534547
is_for_training: Whether the resource is used for training. Set false for
535548
serving use case.
549+
is_spot: Whether the resource is used with Spot.
536550
is_restricted_image: Whether the image is hosted in `vertex-ai-restricted`.
537551
is_dynamic_workload_scheduler: Whether the resource is used with Dynamic
538552
Workload Scheduler.
@@ -548,6 +562,7 @@ def get_resource_id(
548562
"NVIDIA_A100_80GB": "nvidia_a100_80gb_gpus",
549563
"NVIDIA_H100_80GB": "nvidia_h100_gpus",
550564
"NVIDIA_H100_MEGA_80GB": "nvidia_h100_mega_gpus",
565+
"NVIDIA_H200_141GB": "nvidia_h200_gpus",
551566
"NVIDIA_TESLA_T4": "nvidia_t4_gpus",
552567
"TPU_V5e": "tpu_v5e",
553568
"TPU_V3": "tpu_v3",
@@ -563,6 +578,10 @@ def get_resource_id(
563578
restricted_image_training_accelerator_map = {
564579
"NVIDIA_A100_80GB": "restricted_image_training_nvidia_a100_80gb_gpus",
565580
}
581+
spot_serving_accelerator_map = {
582+
key: f"custom_model_serving_preemptible_{accelerator_suffix_map[key]}"
583+
for key in accelerator_suffix_map
584+
}
566585
serving_accelerator_map = {
567586
key: f"custom_model_serving_{accelerator_suffix_map[key]}"
568587
for key in accelerator_suffix_map
@@ -591,8 +610,11 @@ def get_resource_id(
591610
else:
592611
if is_dynamic_workload_scheduler:
593612
raise ValueError("Dynamic Workload Scheduler does not work for serving.")
594-
if accelerator_type in serving_accelerator_map:
595-
return serving_accelerator_map[accelerator_type]
613+
accelerator_map = (
614+
spot_serving_accelerator_map if is_spot else serving_accelerator_map
615+
)
616+
if accelerator_type in accelerator_map:
617+
return accelerator_map[accelerator_type]
596618
else:
597619
raise ValueError(
598620
f"Could not find accelerator type: {accelerator_type} for serving."
@@ -605,13 +627,28 @@ def check_quota(
605627
accelerator_type: str,
606628
accelerator_count: int,
607629
is_for_training: bool,
630+
is_spot: bool = False,
608631
is_restricted_image: bool = False,
609632
is_dynamic_workload_scheduler: bool = False,
610-
):
611-
"""Checks if the project and the region has the required quota."""
633+
) -> None:
634+
"""Checks if the project and the region has the required quota.
635+
636+
Args:
637+
project_id: The project id.
638+
region: The region.
639+
accelerator_type: The accelerator type.
640+
accelerator_count: The number of accelerators to check quota for.
641+
is_for_training: Whether the resource is used for training. Set false for
642+
serving use case.
643+
is_spot: Whether the resource is used with Spot.
644+
is_restricted_image: Whether the image is hosted in `vertex-ai-restricted`.
645+
is_dynamic_workload_scheduler: Whether the resource is used with Dynamic
646+
Workload Scheduler.
647+
"""
612648
resource_id = get_resource_id(
613649
accelerator_type,
614650
is_for_training=is_for_training,
651+
is_spot=is_spot,
615652
is_restricted_image=is_restricted_image,
616653
is_dynamic_workload_scheduler=is_dynamic_workload_scheduler,
617654
)

0 commit comments

Comments (0)