Skip to content

Commit 5fc0e03

Browse files
vertex-mg-bot authored and copybara-github committed
Use separate regions for training, evaluation, and deployment in Llama 3.1 finetuning notebook
PiperOrigin-RevId: 835030738
1 parent ff2a162 commit 5fc0e03

1 file changed

Lines changed: 26 additions & 7 deletions

File tree

notebooks/community/model_garden/model_garden_pytorch_llama3_1_finetuning.ipynb

Lines changed: 26 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@
139139
"\n",
140140
"REGION = \"\" # @param {type:\"string\"}\n",
141141
"\n",
142-
"# Import the necessary packages\n",
142+
"# Import the necessary packages.\n",
143143
"! rm -rf vertex-ai-samples && git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git\n",
144144
"! cd vertex-ai-samples && git reset --hard 7ae13b346a72ee2a2dc8152dd40c6ddd72d6c810\n",
145145
"\n",
@@ -266,9 +266,7 @@
266266
" VERTEX_AI_MODEL_GARDEN_LLAMA3_1\n",
267267
" ), \"Click the agreement of Llama3.1 in Vertex AI Model Garden, and get the GCS path of the model artifacts.\"\n",
268268
"\n",
269-
"MODEL_BUCKET = VERTEX_AI_MODEL_GARDEN_LLAMA3_1\n",
270-
"\n",
271-
"# @markdown ---"
269+
"MODEL_BUCKET = VERTEX_AI_MODEL_GARDEN_LLAMA3_1"
272270
]
273271
},
274272
{
@@ -451,6 +449,13 @@
451449
"# @markdown Accelerator type to use for training.\n",
452450
"training_accelerator_type = \"NVIDIA_A100_80GB\" # @param [\"NVIDIA_A100_80GB\", \"NVIDIA_H100_80GB\"]\n",
453451
"\n",
452+
"# @markdown Set the Training Region. If left empty, it defaults to `REGION`.\n",
453+
"TRAINING_REGION = \"\" # @param {type: \"string\"}\n",
454+
"if not TRAINING_REGION:\n",
455+
" TRAINING_REGION = REGION\n",
456+
"\n",
457+
"aiplatform.init(location=TRAINING_REGION)\n",
458+
"\n",
454459
"# The pre-built training docker image.\n",
455460
"if training_accelerator_type == \"NVIDIA_A100_80GB\":\n",
456461
" repo = \"us-docker.pkg.dev/vertex-ai-restricted\"\n",
@@ -544,7 +549,7 @@
544549
"\n",
545550
"common_util.check_quota(\n",
546551
" project_id=PROJECT_ID,\n",
547-
" region=REGION,\n",
552+
" region=TRAINING_REGION,\n",
548553
" accelerator_type=training_accelerator_type,\n",
549554
" accelerator_count=per_node_accelerator_count * replica_count,\n",
550555
" is_for_training=True,\n",
@@ -701,6 +706,13 @@
701706
"# @markdown Set `RUN_EVALUATION` to False to skip the evaluation job.\n",
702707
"RUN_EVALUATION = True # @param {type:\"boolean\"}\n",
703708
"\n",
709+
"# @markdown Set the Evaluation Region. If left empty, it defaults to `REGION`.\n",
710+
"EVAL_REGION = \"\" # @param {type: \"string\"}\n",
711+
"if not EVAL_REGION:\n",
712+
" EVAL_REGION = REGION\n",
713+
"\n",
714+
"aiplatform.init(location=EVAL_REGION)\n",
715+
"\n",
704716
"if \"8b\" in base_model_id.lower():\n",
705717
" eval_machine_type = \"g2-standard-24\"\n",
706718
" eval_accelerator_type = \"NVIDIA_L4\"\n",
@@ -759,7 +771,7 @@
759771
" ]\n",
760772
" common_util.check_quota(\n",
761773
" project_id=PROJECT_ID,\n",
762-
" region=REGION,\n",
774+
" region=EVAL_REGION,\n",
763775
" accelerator_type=eval_accelerator_type,\n",
764776
" accelerator_count=eval_accelerator_count,\n",
765777
" is_for_training=True,\n",
@@ -804,6 +816,13 @@
804816
"# The pre-built serving docker image for vLLM.\n",
805817
"VLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20250116_0916_RC00\"\n",
806818
"\n",
819+
"# @markdown Set the Deployment Region. If left empty, it defaults to `REGION`.\n",
820+
"DEPLOY_REGION = \"\" # @param {type: \"string\"}\n",
821+
"if not DEPLOY_REGION:\n",
822+
" DEPLOY_REGION = REGION\n",
823+
"\n",
824+
"aiplatform.init(location=DEPLOY_REGION)\n",
825+
"\n",
807826
"# Find Vertex AI prediction supported accelerators and regions [here](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute).\n",
808827
"if \"8b\" in base_model_id.lower():\n",
809828
" machine_type = \"g2-standard-12\"\n",
@@ -822,7 +841,7 @@
822841
"\n",
823842
"common_util.check_quota(\n",
824843
" project_id=PROJECT_ID,\n",
825-
" region=REGION,\n",
844+
" region=DEPLOY_REGION,\n",
826845
" accelerator_type=accelerator_type,\n",
827846
" accelerator_count=per_node_accelerator_count,\n",
828847
" is_for_training=False,\n",

0 commit comments

Comments
 (0)