refactor: remove custom from variable name and add default values

edersonbrilhante · edersonbrilhante · commit dc6e8d82edf1 · 2026-01-13T19:06:50.000+01:00
diff --git a/README.md b/README.md
@@ -117,7 +117,7 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh)
 | <a name="input_block_device_mappings"></a> [block\_device\_mappings](#input\_block\_device\_mappings) | The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`. | <pre>list(object({<br/>    delete_on_termination = optional(bool, true)<br/>    device_name           = optional(string, "/dev/xvda")<br/>    encrypted             = optional(bool, true)<br/>    iops                  = optional(number)<br/>    kms_key_id            = optional(string)<br/>    snapshot_id           = optional(string)<br/>    throughput            = optional(number)<br/>    volume_size           = number<br/>    volume_type           = optional(string, "gp3")<br/>  }))</pre> | <pre>[<br/>  {<br/>    "volume_size": 30<br/>  }<br/>]</pre> | no |
 | <a name="input_cloudwatch_config"></a> [cloudwatch\_config](#input\_cloudwatch\_config) | (optional) Replaces the module's default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details. | `string` | `null` | no |
 | <a name="input_create_service_linked_role_spot"></a> [create\_service\_linked\_role\_spot](#input\_create\_service\_linked\_role\_spot) | (optional) create the service linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no |
-| <a name="input_custom_scale_errors"></a> [custom\_scale\_errors](#input\_custom\_scale\_errors) | List of aws error codesthat should trigger retry during scale up. This list will replace the default errors defined in the variable `defaultScaleErrors` in https://github.com/github-aws-runners/terraform-aws-github-runner/blob/main/lambdas/functions/control-plane/src/aws/runners.ts | `list(string)` | `[]` | no |
+| <a name="input_scale_errors"></a> [scale\_errors](#input\_scale\_errors) | List of aws error codes that should trigger retry during scale up. This list will replace the default errors defined in the variable `defaultScaleErrors` in https://github.com/github-aws-runners/terraform-aws-github-runner/blob/main/lambdas/functions/control-plane/src/aws/runners.ts | `list(string)` | `[]` | no |
 | <a name="input_delay_webhook_event"></a> [delay\_webhook\_event](#input\_delay\_webhook\_event) | The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event. | `number` | `30` | no |
 | <a name="input_disable_runner_autoupdate"></a> [disable\_runner\_autoupdate](#input\_disable\_runner\_autoupdate) | Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/) | `bool` | `false` | no |
 | <a name="input_enable_ami_housekeeper"></a> [enable\_ami\_housekeeper](#input\_enable\_ami\_housekeeper) | Option to disable the lambda to clean up old AMIs. | `bool` | `false` | no |
diff --git a/lambdas/functions/control-plane/src/aws/runners.d.ts b/lambdas/functions/control-plane/src/aws/runners.d.ts
@@ -45,5 +45,5 @@ export interface RunnerInputParameters {
   amiIdSsmParameterName?: string;
   tracingEnabled?: boolean;
   onDemandFailoverOnError?: string[];
-  customScaleErrors?: string[];
+  scaleErrors: string[];
 }
diff --git a/lambdas/functions/control-plane/src/aws/runners.test.ts b/lambdas/functions/control-plane/src/aws/runners.test.ts
@@ -461,19 +461,6 @@ describe('create runner with errors', () => {
     expect(mockSSMClient).not.toHaveReceivedCommand(PutParameterCommand);
   });
 
-  it('test ScaleError with custom scale error.', async () => {
-    createFleetMockWithErrors(['CustomAWSError']);
-
-    await expect(
-      createRunner(createRunnerConfig({ ...defaultRunnerConfig, customScaleErrors: ['CustomAWSError'] })),
-    ).rejects.toBeInstanceOf(ScaleError);
-    expect(mockEC2Client).toHaveReceivedCommandWith(
-      CreateFleetCommand,
-      expectedCreateFleetRequest(defaultExpectedFleetRequestValues),
-    );
-    expect(mockSSMClient).not.toHaveReceivedCommand(PutParameterCommand);
-  });
-
   it('test ScaleError with multiple error.', async () => {
     createFleetMockWithErrors(['UnfulfillableCapacity', 'MaxSpotInstanceCountExceeded', 'NotMappedError']);
 
@@ -716,7 +703,7 @@ interface RunnerConfig {
   amiIdSsmParameterName?: string;
   tracingEnabled?: boolean;
   onDemandFailoverOnError?: string[];
-  customScaleErrors?: string[];
+  scaleErrors: string[];
 }
 
 function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
@@ -736,7 +723,7 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
     amiIdSsmParameterName: runnerConfig.amiIdSsmParameterName,
     tracingEnabled: runnerConfig.tracingEnabled,
     onDemandFailoverOnError: runnerConfig.onDemandFailoverOnError,
-    customScaleErrors: runnerConfig.customScaleErrors,
+    scaleErrors: runnerConfig.scaleErrors,
   };
 }
 
diff --git a/lambdas/functions/control-plane/src/aws/runners.ts b/lambdas/functions/control-plane/src/aws/runners.ts
@@ -197,23 +197,7 @@ async function processFleetResult(
     return instances;
   }
 
-  // Educated guess of errors that would make sense to retry based on the list
-  // https://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html
-  const defaultScaleErrors = [
-    'UnfulfillableCapacity',
-    'MaxSpotInstanceCountExceeded',
-    'TargetCapacityLimitExceededException',
-    'RequestLimitExceeded',
-    'ResourceLimitExceeded',
-    'MaxSpotInstanceCountExceeded',
-    'MaxSpotFleetRequestCountExceeded',
-    'InsufficientInstanceCapacity',
-  ];
-
-  const scaleErrors =
-    runnerParameters.customScaleErrors && runnerParameters.customScaleErrors.length > 0
-      ? runnerParameters.customScaleErrors
-      : defaultScaleErrors;
+  const scaleErrors = runnerParameters.scaleErrors;
 
   const failedCount = countScaleErrors(errors, scaleErrors);
   if (failedCount > 0) {
diff --git a/lambdas/functions/control-plane/src/modules.d.ts b/lambdas/functions/control-plane/src/modules.d.ts
@@ -3,7 +3,7 @@ declare namespace NodeJS {
     AWS_REGION: string;
     ENABLE_METRIC_GITHUB_APP_RATE_LIMIT: string;
     ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS: string;
-    CUSTOM_SCALE_ERRORS: string;
+    SCALE_ERRORS: string;
     ENVIRONMENT: string;
     GHES_URL: string;
     JOB_RETRY_CONFIG: string;
diff --git a/lambdas/functions/control-plane/src/pool/pool.ts b/lambdas/functions/control-plane/src/pool/pool.ts
@@ -41,9 +41,7 @@ export async function adjust(event: PoolEvent): Promise<void> {
   const onDemandFailoverOnError = process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS
     ? (JSON.parse(process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS) as [string])
     : [];
-  const customScaleErrors = process.env.CUSTOM_SCALE_ERRORS
-    ? (JSON.parse(process.env.CUSTOM_SCALE_ERRORS) as [string])
-    : [];
+  const scaleErrors = JSON.parse(process.env.SCALE_ERRORS) as [string];
 
   const { ghesApiUrl, ghesBaseUrl } = getGitHubEnterpriseApiUrl();
 
@@ -98,7 +96,7 @@ export async function adjust(event: PoolEvent): Promise<void> {
         amiIdSsmParameterName,
         tracingEnabled,
         onDemandFailoverOnError,
-        customScaleErrors,
+        scaleErrors,
       },
       topUp,
       githubInstallationClient,
diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts
@@ -105,7 +105,7 @@ const EXPECTED_RUNNER_PARAMS: RunnerInputParameters = {
   subnets: ['subnet-123'],
   tracingEnabled: false,
   onDemandFailoverOnError: [],
-  customScaleErrors: [],
+  scaleErrors: ['UnfulfillableCapacity', 'MaxSpotInstanceCountExceeded', 'TargetCapacityLimitExceededException'],
 };
 let expectedRunnerParams: RunnerInputParameters;
 
@@ -123,7 +123,8 @@ function setDefaults() {
   process.env.INSTANCE_TYPES = 'm5.large';
   process.env.INSTANCE_TARGET_CAPACITY_TYPE = 'spot';
   process.env.ENABLE_ON_DEMAND_FAILOVER = undefined;
-  process.env.CUSTOM_SCALE_ERRORS = undefined;
+  process.env.SCALE_ERRORS =
+    '["UnfulfillableCapacity","MaxSpotInstanceCountExceeded","TargetCapacityLimitExceededException"]';
 }
 
 beforeEach(() => {
@@ -813,11 +814,11 @@ describe('scaleUp with public GH', () => {
 
     it('creates a runner with correct config and labels and custom scale errors enabled.', async () => {
       process.env.RUNNER_LABELS = 'label1,label2';
-      process.env.CUSTOM_SCALE_ERRORS = JSON.stringify(['RequestLimitExceeded']);
+      process.env.SCALE_ERRORS = JSON.stringify(['RequestLimitExceeded']);
       await scaleUpModule.scaleUp(TEST_DATA);
       expect(createRunner).toBeCalledWith({
         ...expectedRunnerParams,
-        customScaleErrors: ['RequestLimitExceeded'],
+        scaleErrors: ['RequestLimitExceeded'],
       });
     });
 
diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts
@@ -62,7 +62,7 @@ interface CreateEC2RunnerConfig {
   amiIdSsmParameterName?: string;
   tracingEnabled?: boolean;
   onDemandFailoverOnError?: string[];
-  customScaleErrors?: string[];
+  scaleErrors: string[];
 }
 
 function generateRunnerServiceConfig(githubRunnerConfig: CreateGitHubRunnerConfig, token: string) {
@@ -256,9 +256,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
   const onDemandFailoverOnError = process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS
     ? (JSON.parse(process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS) as [string])
     : [];
-  const customScaleErrors = process.env.CUSTOM_SCALE_ERRORS
-    ? (JSON.parse(process.env.CUSTOM_SCALE_ERRORS) as [string])
-    : [];
+  const scaleErrors = JSON.parse(process.env.SCALE_ERRORS) as [string];
 
   const { ghesApiUrl, ghesBaseUrl } = getGitHubEnterpriseApiUrl();
 
@@ -435,7 +433,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
         amiIdSsmParameterName,
         tracingEnabled,
         onDemandFailoverOnError,
-        customScaleErrors,
+        scaleErrors,
       },
       newRunners,
       githubInstallationClient,
diff --git a/main.tf b/main.tf
@@ -187,7 +187,7 @@ module "runners" {
   enable_jit_config                    = var.enable_jit_config
   enable_job_queued_check              = var.enable_job_queued_check
   enable_on_demand_failover_for_errors = var.enable_runner_on_demand_failover_for_errors
-  custom_scale_errors                  = var.custom_scale_errors
+  scale_errors                         = var.scale_errors
   disable_runner_autoupdate            = var.disable_runner_autoupdate
   enable_managed_runner_security_group = var.enable_managed_runner_security_group
   enable_runner_detailed_monitoring    = var.enable_runner_detailed_monitoring
diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf
@@ -32,7 +32,7 @@ module "runners" {
   github_app_parameters                = local.github_app_parameters
   ebs_optimized                        = each.value.runner_config.ebs_optimized
   enable_on_demand_failover_for_errors = each.value.runner_config.enable_on_demand_failover_for_errors
-  custom_scale_errors                  = each.value.runner_config.custom_scale_errors
+  scale_errors                         = each.value.runner_config.scale_errors
   enable_organization_runners          = each.value.runner_config.enable_organization_runners
   enable_ephemeral_runners             = each.value.runner_config.enable_ephemeral_runners
   enable_jit_config                    = each.value.runner_config.enable_jit_config
diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf
@@ -71,15 +71,25 @@ variable "multi_runner_config" {
         id_ssm_parameter_arn = optional(string, null)
         kms_key_arn          = optional(string, null)
       }), null)
-      create_service_linked_role_spot         = optional(bool, false)
-      credit_specification                    = optional(string, null)
-      delay_webhook_event                     = optional(number, 30)
-      disable_runner_autoupdate               = optional(bool, false)
-      ebs_optimized                           = optional(bool, false)
-      enable_ephemeral_runners                = optional(bool, false)
-      enable_job_queued_check                 = optional(bool, null)
-      enable_on_demand_failover_for_errors    = optional(list(string), [])
-      custom_scale_errors                     = optional(list(string), [])
+      create_service_linked_role_spot      = optional(bool, false)
+      credit_specification                 = optional(string, null)
+      delay_webhook_event                  = optional(number, 30)
+      disable_runner_autoupdate            = optional(bool, false)
+      ebs_optimized                        = optional(bool, false)
+      enable_ephemeral_runners             = optional(bool, false)
+      enable_job_queued_check              = optional(bool, null)
+      enable_on_demand_failover_for_errors = optional(list(string), [])
+      scale_errors = optional(list(string), [
+        "UnfulfillableCapacity",
+        "MaxSpotInstanceCountExceeded",
+        "TargetCapacityLimitExceededException",
+        "RequestLimitExceeded",
+        "ResourceLimitExceeded",
+        "MaxSpotInstanceCountExceeded",
+        "MaxSpotFleetRequestCountExceeded",
+        "InsufficientInstanceCapacity",
+        "InsufficientCapacityOnHost",
+      ])
       enable_organization_runners             = optional(bool, false)
       enable_runner_binaries_syncer           = optional(bool, true)
       enable_ssm_on_runners                   = optional(bool, false)
@@ -198,7 +208,7 @@ variable "multi_runner_config" {
         enable_ephemeral_runners: "Enable ephemeral runners, runners will only be used once."
         enable_job_queued_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT configuration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
         enable_on_demand_failover_for_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
-        custom_scale_errors: "List of aws error codesthat should trigger retry during scale up. This list will replace the default errors defined in the variable `defaultScaleErrors` in https://github.com/github-aws-runners/terraform-aws-github-runner/blob/main/lambdas/functions/control-plane/src/aws/runners.ts"
+        scale_errors: "List of aws error codes that should trigger retry during scale up. This list will replace the default errors defined in the variable `defaultScaleErrors` in https://github.com/github-aws-runners/terraform-aws-github-runner/blob/main/lambdas/functions/control-plane/src/aws/runners.ts"
         enable_organization_runners: "Register runners to organization, instead of repo level"
         enable_runner_binaries_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
         enable_ssm_on_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
diff --git a/modules/runners/README.md b/modules/runners/README.md
@@ -145,7 +145,7 @@ yarn run dist
 | <a name="input_cpu_options"></a> [cpu\_options](#input\_cpu\_options) | The CPU options for the instance. See https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template#cpu-options for details. Note that not all instance types support CPU options, see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-optimize-cpu.html#instance-cpu-options | <pre>object({<br/>    core_count       = number<br/>    threads_per_core = number<br/>  })</pre> | `null` | no |
 | <a name="input_create_service_linked_role_spot"></a> [create\_service\_linked\_role\_spot](#input\_create\_service\_linked\_role\_spot) | (optional) create the service linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no |
 | <a name="input_credit_specification"></a> [credit\_specification](#input\_credit\_specification) | The credit option for CPU usage of a T instance. Can be unset, "standard" or "unlimited". | `string` | `null` | no |
-| <a name="input_custom_scale_errors"></a> [custom\_scale\_errors](#input\_custom\_scale\_errors) | List of aws error codesthat should trigger retry during scale up. This list will replace the default errors defined in the variable `defaultScaleErrors` in https://github.com/github-aws-runners/terraform-aws-github-runner/blob/main/lambdas/functions/control-plane/src/aws/runners.ts | `list(string)` | `[]` | no |
+| <a name="input_scale_errors"></a> [scale\_errors](#input\_scale\_errors) | List of aws error codes that should trigger retry during scale up. This list will replace the default errors defined in the variable `defaultScaleErrors` in https://github.com/github-aws-runners/terraform-aws-github-runner/blob/main/lambdas/functions/control-plane/src/aws/runners.ts | `list(string)` | `[]` | no |
 | <a name="input_disable_runner_autoupdate"></a> [disable\_runner\_autoupdate](#input\_disable\_runner\_autoupdate) | Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/) | `bool` | `false` | no |
 | <a name="input_ebs_optimized"></a> [ebs\_optimized](#input\_ebs\_optimized) | The EC2 EBS optimized configuration. | `bool` | `false` | no |
 | <a name="input_egress_rules"></a> [egress\_rules](#input\_egress\_rules) | List of egress rules for the GitHub runner instances. | <pre>list(object({<br/>    cidr_blocks      = list(string)<br/>    ipv6_cidr_blocks = list(string)<br/>    prefix_list_ids  = list(string)<br/>    from_port        = number<br/>    protocol         = string<br/>    security_groups  = list(string)<br/>    self             = bool<br/>    to_port          = number<br/>    description      = string<br/>  }))</pre> | <pre>[<br/>  {<br/>    "cidr_blocks": [<br/>      "0.0.0.0/0"<br/>    ],<br/>    "description": null,<br/>    "from_port": 0,<br/>    "ipv6_cidr_blocks": [<br/>      "::/0"<br/>    ],<br/>    "prefix_list_ids": null,<br/>    "protocol": "-1",<br/>    "security_groups": null,<br/>    "self": null,<br/>    "to_port": 0<br/>  }<br/>]</pre> | no |
diff --git a/modules/runners/pool.tf b/modules/runners/pool.tf
@@ -42,7 +42,7 @@ module "pool" {
       ephemeral                            = var.enable_ephemeral_runners
       enable_jit_config                    = var.enable_jit_config
       enable_on_demand_failover_for_errors = var.enable_on_demand_failover_for_errors
-      custom_scale_errors                  = var.custom_scale_errors
+      scale_errors                         = var.scale_errors
       boot_time_in_minutes                 = var.runner_boot_time_in_minutes
       labels                               = var.runner_labels
       launch_template                      = aws_launch_template.runner
diff --git a/modules/runners/pool/README.md b/modules/runners/pool/README.md
diff --git a/modules/runners/pool/main.tf b/modules/runners/pool/main.tf
diff --git a/modules/runners/pool/variables.tf b/modules/runners/pool/variables.tf
diff --git a/modules/runners/scale-up.tf b/modules/runners/scale-up.tf
diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf
diff --git a/variables.tf b/variables.tf

Original file line number	Diff line number	Diff line change
`@@ -45,5 +45,5 @@ export interface RunnerInputParameters {`
`45`	`45`	`amiIdSsmParameterName?: string;`
`46`	`46`	`tracingEnabled?: boolean;`
`47`	`47`	`onDemandFailoverOnError?: string[];`
`48`		`- customScaleErrors?: string[];`
	`48`	`+ scaleErrors: string[];`
`49`	`49`	`}`