Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions lambdas/functions/control-plane/src/aws/runners.d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import { DefaultTargetCapacityType, SpotAllocationStrategy } from '@aws-sdk/client-ec2';
import {
DefaultTargetCapacityType,
FleetOnDemandAllocationStrategy,
SpotAllocationStrategy,
} from '@aws-sdk/client-ec2';
import { LambdaRunnerSource } from '../scale-runners/scale-up';

export type RunnerType = 'Org' | 'Repo';
Expand Down Expand Up @@ -38,9 +42,10 @@ export interface RunnerInputParameters {
launchTemplateName: string;
ec2instanceCriteria: {
instanceTypes: string[];
instanceTypePriorities?: Record<string, number>;
targetCapacityType: DefaultTargetCapacityType;
maxSpotPrice?: string;
instanceAllocationStrategy: SpotAllocationStrategy;
instanceAllocationStrategy: SpotAllocationStrategy | FleetOnDemandAllocationStrategy;
};
numberOfRunners: number;
source: LambdaRunnerSource;
Expand Down
84 changes: 75 additions & 9 deletions lambdas/functions/control-plane/src/aws/runners.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
DescribeInstancesCommand,
type DescribeInstancesResult,
EC2Client,
FleetOnDemandAllocationStrategy,
SpotAllocationStrategy,
TerminateInstancesCommand,
} from '@aws-sdk/client-ec2';
Expand Down Expand Up @@ -389,11 +390,51 @@ describe('create runner', () => {
});

it('calls create fleet of 1 instance with the on-demand capacity', async () => {
await createRunner(createRunnerConfig({ ...defaultRunnerConfig, capacityType: 'on-demand' }));
await createRunner(
createRunnerConfig({ ...defaultRunnerConfig, capacityType: 'on-demand', allocationStrategy: 'lowest-price' }),
);
expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, {
...expectedCreateFleetRequest({
...defaultExpectedFleetRequestValues,
capacityType: 'on-demand',
allocationStrategy: 'lowest-price',
}),
});
});

it('calls create fleet with on-demand capacity and prioritized allocation strategy', async () => {
  // No explicit priorities are supplied here, so the expected request relies on the
  // default priorities baked into expectedCreateFleetRequest (m5.large -> 0, c5.large -> 1).
  const strategy = FleetOnDemandAllocationStrategy.PRIORITIZED;

  const runnerInput = createRunnerConfig({
    ...defaultRunnerConfig,
    capacityType: 'on-demand',
    allocationStrategy: strategy,
  });
  await createRunner(runnerInput);

  const expectedRequest = expectedCreateFleetRequest({
    ...defaultExpectedFleetRequestValues,
    capacityType: 'on-demand',
    allocationStrategy: strategy,
  });
  expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, { ...expectedRequest });
});

it('calls create fleet with custom instance type priorities', async () => {
  // Explicit per-instance-type priorities are passed in through the runner config and are
  // expected to show up as the Priority of each matching launch template override.
  const customPriorities = { 'm5.large': 10, 'c5.large': 5 };
  const strategy = FleetOnDemandAllocationStrategy.PRIORITIZED;

  const runnerInput = createRunnerConfig({
    ...defaultRunnerConfig,
    capacityType: 'on-demand',
    allocationStrategy: strategy,
    instanceTypePriorities: customPriorities,
  });
  await createRunner(runnerInput);

  const expectedRequest = expectedCreateFleetRequest({
    ...defaultExpectedFleetRequestValues,
    capacityType: 'on-demand',
    allocationStrategy: strategy,
    instanceTypePriorities: customPriorities,
  });
  expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, { ...expectedRequest });
});
Expand Down Expand Up @@ -631,12 +672,13 @@ describe('create runner with errors fail over to OnDemand', () => {
}),
});

// second call with with OnDemand fallback
// second call with OnDemand fallback; allocation strategy defaults to lowest-price
expect(mockEC2Client).toHaveReceivedNthCommandWith(2, CreateFleetCommand, {
...expectedCreateFleetRequest({
...defaultExpectedFleetRequestValues,
totalTargetCapacity: 1,
capacityType: 'on-demand',
allocationStrategy: 'lowest-price',
}),
});
});
Expand Down Expand Up @@ -673,12 +715,13 @@ describe('create runner with errors fail over to OnDemand', () => {
}),
});

// second call with with OnDemand failback, capacity is reduced by 1
// second call with OnDemand fallback, capacity is reduced by 1; allocation strategy defaults to lowest-price
expect(mockEC2Client).toHaveReceivedNthCommandWith(2, CreateFleetCommand, {
...expectedCreateFleetRequest({
...defaultExpectedFleetRequestValues,
totalTargetCapacity: 1,
capacityType: 'on-demand',
allocationStrategy: 'lowest-price',
}),
});
});
Expand Down Expand Up @@ -748,7 +791,8 @@ function createFleetMockWithWithOnDemandFallback(errors: string[], instances?: s
interface RunnerConfig {
type: RunnerType;
capacityType: DefaultTargetCapacityType;
allocationStrategy: SpotAllocationStrategy;
allocationStrategy: SpotAllocationStrategy | FleetOnDemandAllocationStrategy;
instanceTypePriorities?: Record<string, number>;
maxSpotPrice?: string;
amiIdSsmParameterName?: string;
tracingEnabled?: boolean;
Expand All @@ -766,6 +810,7 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
launchTemplateName: LAUNCH_TEMPLATE,
ec2instanceCriteria: {
instanceTypes: ['m5.large', 'c5.large'],
instanceTypePriorities: runnerConfig.instanceTypePriorities,
targetCapacityType: runnerConfig.capacityType,
maxSpotPrice: runnerConfig.maxSpotPrice,
instanceAllocationStrategy: runnerConfig.allocationStrategy,
Expand All @@ -782,7 +827,8 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
interface ExpectedFleetRequestValues {
type: 'Repo' | 'Org';
capacityType: DefaultTargetCapacityType;
allocationStrategy: SpotAllocationStrategy;
allocationStrategy: SpotAllocationStrategy | FleetOnDemandAllocationStrategy;
instanceTypePriorities?: Record<string, number>;
maxSpotPrice?: string;
totalTargetCapacity: number;
imageId?: string;
Expand Down Expand Up @@ -815,26 +861,46 @@ function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues):
{
InstanceType: 'm5.large',
SubnetId: 'subnet-123',
...(expectedValues.allocationStrategy === 'prioritized' && {
Priority: expectedValues.instanceTypePriorities?.['m5.large'] ?? 0,
}),
},
{
InstanceType: 'c5.large',
SubnetId: 'subnet-123',
...(expectedValues.allocationStrategy === 'prioritized' && {
Priority: expectedValues.instanceTypePriorities?.['c5.large'] ?? 1,
}),
},
{
InstanceType: 'm5.large',
SubnetId: 'subnet-456',
...(expectedValues.allocationStrategy === 'prioritized' && {
Priority: expectedValues.instanceTypePriorities?.['m5.large'] ?? 0,
}),
},
{
InstanceType: 'c5.large',
SubnetId: 'subnet-456',
...(expectedValues.allocationStrategy === 'prioritized' && {
Priority: expectedValues.instanceTypePriorities?.['c5.large'] ?? 1,
}),
},
],
},
],
SpotOptions: {
AllocationStrategy: expectedValues.allocationStrategy,
MaxTotalPrice: expectedValues.maxSpotPrice,
},
...(expectedValues.capacityType === 'spot'
? {
SpotOptions: {
AllocationStrategy: expectedValues.allocationStrategy,
MaxTotalPrice: expectedValues.maxSpotPrice,
},
}
: {
OnDemandOptions: {
AllocationStrategy: expectedValues.allocationStrategy,
},
}),
TagSpecifications: [
{
ResourceType: 'instance',
Expand Down
39 changes: 33 additions & 6 deletions lambdas/functions/control-plane/src/aws/runners.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import {
DescribeInstancesResult,
EC2Client,
FleetLaunchTemplateOverridesRequest,
FleetOnDemandAllocationStrategy,
SpotAllocationStrategy,
Tag,
TerminateInstancesCommand,
_InstanceType,
Expand Down Expand Up @@ -125,14 +127,17 @@ function generateFleetOverrides(
subnetIds: string[],
instancesTypes: string[],
amiId?: string,
allocationStrategy?: string,
instanceTypePriorities?: Record<string, number>,
): FleetLaunchTemplateOverridesRequest[] {
const result: FleetLaunchTemplateOverridesRequest[] = [];
subnetIds.forEach((s) => {
instancesTypes.forEach((i) => {
instancesTypes.forEach((i, index) => {
const item: FleetLaunchTemplateOverridesRequest = {
SubnetId: s,
InstanceType: i as _InstanceType,
ImageId: amiId,
...(allocationStrategy === 'prioritized' && { Priority: instanceTypePriorities?.[i] ?? index }),
};
result.push(item);
});
Expand Down Expand Up @@ -187,11 +192,21 @@ async function processFleetResult(
logger.warn(`Create fleet failed, initatiing fall back to on demand instances.`);
logger.debug('Create fleet failed.', { data: fleet.Errors });
const numberOfInstances = runnerParameters.numberOfRunners - instances.length;
const onDemandValidStrategies = ['lowest-price', 'prioritized'];
const failoverAllocationStrategy = onDemandValidStrategies.includes(
runnerParameters.ec2instanceCriteria.instanceAllocationStrategy,
)
? runnerParameters.ec2instanceCriteria.instanceAllocationStrategy
: 'lowest-price';
const instancesOnDemand = await createRunner({
...runnerParameters,
numberOfRunners: numberOfInstances,
onDemandFailoverOnError: ['InsufficientInstanceCapacity'],
ec2instanceCriteria: { ...runnerParameters.ec2instanceCriteria, targetCapacityType: 'on-demand' },
ec2instanceCriteria: {
...runnerParameters.ec2instanceCriteria,
targetCapacityType: 'on-demand',
instanceAllocationStrategy: failoverAllocationStrategy,
},
});
instances.push(...instancesOnDemand);
return instances;
Expand Down Expand Up @@ -265,13 +280,25 @@ async function createInstances(
runnerParameters.subnets,
runnerParameters.ec2instanceCriteria.instanceTypes,
amiIdOverride,
runnerParameters.ec2instanceCriteria.instanceAllocationStrategy,
runnerParameters.ec2instanceCriteria.instanceTypePriorities,
),
},
],
SpotOptions: {
MaxTotalPrice: runnerParameters.ec2instanceCriteria.maxSpotPrice,
AllocationStrategy: runnerParameters.ec2instanceCriteria.instanceAllocationStrategy,
},
...(runnerParameters.ec2instanceCriteria.targetCapacityType === 'spot'
? {
SpotOptions: {
MaxTotalPrice: runnerParameters.ec2instanceCriteria.maxSpotPrice,
AllocationStrategy: runnerParameters.ec2instanceCriteria
.instanceAllocationStrategy as SpotAllocationStrategy,
},
}
: {
OnDemandOptions: {
AllocationStrategy: runnerParameters.ec2instanceCriteria
.instanceAllocationStrategy as FleetOnDemandAllocationStrategy,
},
}),
TargetCapacitySpecification: {
TotalTargetCapacity: runnerParameters.numberOfRunners,
DefaultTargetCapacityType: runnerParameters.ec2instanceCriteria.targetCapacityType,
Expand Down
4 changes: 4 additions & 0 deletions lambdas/functions/control-plane/src/pool/pool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ export async function adjust(event: PoolEvent): Promise<void> {
const launchTemplateName = process.env.LAUNCH_TEMPLATE_NAME;
const instanceMaxSpotPrice = process.env.INSTANCE_MAX_SPOT_PRICE;
const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default
const instanceTypePriorities = process.env.INSTANCE_TYPE_PRIORITIES
? (JSON.parse(process.env.INSTANCE_TYPE_PRIORITIES) as Record<string, number>)
: undefined;
const runnerOwner = process.env.RUNNER_OWNER;
const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME;
const tracingEnabled = yn(process.env.POWERTOOLS_TRACE_ENABLED, { default: false });
Expand Down Expand Up @@ -92,6 +95,7 @@ export async function adjust(event: PoolEvent): Promise<void> {
{
ec2instanceCriteria: {
instanceTypes,
instanceTypePriorities,
targetCapacityType: instanceTargetCapacityType,
maxSpotPrice: instanceMaxSpotPrice,
instanceAllocationStrategy: instanceAllocationStrategy,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,9 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
const launchTemplateName = process.env.LAUNCH_TEMPLATE_NAME;
const instanceMaxSpotPrice = process.env.INSTANCE_MAX_SPOT_PRICE;
const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default
const instanceTypePriorities = process.env.INSTANCE_TYPE_PRIORITIES
? (JSON.parse(process.env.INSTANCE_TYPE_PRIORITIES) as Record<string, number>)
: undefined;
const enableJobQueuedCheck = yn(process.env.ENABLE_JOB_QUEUED_CHECK, { default: true });
const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME;
const runnerNamePrefix = process.env.RUNNER_NAME_PREFIX || '';
Expand Down Expand Up @@ -497,6 +500,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
{
ec2instanceCriteria: {
instanceTypes,
instanceTypePriorities,
targetCapacityType: instanceTargetCapacityType,
maxSpotPrice: instanceMaxSpotPrice,
instanceAllocationStrategy: instanceAllocationStrategy,
Expand Down
1 change: 1 addition & 0 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ module "runners" {
instance_types = var.instance_types
instance_target_capacity_type = var.instance_target_capacity_type
instance_allocation_strategy = var.instance_allocation_strategy
instance_type_priorities = var.instance_type_priorities
instance_max_spot_price = var.instance_max_spot_price
block_device_mappings = var.block_device_mappings

Expand Down
1 change: 1 addition & 0 deletions modules/multi-runner/runners.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ module "runners" {
instance_types = each.value.runner_config.instance_types
instance_target_capacity_type = each.value.runner_config.instance_target_capacity_type
instance_allocation_strategy = each.value.runner_config.instance_allocation_strategy
instance_type_priorities = each.value.runner_config.instance_type_priorities
instance_max_spot_price = each.value.runner_config.instance_max_spot_price
block_device_mappings = each.value.runner_config.block_device_mappings

Expand Down
4 changes: 3 additions & 1 deletion modules/multi-runner/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ variable "multi_runner_config" {
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_type_priorities = optional(map(number), null)
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
Expand Down Expand Up @@ -214,7 +215,8 @@ variable "multi_runner_config" {
enable_runner_binaries_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable_ssm_on_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance_allocation_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance_allocation_strategy: "The allocation strategy for creating instances. For spot, AWS recommends `capacity-optimized`; for on-demand, use `lowest-price` or `prioritized`. The AWS default is `lowest-price`."
instance_type_priorities: "A map of instance type to priority for the `prioritized` allocation strategy. Lower numbers mean higher priority. If not provided, priorities are assigned based on the order of `instance_types`."
instance_max_spot_price: "Max price price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance_target_capacity_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance_types: "List of instance types for the action runner. Defaults are based on runner_os (al2023 for linux and Windows Server Core for win)."
Expand Down
1 change: 1 addition & 0 deletions modules/runners/pool.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module "pool" {
user_agent = var.user_agent
github_app_parameters = var.github_app_parameters
instance_allocation_strategy = var.instance_allocation_strategy
instance_type_priorities = var.instance_type_priorities
instance_max_spot_price = var.instance_max_spot_price
instance_target_capacity_type = var.instance_target_capacity_type
instance_types = var.instance_types
Expand Down
1 change: 1 addition & 0 deletions modules/runners/pool/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ resource "aws_lambda_function" "pool" {
INSTANCE_ALLOCATION_STRATEGY = var.config.instance_allocation_strategy
INSTANCE_MAX_SPOT_PRICE = var.config.instance_max_spot_price
INSTANCE_TARGET_CAPACITY_TYPE = var.config.instance_target_capacity_type
INSTANCE_TYPE_PRIORITIES = var.config.instance_type_priorities != null ? jsonencode(var.config.instance_type_priorities) : ""
INSTANCE_TYPES = join(",", var.config.instance_types)
LAUNCH_TEMPLATE_NAME = var.config.runner.launch_template.name
LOG_LEVEL = var.config.lambda.log_level
Expand Down
1 change: 1 addition & 0 deletions modules/runners/pool/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ variable "config" {
})
})
instance_types = list(string)
instance_type_priorities = optional(map(number))
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
Expand Down
1 change: 1 addition & 0 deletions modules/runners/scale-up.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ resource "aws_lambda_function" "scale_up" {
INSTANCE_ALLOCATION_STRATEGY = var.instance_allocation_strategy
INSTANCE_MAX_SPOT_PRICE = var.instance_max_spot_price
INSTANCE_TARGET_CAPACITY_TYPE = var.instance_target_capacity_type
INSTANCE_TYPE_PRIORITIES = var.instance_type_priorities != null ? jsonencode(var.instance_type_priorities) : ""
INSTANCE_TYPES = join(",", var.instance_types)
LAUNCH_TEMPLATE_NAME = aws_launch_template.runner.name
LOG_LEVEL = var.log_level
Expand Down
10 changes: 8 additions & 2 deletions modules/runners/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,22 @@ variable "instance_target_capacity_type" {
}

variable "instance_allocation_strategy" {
description = "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
description = "The allocation strategy for creating instances. For spot, AWS recommends `capacity-optimized`; for on-demand, use `lowest-price` or `prioritized`. The AWS default is `lowest-price`."
type = string
default = "lowest-price"

validation {
condition = contains(["lowest-price", "diversified", "capacity-optimized", "capacity-optimized-prioritized", "price-capacity-optimized"], var.instance_allocation_strategy)
condition = contains(["lowest-price", "diversified", "capacity-optimized", "capacity-optimized-prioritized", "price-capacity-optimized", "prioritized"], var.instance_allocation_strategy)
error_message = "The instance allocation strategy does not match the allowed values."
}
}

# Optional per-instance-type priorities, used only with the `prioritized` allocation strategy.
# Defaults to null; in that case the scale-up lambda receives an empty INSTANCE_TYPE_PRIORITIES
# value instead of a JSON-encoded map.
variable "instance_type_priorities" {
description = "A map of instance type to priority for the `prioritized` allocation strategy. Lower numbers mean higher priority. If not provided, priorities are assigned based on the order of `instance_types`."
type = map(number)
default = null
}

variable "instance_max_spot_price" {
description = "Max price price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
type = string
Expand Down
Loading