diff --git a/lambdas/functions/control-plane/src/aws/runners.d.ts b/lambdas/functions/control-plane/src/aws/runners.d.ts index c891500f27..d199391aab 100644 --- a/lambdas/functions/control-plane/src/aws/runners.d.ts +++ b/lambdas/functions/control-plane/src/aws/runners.d.ts @@ -1,4 +1,8 @@ -import { DefaultTargetCapacityType, SpotAllocationStrategy } from '@aws-sdk/client-ec2'; +import { + DefaultTargetCapacityType, + FleetOnDemandAllocationStrategy, + SpotAllocationStrategy, +} from '@aws-sdk/client-ec2'; import { LambdaRunnerSource } from '../scale-runners/scale-up'; export type RunnerType = 'Org' | 'Repo'; @@ -38,9 +42,10 @@ export interface RunnerInputParameters { launchTemplateName: string; ec2instanceCriteria: { instanceTypes: string[]; + instanceTypePriorities?: Record; targetCapacityType: DefaultTargetCapacityType; maxSpotPrice?: string; - instanceAllocationStrategy: SpotAllocationStrategy; + instanceAllocationStrategy: SpotAllocationStrategy | FleetOnDemandAllocationStrategy; }; numberOfRunners: number; source: LambdaRunnerSource; diff --git a/lambdas/functions/control-plane/src/aws/runners.test.ts b/lambdas/functions/control-plane/src/aws/runners.test.ts index 4243e4b06b..10cc75d116 100644 --- a/lambdas/functions/control-plane/src/aws/runners.test.ts +++ b/lambdas/functions/control-plane/src/aws/runners.test.ts @@ -10,6 +10,7 @@ import { DescribeInstancesCommand, type DescribeInstancesResult, EC2Client, + FleetOnDemandAllocationStrategy, SpotAllocationStrategy, TerminateInstancesCommand, } from '@aws-sdk/client-ec2'; @@ -389,11 +390,51 @@ describe('create runner', () => { }); it('calls create fleet of 1 instance with the on-demand capacity', async () => { - await createRunner(createRunnerConfig({ ...defaultRunnerConfig, capacityType: 'on-demand' })); + await createRunner( + createRunnerConfig({ ...defaultRunnerConfig, capacityType: 'on-demand', allocationStrategy: 'lowest-price' }), + ); + expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, { + ...expectedCreateFleetRequest({ + ...defaultExpectedFleetRequestValues, + capacityType: 'on-demand', + allocationStrategy: 'lowest-price', + }), + }); + }); + + it('calls create fleet with on-demand capacity and prioritized allocation strategy', async () => { + await createRunner( + createRunnerConfig({ + ...defaultRunnerConfig, + capacityType: 'on-demand', + allocationStrategy: FleetOnDemandAllocationStrategy.PRIORITIZED, + }), + ); expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, { ...expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, capacityType: 'on-demand', + allocationStrategy: FleetOnDemandAllocationStrategy.PRIORITIZED, + }), + }); + }); + + it('calls create fleet with custom instance type priorities', async () => { + const priorities = { 'm5.large': 10, 'c5.large': 5 }; + await createRunner( + createRunnerConfig({ + ...defaultRunnerConfig, + capacityType: 'on-demand', + allocationStrategy: FleetOnDemandAllocationStrategy.PRIORITIZED, + instanceTypePriorities: priorities, + }), + ); + expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, { + ...expectedCreateFleetRequest({ + ...defaultExpectedFleetRequestValues, + capacityType: 'on-demand', + allocationStrategy: FleetOnDemandAllocationStrategy.PRIORITIZED, + instanceTypePriorities: priorities, }), }); }); @@ -631,12 +672,13 @@ describe('create runner with errors fail over to OnDemand', () => { }), }); - // second call with with OnDemand fallback + // second call with with OnDemand fallback, allocation strategy defaults to lowest-price expect(mockEC2Client).toHaveReceivedNthCommandWith(2, CreateFleetCommand, { ...expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, totalTargetCapacity: 1, capacityType: 'on-demand', + allocationStrategy: 'lowest-price', }), }); }); @@ -673,12 +715,13 @@ describe('create runner with errors fail over to OnDemand', () => { }), }); - // second call with with OnDemand failback, capacity is reduced by 1 + // second call with with OnDemand failback, capacity is reduced by 1, allocation strategy defaults to lowest-price expect(mockEC2Client).toHaveReceivedNthCommandWith(2, CreateFleetCommand, { ...expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, totalTargetCapacity: 1, capacityType: 'on-demand', + allocationStrategy: 'lowest-price', }), }); }); @@ -748,7 +791,8 @@ function createFleetMockWithWithOnDemandFallback(errors: string[], instances?: s interface RunnerConfig { type: RunnerType; capacityType: DefaultTargetCapacityType; - allocationStrategy: SpotAllocationStrategy; + allocationStrategy: SpotAllocationStrategy | FleetOnDemandAllocationStrategy; + instanceTypePriorities?: Record; maxSpotPrice?: string; amiIdSsmParameterName?: string; tracingEnabled?: boolean; @@ -766,6 +810,7 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters { launchTemplateName: LAUNCH_TEMPLATE, ec2instanceCriteria: { instanceTypes: ['m5.large', 'c5.large'], + instanceTypePriorities: runnerConfig.instanceTypePriorities, targetCapacityType: runnerConfig.capacityType, maxSpotPrice: runnerConfig.maxSpotPrice, instanceAllocationStrategy: runnerConfig.allocationStrategy, @@ -782,7 +827,8 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters { interface ExpectedFleetRequestValues { type: 'Repo' | 'Org'; capacityType: DefaultTargetCapacityType; - allocationStrategy: SpotAllocationStrategy; + allocationStrategy: SpotAllocationStrategy | FleetOnDemandAllocationStrategy; + instanceTypePriorities?: Record; maxSpotPrice?: string; totalTargetCapacity: number; imageId?: string; @@ -815,26 +861,46 @@ function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues): { InstanceType: 'm5.large', SubnetId: 'subnet-123', + ...(expectedValues.allocationStrategy === 'prioritized' && { + Priority: expectedValues.instanceTypePriorities?.['m5.large'] ?? 0, + }), }, { InstanceType: 'c5.large', SubnetId: 'subnet-123', + ...(expectedValues.allocationStrategy === 'prioritized' && { + Priority: expectedValues.instanceTypePriorities?.['c5.large'] ?? 1, + }), }, { InstanceType: 'm5.large', SubnetId: 'subnet-456', + ...(expectedValues.allocationStrategy === 'prioritized' && { + Priority: expectedValues.instanceTypePriorities?.['m5.large'] ?? 0, + }), }, { InstanceType: 'c5.large', SubnetId: 'subnet-456', + ...(expectedValues.allocationStrategy === 'prioritized' && { + Priority: expectedValues.instanceTypePriorities?.['c5.large'] ?? 1, + }), }, ], }, ], - SpotOptions: { - AllocationStrategy: expectedValues.allocationStrategy, - MaxTotalPrice: expectedValues.maxSpotPrice, - }, + ...(expectedValues.capacityType === 'spot' + ? { + SpotOptions: { + AllocationStrategy: expectedValues.allocationStrategy, + MaxTotalPrice: expectedValues.maxSpotPrice, + }, + } + : { + OnDemandOptions: { + AllocationStrategy: expectedValues.allocationStrategy, + }, + }), TagSpecifications: [ { ResourceType: 'instance', diff --git a/lambdas/functions/control-plane/src/aws/runners.ts b/lambdas/functions/control-plane/src/aws/runners.ts index 193c82d2e7..ba1d85bba4 100644 --- a/lambdas/functions/control-plane/src/aws/runners.ts +++ b/lambdas/functions/control-plane/src/aws/runners.ts @@ -7,6 +7,8 @@ import { DescribeInstancesResult, EC2Client, FleetLaunchTemplateOverridesRequest, + FleetOnDemandAllocationStrategy, + SpotAllocationStrategy, Tag, TerminateInstancesCommand, _InstanceType, @@ -125,14 +127,17 @@ function generateFleetOverrides( subnetIds: string[], instancesTypes: string[], amiId?: string, + allocationStrategy?: string, + instanceTypePriorities?: Record, ): FleetLaunchTemplateOverridesRequest[] { const result: FleetLaunchTemplateOverridesRequest[] = []; subnetIds.forEach((s) => { - instancesTypes.forEach((i) => { + instancesTypes.forEach((i, index) => { const item: FleetLaunchTemplateOverridesRequest = { SubnetId: s, InstanceType: i as _InstanceType, ImageId: amiId, + ...(allocationStrategy === 'prioritized' && { Priority: instanceTypePriorities?.[i] ?? index }), }; result.push(item); }); @@ -187,11 +192,21 @@ async function processFleetResult( logger.warn(`Create fleet failed, initatiing fall back to on demand instances.`); logger.debug('Create fleet failed.', { data: fleet.Errors }); const numberOfInstances = runnerParameters.numberOfRunners - instances.length; + const onDemandValidStrategies = ['lowest-price', 'prioritized']; + const failoverAllocationStrategy = onDemandValidStrategies.includes( + runnerParameters.ec2instanceCriteria.instanceAllocationStrategy, + ) + ? runnerParameters.ec2instanceCriteria.instanceAllocationStrategy + : 'lowest-price'; const instancesOnDemand = await createRunner({ ...runnerParameters, numberOfRunners: numberOfInstances, onDemandFailoverOnError: ['InsufficientInstanceCapacity'], - ec2instanceCriteria: { ...runnerParameters.ec2instanceCriteria, targetCapacityType: 'on-demand' }, + ec2instanceCriteria: { + ...runnerParameters.ec2instanceCriteria, + targetCapacityType: 'on-demand', + instanceAllocationStrategy: failoverAllocationStrategy, + }, }); instances.push(...instancesOnDemand); return instances; @@ -265,13 +280,25 @@ async function createInstances( runnerParameters.subnets, runnerParameters.ec2instanceCriteria.instanceTypes, amiIdOverride, + runnerParameters.ec2instanceCriteria.instanceAllocationStrategy, + runnerParameters.ec2instanceCriteria.instanceTypePriorities, ), }, ], - SpotOptions: { - MaxTotalPrice: runnerParameters.ec2instanceCriteria.maxSpotPrice, - AllocationStrategy: runnerParameters.ec2instanceCriteria.instanceAllocationStrategy, - }, + ...(runnerParameters.ec2instanceCriteria.targetCapacityType === 'spot' + ? { + SpotOptions: { + MaxTotalPrice: runnerParameters.ec2instanceCriteria.maxSpotPrice, + AllocationStrategy: runnerParameters.ec2instanceCriteria + .instanceAllocationStrategy as SpotAllocationStrategy, + }, + } + : { + OnDemandOptions: { + AllocationStrategy: runnerParameters.ec2instanceCriteria + .instanceAllocationStrategy as FleetOnDemandAllocationStrategy, + }, + }), TargetCapacitySpecification: { TotalTargetCapacity: runnerParameters.numberOfRunners, DefaultTargetCapacityType: runnerParameters.ec2instanceCriteria.targetCapacityType, diff --git a/lambdas/functions/control-plane/src/pool/pool.ts b/lambdas/functions/control-plane/src/pool/pool.ts index cece8d9951..c5cfcd1b7e 100644 --- a/lambdas/functions/control-plane/src/pool/pool.ts +++ b/lambdas/functions/control-plane/src/pool/pool.ts @@ -36,6 +36,9 @@ export async function adjust(event: PoolEvent): Promise { const launchTemplateName = process.env.LAUNCH_TEMPLATE_NAME; const instanceMaxSpotPrice = process.env.INSTANCE_MAX_SPOT_PRICE; const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default + const instanceTypePriorities = process.env.INSTANCE_TYPE_PRIORITIES + ? (JSON.parse(process.env.INSTANCE_TYPE_PRIORITIES) as Record) + : undefined; const runnerOwner = process.env.RUNNER_OWNER; const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME; const tracingEnabled = yn(process.env.POWERTOOLS_TRACE_ENABLED, { default: false }); @@ -92,6 +95,7 @@ export async function adjust(event: PoolEvent): Promise { { ec2instanceCriteria: { instanceTypes, + instanceTypePriorities, targetCapacityType: instanceTargetCapacityType, maxSpotPrice: instanceMaxSpotPrice, instanceAllocationStrategy: instanceAllocationStrategy, diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts index 395c87e8f8..2f99260563 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts @@ -308,6 +308,9 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise) + : undefined; const enableJobQueuedCheck = yn(process.env.ENABLE_JOB_QUEUED_CHECK, { default: true }); const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME; const runnerNamePrefix = process.env.RUNNER_NAME_PREFIX || ''; @@ -497,6 +500,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise