@@ -5,7 +5,7 @@ import { createChildLogger } from '@aws-github-runner/aws-powertools-util';
55import moment from 'moment' ;
66
77import { createGithubAppAuth , createGithubInstallationAuth , createOctokitClient } from '../github/auth' ;
8- import { bootTimeExceeded , listEC2Runners , tag , untag , terminateRunner } from './../aws/runners' ;
8+ import { bootTimeExceeded , listEC2Runners , tag , untag , terminateRunner , stopRunner } from './../aws/runners' ;
99import { RunnerInfo , RunnerList } from './../aws/runners.d' ;
1010import { GhRunners , githubCache } from './cache' ;
1111import { ScalingDownConfig , getEvictionStrategy , getIdleRunnerCount } from './scale-down-config' ;
@@ -127,39 +127,52 @@ function runnerMinimumTimeExceeded(runner: RunnerInfo): boolean {
127127 return launchTimePlusMinimum < now ;
128128}
129129
130- async function removeRunner ( ec2runner : RunnerInfo , ghRunnerIds : number [ ] ) : Promise < void > {
/**
 * Decides whether a runner has been up long enough to be moved to the standby pool.
 *
 * The threshold (in minutes) is read from the `STANDBY_IDLE_TIME_MINUTES` environment
 * variable. When the variable is unset, empty, `0`, negative, or not a number, no waiting
 * period applies and the function returns `true`.
 *
 * NOTE(review): the elapsed time is measured from `runner.launchTime`, i.e. time since the
 * instance was launched, not time since the runner last finished a job. Confirm that this
 * is the intended meaning of "idle time".
 *
 * @param runner - EC2 runner whose `launchTime` is compared against the threshold.
 * @returns `true` when `launchTime` plus the threshold lies strictly before the current time.
 */
function runnerIdleTimeExceeded(runner: RunnerInfo): boolean {
  // Explicit radix so the value is always read as decimal.
  const idleTimeMinutes = Number.parseInt(process.env.STANDBY_IDLE_TIME_MINUTES || '0', 10);

  // A missing, malformed or non-positive setting means there is no waiting period; bail out
  // before NaN or a negative offset can reach the date arithmetic below.
  if (Number.isNaN(idleTimeMinutes) || idleTimeMinutes <= 0) {
    return true;
  }

  const standbyEligibleAt = moment(runner.launchTime).utc().add(idleTimeMinutes, 'minutes');
  return standbyEligibleAt.isBefore(moment().utc());
}
138+
139+ async function removeRunner ( ec2runner : RunnerInfo , ghRunnerIds : number [ ] , shouldStop : boolean = false ) : Promise < void > {
131140 const githubAppClient = await getOrCreateOctokit ( ec2runner ) ;
132141 try {
133142 const states = await Promise . all (
134143 ghRunnerIds . map ( async ( ghRunnerId ) => {
135- // Get busy state instead of using the output of listGitHubRunners(...) to minimize to race condition.
136144 return await getGitHubRunnerBusyState ( githubAppClient , ec2runner , ghRunnerId ) ;
137145 } ) ,
138146 ) ;
139147
140148 if ( states . every ( ( busy ) => busy === false ) ) {
141- const statuses = await Promise . all (
142- ghRunnerIds . map ( async ( ghRunnerId ) => {
143- return (
144- ec2runner . type === 'Org'
145- ? await githubAppClient . actions . deleteSelfHostedRunnerFromOrg ( {
146- runner_id : ghRunnerId ,
147- org : ec2runner . owner ,
148- } )
149- : await githubAppClient . actions . deleteSelfHostedRunnerFromRepo ( {
150- runner_id : ghRunnerId ,
151- owner : ec2runner . owner . split ( '/' ) [ 0 ] ,
152- repo : ec2runner . owner . split ( '/' ) [ 1 ] ,
153- } )
154- ) . status ;
155- } ) ,
156- ) ;
157-
158- if ( statuses . every ( ( status ) => status == 204 ) ) {
159- await terminateRunner ( ec2runner . instanceId ) ;
160- logger . info ( `AWS runner instance '${ ec2runner . instanceId } ' is terminated and GitHub runner is de-registered.` ) ;
149+ if ( shouldStop ) {
150+ await stopRunner ( ec2runner . instanceId ) ;
151+ logger . info ( `AWS runner instance '${ ec2runner . instanceId } ' is stopped and moved to standby.` ) ;
161152 } else {
162- logger . error ( `Failed to de-register GitHub runner: ${ statuses } ` ) ;
153+ const statuses = await Promise . all (
154+ ghRunnerIds . map ( async ( ghRunnerId ) => {
155+ return (
156+ ec2runner . type === 'Org'
157+ ? await githubAppClient . actions . deleteSelfHostedRunnerFromOrg ( {
158+ runner_id : ghRunnerId ,
159+ org : ec2runner . owner ,
160+ } )
161+ : await githubAppClient . actions . deleteSelfHostedRunnerFromRepo ( {
162+ runner_id : ghRunnerId ,
163+ owner : ec2runner . owner . split ( '/' ) [ 0 ] ,
164+ repo : ec2runner . owner . split ( '/' ) [ 1 ] ,
165+ } )
166+ ) . status ;
167+ } ) ,
168+ ) ;
169+
170+ if ( statuses . every ( ( status ) => status == 204 ) ) {
171+ await terminateRunner ( ec2runner . instanceId ) ;
172+ logger . info ( `AWS runner instance '${ ec2runner . instanceId } ' is terminated and GitHub runner is de-registered.` ) ;
173+ } else {
174+ logger . error ( `Failed to de-register GitHub runner: ${ statuses } ` ) ;
175+ }
163176 }
164177 } else {
165178 logger . info ( `Runner '${ ec2runner . instanceId } ' cannot be de-registered, because it is still busy.` ) ;
@@ -178,13 +191,25 @@ async function evaluateAndRemoveRunners(
178191 let idleCounter = getIdleRunnerCount ( scaleDownConfigs ) ;
179192 const evictionStrategy = getEvictionStrategy ( scaleDownConfigs ) ;
180193 const ownerTags = new Set ( ec2Runners . map ( ( runner ) => runner . owner ) ) ;
194+
195+ const standbyPoolSize = parseInt ( process . env . STANDBY_POOL_SIZE || '0' ) ;
196+ const environment = process . env . ENVIRONMENT ;
181197
182198 for ( const ownerTag of ownerTags ) {
183199 const ec2RunnersFiltered = ec2Runners
184200 . filter ( ( runner ) => runner . owner === ownerTag )
185201 . sort ( evictionStrategy === 'oldest_first' ? oldestFirstStrategy : newestFirstStrategy ) ;
186202 logger . debug ( `Found: '${ ec2RunnersFiltered . length } ' active GitHub runners with owner tag: '${ ownerTag } '` ) ;
187203 logger . debug ( `Active GitHub runners with owner tag: '${ ownerTag } ': ${ JSON . stringify ( ec2RunnersFiltered ) } ` ) ;
204+
205+ const standbyRunners = standbyPoolSize > 0
206+ ? await listEC2Runners ( { environment, runnerType : 'Org' , runnerOwner : ownerTag , statuses : [ 'stopped' ] , standby : true } )
207+ : [ ] ;
208+ const currentStandbyCount = standbyRunners . length ;
209+ let standbyCounter = Math . max ( 0 , standbyPoolSize - currentStandbyCount ) ;
210+
211+ logger . debug ( `Standby pool: target=${ standbyPoolSize } , current=${ currentStandbyCount } , needed=${ standbyCounter } ` ) ;
212+
188213 for ( const ec2Runner of ec2RunnersFiltered ) {
189214 const ghRunners = await listGitHubRunners ( ec2Runner ) ;
190215 const ghRunnersFiltered = ghRunners . filter ( ( runner : { name : string } ) =>
@@ -201,6 +226,16 @@ async function evaluateAndRemoveRunners(
201226 if ( idleCounter > 0 ) {
202227 idleCounter -- ;
203228 logger . info ( `Runner '${ ec2Runner . instanceId } ' will be kept idle.` ) ;
229+ } else if ( standbyCounter > 0 && runnerIdleTimeExceeded ( ec2Runner ) ) {
230+ standbyCounter -- ;
231+ logger . info ( `Runner '${ ec2Runner . instanceId } ' will be moved to standby.` ) ;
232+ await removeRunner (
233+ ec2Runner ,
234+ ghRunnersFiltered . map ( ( runner : { id : number } ) => runner . id ) ,
235+ true ,
236+ ) ;
237+ } else if ( standbyCounter > 0 ) {
238+ logger . info ( `Runner '${ ec2Runner . instanceId } ' waiting for idle time before moving to standby.` ) ;
204239 } else {
205240 logger . info ( `Terminating all non busy runners.` ) ;
206241 await removeRunner (
0 commit comments