@@ -313,6 +313,91 @@ function filterRunners(ec2runners: RunnerList[]): RunnerInfo[] {
313313 return ec2runners . filter ( ( ec2Runner ) => ec2Runner . type && ! ec2Runner . orphan ) as RunnerInfo [ ] ;
314314}
315315
/**
 * Heuristic for "this runner name embeds an EC2 instance ID" (`i-` followed by
 * 8 to 17 lowercase hex characters). Used to avoid touching self-hosted runners
 * that were never backed by an EC2 instance.
 */
const EC2_INSTANCE_ID_PATTERN = /i-[0-9a-f]{8,17}/;

/**
 * Picks the GitHub runners that are safe candidates for deregistration.
 *
 * A runner qualifies only when ALL of the following hold:
 *  - its status is `offline`;
 *  - its name looks like it embeds an EC2 instance ID (so runners that are not
 *    EC2-backed are never selected);
 *  - none of the known EC2 instance IDs occurs in its name.
 *
 * @param ghRunners - Runners as reported by GitHub for one owner.
 * @param knownInstanceIds - Instance IDs of the EC2 runners that still exist.
 * @returns The subset of `ghRunners` that has no corresponding EC2 instance.
 */
function findOrphanedGitHubRunners<T extends { name: string; status: string }>(
  ghRunners: readonly T[],
  knownInstanceIds: readonly string[],
): T[] {
  return ghRunners.filter((ghRunner) => {
    if (ghRunner.status !== 'offline') return false;
    // Guard: without an instance-ID-shaped token in the name we cannot tell that
    // this runner was ever EC2-backed, so leave it alone.
    if (!EC2_INSTANCE_ID_PATTERN.test(ghRunner.name)) return false;
    return !knownInstanceIds.some((instanceId) => ghRunner.name.includes(instanceId));
  });
}

/**
 * Deregisters offline GitHub runners whose EC2 instance no longer exists.
 *
 * The owners to query are derived from the `owner`/`type` fields of the given
 * EC2 runners; when none carries both fields, nothing is reconciled.
 * Failures are logged and never thrown: a failure for one owner or one runner
 * does not stop processing of the others.
 *
 * Configuration: `OFFLINE_RUNNER_DEREGISTER_MINUTES` (default `10`). A value
 * `<= 0` disables reconciliation. NOTE(review): the value currently acts only
 * as an on/off switch — no code here measures how long a runner has been
 * offline, so qualifying runners are deregistered on the first pass.
 *
 * @param environment - Environment name; only attached to log output.
 * @param ec2Runners - EC2 runner instances currently known for this environment.
 */
async function reconcileGitHubRunners(environment: string, ec2Runners: RunnerList[]): Promise<void> {
  const offlineThresholdMinutes = Number.parseInt(process.env.OFFLINE_RUNNER_DEREGISTER_MINUTES ?? '10', 10);
  if (Number.isNaN(offlineThresholdMinutes)) {
    // Same outcome as before (reconciliation proceeds), but no longer silent.
    logger.warn('OFFLINE_RUNNER_DEREGISTER_MINUTES is not a valid integer; offline runner reconciliation stays enabled');
  } else if (offlineThresholdMinutes <= 0) {
    logger.debug('Offline runner reconciliation is disabled (threshold <= 0)');
    return;
  }

  // De-duplicated once, so the per-runner check below does not rebuild it.
  const knownInstanceIds = [...new Set(ec2Runners.map((r) => r.instanceId))];

  // Unique owner -> type pairs taken from the EC2 runners we know about.
  // If the same owner appears with several types, the last one seen wins.
  const ownerTypes = new Map<string, string>();
  for (const r of ec2Runners) {
    if (r.owner && r.type) {
      ownerTypes.set(r.owner, r.type);
    }
  }

  // Without any owner there is nothing to query GitHub with, so skip this cycle.
  if (ownerTypes.size === 0) {
    logger.debug('No EC2 runners with owner tags found, skipping GitHub runner reconciliation');
    return;
  }

  for (const [owner, runnerType] of ownerTypes) {
    try {
      // Synthetic RunnerInfo so the existing GitHub client helpers can be reused.
      const syntheticRunner: RunnerInfo = { instanceId: 'reconciler', owner, type: runnerType };
      const ghRunners: { id: number; name: string; status: string }[] = await listGitHubRunners(syntheticRunner);

      const orphanedGhRunners = findOrphanedGitHubRunners(ghRunners, knownInstanceIds);
      if (orphanedGhRunners.length === 0) {
        logger.debug(`No orphaned GitHub runners found for owner '${owner}'`);
        continue;
      }

      logger.info(
        `Found ${orphanedGhRunners.length} offline GitHub runner(s) with no EC2 instance for owner '${owner}'`,
        { environment },
      );

      // Any type other than 'Org' is treated as a repository runner, whose owner
      // must have the form "<owner>/<repo>". Validate once instead of failing per runner.
      const isOrg = runnerType === 'Org';
      const [repoOwner, repo] = owner.split('/');
      if (!isOrg && (!repoOwner || !repo)) {
        logger.error(`Cannot deregister runners for owner '${owner}': expected '<owner>/<repo>'`);
        continue;
      }

      const client = await getOrCreateOctokit(syntheticRunner);
      for (const ghRunner of orphanedGhRunners) {
        try {
          if (isOrg) {
            await client.actions.deleteSelfHostedRunnerFromOrg({
              org: owner,
              runner_id: ghRunner.id,
            });
          } else {
            await client.actions.deleteSelfHostedRunnerFromRepo({
              owner: repoOwner,
              repo,
              runner_id: ghRunner.id,
            });
          }
          logger.info(`Deregistered orphaned GitHub runner '${ghRunner.name}' (ID: ${ghRunner.id})`);
        } catch (error) {
          // A 422 is reported as "runner still busy"; it is left for the next cycle.
          if (error instanceof RequestError && error.status === 422) {
            logger.warn(
              `Cannot deregister runner '${ghRunner.name}' — still marked as busy. Will retry next cycle.`,
            );
          } else {
            logger.error(`Failed to deregister orphaned runner '${ghRunner.name}'`, {
              error: error as Error,
            });
          }
        }
      }
    } catch (error) {
      logger.warn(`Failed to reconcile GitHub runners for owner '${owner}'`, { error: error as Error });
    }
  }
}
400+
316401export async function scaleDown ( ) : Promise < void > {
317402 githubCache . reset ( ) ;
318403 const environment = process . env . ENVIRONMENT ;
@@ -327,6 +412,11 @@ export async function scaleDown(): Promise<void> {
327412 logger . info ( `Found: '${ activeEc2RunnersCount } ' active GitHub EC2 runner instances before clean-up.` ) ;
328413 logger . debug ( `Active GitHub EC2 runner instances: ${ JSON . stringify ( ec2Runners ) } ` ) ;
329414
415+ // Reconcile: deregister GitHub runners whose EC2 instances no longer exist.
416+ // This prevents deadlocks where offline ghost runners count toward the max,
417+ // blocking scale-up from launching replacements.
418+ await reconcileGitHubRunners ( environment , ec2Runners ) ;
419+
330420 if ( activeEc2RunnersCount === 0 ) {
331421 logger . debug ( `No active runners found for environment: '${ environment } '` ) ;
332422 return ;
0 commit comments