Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,35 @@ describe('Scale down runners', () => {
await expect(scaleDown()).resolves.not.toThrow();
});

it(`Should not terminate instance when de-registration throws an error.`, async () => {
// setup - runner should NOT be terminated because de-registration fails
const runners = [createRunnerTestData('idle-1', type, MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, false, false)];

const error502 = new RequestError('Server Error', 502, {
request: {
method: 'DELETE',
url: 'https://api.github.com/test',
headers: {},
},
});

mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation(() => {
throw error502;
});
mockOctokit.actions.deleteSelfHostedRunnerFromRepo.mockImplementation(() => {
throw error502;
});

mockGitHubRunners(runners);
mockAwsRunners(runners);

// act
await expect(scaleDown()).resolves.not.toThrow();

// assert - should NOT terminate since de-registration failed
expect(terminateRunner).not.toHaveBeenCalled();
});

const evictionStrategies = ['oldest_first', 'newest_first'];
describe.each(evictionStrategies)('When idle config defined', (evictionStrategy) => {
const defaultConfig = {
Expand Down
57 changes: 40 additions & 17 deletions lambdas/functions/control-plane/src/scale-runners/scale-down.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,33 @@ function runnerMinimumTimeExceeded(runner: RunnerInfo): boolean {
return launchTimePlusMinimum < now;
}

async function deleteGitHubRunner(
githubAppClient: Octokit,
ec2runner: RunnerInfo,
ghRunnerId: number,
): Promise<{ ghRunnerId: number; status: number; success: boolean }> {
try {
const response =
ec2runner.type === 'Org'
? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({
runner_id: ghRunnerId,
org: ec2runner.owner,
})
: await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({
runner_id: ghRunnerId,
owner: ec2runner.owner.split('/')[0],
repo: ec2runner.owner.split('/')[1],
});
return { ghRunnerId, status: response.status, success: response.status === 204 };
} catch (error) {
logger.error(
`Failed to de-register GitHub runner ${ghRunnerId} for instance '${ec2runner.instanceId}'. Error: ${error}`,
{ error: error as Error },
);
return { ghRunnerId, status: 0, success: false };
}
}

async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promise<void> {
const githubAppClient = await getOrCreateOctokit(ec2runner);
try {
Expand All @@ -146,28 +173,24 @@ async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promi
);

if (states.every((busy) => busy === false)) {
const statuses = await Promise.all(
ghRunnerIds.map(async (ghRunnerId) => {
return (
ec2runner.type === 'Org'
? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({
runner_id: ghRunnerId,
org: ec2runner.owner,
})
: await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({
runner_id: ghRunnerId,
owner: ec2runner.owner.split('/')[0],
repo: ec2runner.owner.split('/')[1],
})
).status;
}),
const results = await Promise.all(
ghRunnerIds.map((ghRunnerId) => deleteGitHubRunner(githubAppClient, ec2runner, ghRunnerId)),
);

if (statuses.every((status) => status == 204)) {
const allSucceeded = results.every((r) => r.success);
const failedRunners = results.filter((r) => !r.success);

if (allSucceeded) {
await terminateRunner(ec2runner.instanceId);
logger.info(`AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`);
} else {
logger.error(`Failed to de-register GitHub runner: ${statuses}`);
// Only terminate EC2 if we successfully de-registered from GitHub
// Otherwise, leave the instance running so the next scale-down cycle can retry
logger.error(
`Failed to de-register ${failedRunners.length} GitHub runner(s) for instance '${ec2runner.instanceId}'. ` +
`Instance will NOT be terminated to allow retry on next scale-down cycle. ` +
`Failed runner IDs: ${failedRunners.map((r) => r.ghRunnerId).join(', ')}`,
);
}
} else {
logger.info(`Runner '${ec2runner.instanceId}' cannot be de-registered, because it is still busy.`);
Expand Down
Loading