Skip to content

Commit 9f37a04

Browse files
committed
feat: add retry logic to Octokit client and add ScaleUp tests
1 parent 7e33d64 commit 9f37a04

File tree

4 files changed

+124
-5
lines changed

4 files changed

+124
-5
lines changed

lambdas/functions/control-plane/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
"@middy/core": "^6.4.5",
3939
"@octokit/auth-app": "8.1.2",
4040
"@octokit/core": "7.0.6",
41+
"@octokit/plugin-retry": "8.0.3",
4142
"@octokit/plugin-throttling": "11.0.3",
4243
"@octokit/rest": "22.0.1",
4344
"cron-parser": "^5.4.0"

lambdas/functions/control-plane/src/github/auth.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ type StrategyOptions = {
1818
};
1919
import { request } from '@octokit/request';
2020
import { Octokit } from '@octokit/rest';
21+
import { retry } from '@octokit/plugin-retry';
2122
import { throttling } from '@octokit/plugin-throttling';
2223
import { createChildLogger } from '@aws-github-runner/aws-powertools-util';
2324
import { getParameter } from '@aws-github-runner/aws-ssm-util';
@@ -26,7 +27,7 @@ import { EndpointDefaults } from '@octokit/types';
2627
const logger = createChildLogger('gh-auth');
2728

2829
export async function createOctokitClient(token: string, ghesApiUrl = ''): Promise<Octokit> {
29-
const CustomOctokit = Octokit.plugin(throttling);
30+
const CustomOctokit = Octokit.plugin(retry, throttling);
3031
const ocktokitOptions: OctokitOptions = {
3132
auth: token,
3233
};
@@ -38,6 +39,17 @@ export async function createOctokitClient(token: string, ghesApiUrl = ''): Promi
3839
return new CustomOctokit({
3940
...ocktokitOptions,
4041
userAgent: process.env.USER_AGENT || 'github-aws-runners',
42+
retry: {
43+
onRetry: (retryCount: number, error: Error, request: { method: string; url: string }) => {
44+
logger.warn('GitHub API request retry attempt', {
45+
retryCount,
46+
method: request.method,
47+
url: request.url,
48+
error: error.message,
49+
status: (error as Error & { status?: number }).status,
50+
});
51+
},
52+
},
4153
throttle: {
4254
onRateLimit: (retryAfter: number, options: Required<EndpointDefaults>) => {
4355
logger.warn(

lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts

Lines changed: 96 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -346,12 +346,13 @@ describe('scaleUp with GHES', () => {
346346
return [];
347347
});
348348

349-
let callCount = 0;
350349
mockOctokit.actions.generateRunnerJitconfigForOrg.mockImplementation(({ name }) => {
351-
callCount++;
352350
if (name === 'unit-test-i-instance-2') {
353351
// Simulate a 503 Service Unavailable error from GitHub
354-
const error = new Error('Service Unavailable') as any;
352+
const error = new Error('Service Unavailable') as Error & {
353+
status: number;
354+
response: { status: number; data: { message: string } };
355+
};
355356
error.status = 503;
356357
error.response = {
357358
status: 503,
@@ -361,7 +362,7 @@ describe('scaleUp with GHES', () => {
361362
}
362363
return {
363364
data: {
364-
runner: { id: 9876543210 + callCount },
365+
runner: { id: 9876543210 },
365366
encoded_jit_config: `TEST_JIT_CONFIG_${name}`,
366367
},
367368
headers: {},
@@ -410,6 +411,97 @@ describe('scaleUp with GHES', () => {
410411
});
411412
});
412413

414+
it('should handle retryable errors with error handling logic', async () => {
415+
process.env.RUNNERS_MAXIMUM_COUNT = '5';
416+
mockCreateRunner.mockImplementation(async () => {
417+
return ['i-instance-1', 'i-instance-2'];
418+
});
419+
mockListRunners.mockImplementation(async () => {
420+
return [];
421+
});
422+
423+
mockOctokit.actions.generateRunnerJitconfigForOrg.mockImplementation(({ name }) => {
424+
if (name === 'unit-test-i-instance-1') {
425+
const error = new Error('Internal Server Error') as Error & {
426+
status: number;
427+
response: { status: number; data: { message: string } };
428+
};
429+
error.status = 500;
430+
error.response = {
431+
status: 500,
432+
data: { message: 'Internal server error' },
433+
};
434+
throw error;
435+
}
436+
return {
437+
data: {
438+
runner: { id: 9876543210 },
439+
encoded_jit_config: `TEST_JIT_CONFIG_${name}`,
440+
},
441+
headers: {},
442+
};
443+
});
444+
445+
await scaleUpModule.scaleUp(TEST_DATA);
446+
447+
expect(mockSSMClient).toHaveReceivedCommandWith(PutParameterCommand, {
448+
Name: '/github-action-runners/default/runners/config/i-instance-2',
449+
Value: 'TEST_JIT_CONFIG_unit-test-i-instance-2',
450+
Type: 'SecureString',
451+
Tags: [{ Key: 'InstanceId', Value: 'i-instance-2' }],
452+
});
453+
454+
expect(mockSSMClient).not.toHaveReceivedCommandWith(PutParameterCommand, {
455+
Name: '/github-action-runners/default/runners/config/i-instance-1',
456+
});
457+
});
458+
459+
it('should handle non-retryable 4xx errors gracefully', async () => {
460+
process.env.RUNNERS_MAXIMUM_COUNT = '5';
461+
mockCreateRunner.mockImplementation(async () => {
462+
return ['i-instance-1', 'i-instance-2'];
463+
});
464+
mockListRunners.mockImplementation(async () => {
465+
return [];
466+
});
467+
468+
mockOctokit.actions.generateRunnerJitconfigForOrg.mockImplementation(({ name }) => {
469+
if (name === 'unit-test-i-instance-1') {
470+
// 404 is not retryable - will fail immediately
471+
const error = new Error('Not Found') as Error & {
472+
status: number;
473+
response: { status: number; data: { message: string } };
474+
};
475+
error.status = 404;
476+
error.response = {
477+
status: 404,
478+
data: { message: 'Resource not found' },
479+
};
480+
throw error;
481+
}
482+
return {
483+
data: {
484+
runner: { id: 9876543210 },
485+
encoded_jit_config: `TEST_JIT_CONFIG_${name}`,
486+
},
487+
headers: {},
488+
};
489+
});
490+
491+
await scaleUpModule.scaleUp(TEST_DATA);
492+
493+
expect(mockSSMClient).toHaveReceivedCommandWith(PutParameterCommand, {
494+
Name: '/github-action-runners/default/runners/config/i-instance-2',
495+
Value: 'TEST_JIT_CONFIG_unit-test-i-instance-2',
496+
Type: 'SecureString',
497+
Tags: [{ Key: 'InstanceId', Value: 'i-instance-2' }],
498+
});
499+
500+
expect(mockSSMClient).not.toHaveReceivedCommandWith(PutParameterCommand, {
501+
Name: '/github-action-runners/default/runners/config/i-instance-1',
502+
});
503+
});
504+
413505
it.each(RUNNER_TYPES)(
414506
'calls create start runner config of 40' + ' instances (ssm rate limit condition) to test time delay ',
415507
async (type: RunnerType) => {

lambdas/yarn.lock

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ __metadata:
155155
"@middy/core": "npm:^6.4.5"
156156
"@octokit/auth-app": "npm:8.1.2"
157157
"@octokit/core": "npm:7.0.6"
158+
"@octokit/plugin-retry": "npm:8.0.3"
158159
"@octokit/plugin-throttling": "npm:11.0.3"
159160
"@octokit/rest": "npm:22.0.1"
160161
"@octokit/types": "npm:^16.0.0"
@@ -3685,6 +3686,19 @@ __metadata:
36853686
languageName: node
36863687
linkType: hard
36873688

3689+
"@octokit/plugin-retry@npm:8.0.3":
3690+
version: 8.0.3
3691+
resolution: "@octokit/plugin-retry@npm:8.0.3"
3692+
dependencies:
3693+
"@octokit/request-error": "npm:^7.0.2"
3694+
"@octokit/types": "npm:^16.0.0"
3695+
bottleneck: "npm:^2.15.3"
3696+
peerDependencies:
3697+
"@octokit/core": ">=7"
3698+
checksum: 10c0/24d35d85f750f9e3e52f63b8ddd8fc8aa7bdd946c77b9ea4d6894d026c5c2c69109e8de3880a9970c906f624eb777c7d0c0a2072e6d41dadc7b36cce104b978c
3699+
languageName: node
3700+
linkType: hard
3701+
36883702
"@octokit/plugin-throttling@npm:11.0.3":
36893703
version: 11.0.3
36903704
resolution: "@octokit/plugin-throttling@npm:11.0.3"

0 commit comments

Comments
 (0)