Skip to content

Commit a9ca792

Browse files
committed
feat(termination-watcher): deregister runners from GitHub on EC2 termination
Extend the termination-watcher Lambda to deregister GitHub Actions runners when their EC2 instances terminate. This prevents stale "offline" runner entries from accumulating in the GitHub organization/repository.

The implementation:
- Adds GitHub API deregistration logic using the module's existing App auth pattern (SSM-stored credentials → createAppAuth → installation token)
- Wires deregistration into both termination handlers (BidEvictedEvent and Spot Interruption Warning)
- Adds an EC2 Instance State-change Notification EventBridge rule (state: shutting-down) that catches ALL termination types — not just spot events
- Makes the feature opt-in via `enable_runner_deregistration` (default false)
- Supports both Org and Repo runner types via the ghr:Type instance tag
- Handles GHES via a configurable API URL

New Terraform variable on `instance_termination_watcher`:
- `enable_runner_deregistration` (bool, default false)

Fixes #804
1 parent 6dc97d5 commit a9ca792

File tree

19 files changed

+785
-57
lines changed

19 files changed

+785
-57
lines changed

lambdas/functions/termination-watcher/package.json

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,14 @@
2424
},
2525
"dependencies": {
2626
"@aws-github-runner/aws-powertools-util": "*",
27+
"@aws-github-runner/aws-ssm-util": "*",
2728
"@aws-sdk/client-ec2": "^3.984.0",
28-
"@middy/core": "^6.4.5"
29+
"@middy/core": "^6.4.5",
30+
"@octokit/auth-app": "8.2.0",
31+
"@octokit/core": "7.0.6",
32+
"@octokit/plugin-throttling": "11.0.3",
33+
"@octokit/request": "^9.2.2",
34+
"@octokit/rest": "22.0.1"
2935
},
3036
"nx": {
3137
"includedScripts": [

lambdas/functions/termination-watcher/src/ConfigResolver.test.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ describe('Test ConfigResolver', () => {
3737
delete process.env.ENABLE_METRICS_SPOT_WARNING;
3838
delete process.env.PREFIX;
3939
delete process.env.TAG_FILTERS;
40+
delete process.env.ENABLE_RUNNER_DEREGISTRATION;
41+
delete process.env.GHES_URL;
4042
});
4143

4244
it(description, async () => {
@@ -55,4 +57,29 @@ describe('Test ConfigResolver', () => {
5557
expect(config.tagFilters).toEqual(output.tagFilters);
5658
});
5759
});
60+
61+
describe('runner deregistration config', () => {
  // Start every case from a clean environment so a value set by one test
  // cannot leak into the next.
  beforeEach(() => {
    delete process.env.ENABLE_RUNNER_DEREGISTRATION;
    delete process.env.GHES_URL;
  });

  it('should default to disabled', () => {
    const config = new Config();
    expect(config.enableRunnerDeregistration).toBe(false);
    expect(config.ghesApiUrl).toBe('');
  });

  it('should enable deregistration when env var is true', () => {
    process.env.ENABLE_RUNNER_DEREGISTRATION = 'true';
    const config = new Config();
    expect(config.enableRunnerDeregistration).toBe(true);
  });

  // Config compares the variable strictly against the string 'true', so any
  // other value must leave the opt-in feature switched off.
  it.each(['false', '0', ''])('should stay disabled when env var is %j', (value) => {
    process.env.ENABLE_RUNNER_DEREGISTRATION = value;
    const config = new Config();
    expect(config.enableRunnerDeregistration).toBe(false);
  });

  it('should set GHES URL when provided', () => {
    process.env.GHES_URL = 'https://github.internal.co/api/v3';
    const config = new Config();
    expect(config.ghesApiUrl).toBe('https://github.internal.co/api/v3');
  });
});
5885
});

lambdas/functions/termination-watcher/src/ConfigResolver.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ export class Config {
55
createSpotTerminationMetric: boolean;
66
tagFilters: Record<string, string>;
77
prefix: string;
8+
enableRunnerDeregistration: boolean;
9+
ghesApiUrl: string;
810

911
constructor() {
1012
const logger = createChildLogger('config-resolver');
@@ -14,6 +16,8 @@ export class Config {
1416
this.createSpotWarningMetric = process.env.ENABLE_METRICS_SPOT_WARNING === 'true';
1517
this.createSpotTerminationMetric = process.env.ENABLE_METRICS_SPOT_TERMINATION === 'true';
1618
this.prefix = process.env.PREFIX ?? '';
19+
this.enableRunnerDeregistration = process.env.ENABLE_RUNNER_DEREGISTRATION === 'true';
20+
this.ghesApiUrl = process.env.GHES_URL ?? '';
1721
this.tagFilters = { 'ghr:environment': this.prefix };
1822

1923
const rawTagFilters = process.env.TAG_FILTERS;
Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
import { Instance } from '@aws-sdk/client-ec2';
2+
import { describe, it, expect, beforeEach, vi } from 'vitest';
3+
import { deregisterRunner, createThrottleOptions } from './deregister';
4+
import { Config } from './ConfigResolver';
5+
import type { EndpointDefaults } from '@octokit/types';
6+
7+
// Stand-in for the SSM getParameter helper; setupAuthMocks gives it an implementation.
const mockGetParameter = vi.fn();
8+
// The factory exposes a forwarding arrow rather than the vi.fn itself, so
// mockGetParameter is only dereferenced when getParameter is actually called.
// NOTE(review): presumably needed because vitest hoists vi.mock above the const — confirm before inlining.
vi.mock('@aws-github-runner/aws-ssm-util', () => ({
9+
getParameter: (...args: unknown[]) => mockGetParameter(...args),
10+
}));
11+
12+
// Stand-in for createAppAuth, forwarded the same way as getParameter.
const mockCreateAppAuth = vi.fn();
13+
vi.mock('@octokit/auth-app', () => ({
14+
createAppAuth: (...args: unknown[]) => mockCreateAppAuth(...args),
15+
}));
16+
17+
// Pagination stub; tests make iterator return an async generator of runner pages.
const mockPaginate = {
18+
iterator: vi.fn(),
19+
};
20+
21+
// Stubs for the Actions endpoints: list and delete self-hosted runners, for org and repo.
const mockActions = {
22+
listSelfHostedRunnersForOrg: vi.fn(),
23+
listSelfHostedRunnersForRepo: vi.fn(),
24+
deleteSelfHostedRunnerFromOrg: vi.fn(),
25+
deleteSelfHostedRunnerFromRepo: vi.fn(),
26+
};
27+
28+
// Stubs for the Apps installation lookups; tests resolve them with { data: { id } }.
const mockApps = {
29+
getOrgInstallation: vi.fn(),
30+
getRepoInstallation: vi.fn(),
31+
};
32+
33+
// Replacement for the Octokit constructor: every call returns an object that shares the
// module-level mockActions, mockApps and mockPaginate stubs.
// NOTE(review): likely must remain a hoisted function declaration because the
// '@octokit/rest' mock factory references it directly — confirm before converting to a const.
function MockOctokit() {
34+
return {
35+
actions: mockActions,
36+
apps: mockApps,
37+
paginate: mockPaginate,
38+
};
39+
}
40+
// plugin() hands back MockOctokit itself, so Octokit.plugin(...) still yields this mock.
MockOctokit.plugin = vi.fn().mockReturnValue(MockOctokit);
41+
42+
// Swap the real Octokit class for MockOctokit.
vi.mock('@octokit/rest', () => ({
43+
Octokit: MockOctokit,
44+
}));
45+
46+
// The throttling plugin is reduced to a bare mock function.
vi.mock('@octokit/plugin-throttling', () => ({
47+
throttling: vi.fn(),
48+
}));
49+
50+
// request.defaults(...) returns a mock function instead of a configured requester.
vi.mock('@octokit/request', () => ({
51+
request: {
52+
defaults: vi.fn().mockReturnValue(vi.fn()),
53+
},
54+
}));
55+
56+
// Default Config for these tests: deregistration enabled and no GHES URL.
// Individual tests override single fields with an object spread.
const baseConfig: Config = {
57+
createSpotWarningMetric: false,
58+
createSpotTerminationMetric: true,
59+
tagFilters: { 'ghr:environment': 'test' },
60+
prefix: 'runners',
61+
enableRunnerDeregistration: true,
62+
ghesApiUrl: '',
63+
};
64+
65+
// EC2 instance fixture tagged as an organization runner (ghr:Type = Org) owned by 'test-org'.
// The org tests expect a GitHub runner named `runner-<InstanceId>` to be matched for it.
const orgInstance: Instance = {
66+
InstanceId: 'i-12345678901234567',
67+
InstanceType: 't2.micro',
68+
Tags: [
69+
{ Key: 'Name', Value: 'test-instance' },
70+
{ Key: 'ghr:environment', Value: 'test' },
71+
{ Key: 'ghr:Owner', Value: 'test-org' },
72+
{ Key: 'ghr:Type', Value: 'Org' },
73+
],
74+
State: { Name: 'running' },
75+
LaunchTime: new Date('2021-01-01'),
76+
};
77+
78+
// EC2 instance fixture tagged as a repository runner (ghr:Type = Repo).
// ghr:Owner carries 'owner/repo'; the repo tests expect it to be split into
// owner 'test-org' and repo 'test-repo'.
const repoInstance: Instance = {
79+
InstanceId: 'i-repo12345678901234',
80+
InstanceType: 't2.micro',
81+
Tags: [
82+
{ Key: 'Name', Value: 'test-repo-instance' },
83+
{ Key: 'ghr:environment', Value: 'test' },
84+
{ Key: 'ghr:Owner', Value: 'test-org/test-repo' },
85+
{ Key: 'ghr:Type', Value: 'Repo' },
86+
],
87+
State: { Name: 'running' },
88+
LaunchTime: new Date('2021-01-01'),
89+
};
90+
91+
/**
 * Programs the SSM and GitHub App auth mocks for a test run.
 *
 * getParameter resolves the app id and the base64-encoded private key for the
 * two known parameter names and rejects for anything else. createAppAuth
 * returns an auth function that resolves an app token for `type: 'app'` and
 * an installation token for every other auth type.
 */
function setupAuthMocks() {
  const knownParameters = new Map<string, string>([
    ['github-app-id', '12345'],
    ['github-app-key', Buffer.from('fake-private-key').toString('base64')],
  ]);

  mockGetParameter.mockImplementation((name: string) => {
    const value = knownParameters.get(name);
    if (value === undefined) {
      return Promise.reject(new Error(`Unknown parameter: ${name}`));
    }
    return Promise.resolve(value);
  });

  // One auth function serves both token kinds, selected by the requested type.
  const authenticate = vi.fn((opts: { type: string }) =>
    Promise.resolve({ token: opts.type === 'app' ? 'app-token' : 'installation-token' }),
  );
  mockCreateAppAuth.mockReturnValue(authenticate);
}
109+
110+
describe('deregisterRunner', () => {
  type RunnerPage = Array<{ id: number; name: string }>;

  /**
   * Makes `paginate.iterator` return an async iterator that yields one
   * `{ data }` response per supplied page of runners, in order.
   */
  function mockRunnerPages(...pages: RunnerPage[]): void {
    async function* pageIterator() {
      for (const data of pages) {
        yield { data };
      }
    }
    mockPaginate.iterator.mockReturnValue(pageIterator());
  }

  beforeEach(() => {
    // Reset recorded calls, then re-establish the SSM parameter names and auth mocks.
    vi.clearAllMocks();
    process.env.PARAMETER_GITHUB_APP_ID_NAME = 'github-app-id';
    process.env.PARAMETER_GITHUB_APP_KEY_BASE64_NAME = 'github-app-key';
    setupAuthMocks();
  });

  it('should skip deregistration when disabled', async () => {
    await deregisterRunner(orgInstance, { ...baseConfig, enableRunnerDeregistration: false });
    expect(mockGetParameter).not.toHaveBeenCalled();
  });

  it('should skip deregistration when instance ID is missing', async () => {
    const instance: Instance = { ...orgInstance, InstanceId: undefined };
    await deregisterRunner(instance, baseConfig);
    expect(mockGetParameter).not.toHaveBeenCalled();
  });

  it('should skip deregistration when ghr:Owner tag is missing', async () => {
    const instance: Instance = {
      ...orgInstance,
      Tags: [{ Key: 'Name', Value: 'test' }],
    };
    await deregisterRunner(instance, baseConfig);
    // Auth should not be called since we bail early
    expect(mockCreateAppAuth).not.toHaveBeenCalled();
  });

  it('should deregister an org runner successfully', async () => {
    mockApps.getOrgInstallation.mockResolvedValue({ data: { id: 999 } });
    mockRunnerPages([{ id: 42, name: 'runner-i-12345678901234567' }]);
    mockActions.deleteSelfHostedRunnerFromOrg.mockResolvedValue({});

    await deregisterRunner(orgInstance, baseConfig);

    expect(mockApps.getOrgInstallation).toHaveBeenCalledWith({ org: 'test-org' });
    expect(mockActions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledWith({
      org: 'test-org',
      runner_id: 42,
    });
  });

  it('should deregister a repo runner successfully', async () => {
    mockApps.getRepoInstallation.mockResolvedValue({ data: { id: 888 } });
    mockRunnerPages([{ id: 55, name: 'runner-i-repo12345678901234' }]);
    mockActions.deleteSelfHostedRunnerFromRepo.mockResolvedValue({});

    await deregisterRunner(repoInstance, baseConfig);

    expect(mockApps.getRepoInstallation).toHaveBeenCalledWith({ owner: 'test-org', repo: 'test-repo' });
    expect(mockActions.deleteSelfHostedRunnerFromRepo).toHaveBeenCalledWith({
      owner: 'test-org',
      repo: 'test-repo',
      runner_id: 55,
    });
  });

  it('should handle runner not found gracefully', async () => {
    mockApps.getOrgInstallation.mockResolvedValue({ data: { id: 999 } });
    mockRunnerPages([{ id: 42, name: 'runner-other-instance' }]);

    await deregisterRunner(orgInstance, baseConfig);

    expect(mockActions.deleteSelfHostedRunnerFromOrg).not.toHaveBeenCalled();
  });

  it('should handle GitHub API errors gracefully', async () => {
    mockApps.getOrgInstallation.mockRejectedValue(new Error('GitHub API error'));

    // Awaiting here is the assertion that the error is caught internally:
    // a rejection would fail the test.
    await deregisterRunner(orgInstance, baseConfig);

    expect(mockActions.deleteSelfHostedRunnerFromOrg).not.toHaveBeenCalled();
  });

  it('should default to Org runner type when ghr:Type tag is missing', async () => {
    const instance: Instance = {
      ...orgInstance,
      Tags: [
        { Key: 'ghr:environment', Value: 'test' },
        { Key: 'ghr:Owner', Value: 'test-org' },
      ],
    };

    mockApps.getOrgInstallation.mockResolvedValue({ data: { id: 999 } });
    mockRunnerPages([{ id: 42, name: 'runner-i-12345678901234567' }]);
    mockActions.deleteSelfHostedRunnerFromOrg.mockResolvedValue({});

    await deregisterRunner(instance, baseConfig);

    expect(mockApps.getOrgInstallation).toHaveBeenCalledWith({ org: 'test-org' });
    expect(mockActions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledWith({
      org: 'test-org',
      runner_id: 42,
    });
  });

  // NOTE(review): this only proves the happy path still works with a GHES URL set;
  // nothing here asserts that the URL reaches the Octokit/request configuration.
  // Consider asserting on the mocked constructor or request.defaults once the wiring is confirmed.
  it('should use GHES API URL when configured', async () => {
    const ghesConfig = { ...baseConfig, ghesApiUrl: 'https://github.internal.co/api/v3' };

    mockApps.getOrgInstallation.mockResolvedValue({ data: { id: 999 } });
    mockRunnerPages([{ id: 42, name: 'runner-i-12345678901234567' }]);
    mockActions.deleteSelfHostedRunnerFromOrg.mockResolvedValue({});

    await deregisterRunner(orgInstance, ghesConfig);

    expect(mockActions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalled();
  });

  it('should paginate through multiple pages to find runner', async () => {
    mockApps.getOrgInstallation.mockResolvedValue({ data: { id: 999 } });
    mockRunnerPages(
      [{ id: 1, name: 'runner-other-1' }],
      [{ id: 2, name: 'runner-other-2' }],
      [{ id: 42, name: 'runner-i-12345678901234567' }],
    );
    mockActions.deleteSelfHostedRunnerFromOrg.mockResolvedValue({});

    await deregisterRunner(orgInstance, baseConfig);

    expect(mockActions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledWith({
      org: 'test-org',
      runner_id: 42,
    });
  });

  it('should handle repo runner not found gracefully', async () => {
    mockApps.getRepoInstallation.mockResolvedValue({ data: { id: 888 } });
    mockRunnerPages([{ id: 99, name: 'runner-other-instance' }]);

    await deregisterRunner(repoInstance, baseConfig);

    expect(mockActions.deleteSelfHostedRunnerFromRepo).not.toHaveBeenCalled();
  });

  it('should handle instance with no tags', async () => {
    const instance: Instance = {
      InstanceId: 'i-12345678901234567',
      Tags: undefined,
    };
    await deregisterRunner(instance, baseConfig);
    expect(mockCreateAppAuth).not.toHaveBeenCalled();
  });
});
286+
287+
describe('createThrottleOptions', () => {
  // Both handlers are expected to return false for the same request and delay.
  // NOTE(review): the title mentions a logged warning, but no logger assertion is made here.
  it('should return false for rate limit and log warning', () => {
    const retryAfterSeconds = 60;
    const request = { method: 'GET', url: '/test' } as Required<EndpointDefaults>;
    const throttle = createThrottleOptions();

    const primaryResult = throttle.onRateLimit(retryAfterSeconds, request);
    expect(primaryResult).toBe(false);

    const secondaryResult = throttle.onSecondaryRateLimit(retryAfterSeconds, request);
    expect(secondaryResult).toBe(false);
  });
});

0 commit comments

Comments
 (0)