diff --git a/README.md b/README.md index 351a160a62..2374321c14 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,7 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh) | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [additional\_github\_apps](#input\_additional\_github\_apps) | Additional GitHub Apps for distributing API rate limit usage. Each must be installed on the same repos/orgs as the primary app. |
list(object({
key_base64 = optional(string)
key_base64_ssm = optional(object({ arn = string, name = string }))
id = optional(string)
id_ssm = optional(object({ arn = string, name = string }))
installation_id = optional(string)
installation_id_ssm = optional(object({ arn = string, name = string }))
}))
| `[]` | no | | [ami](#input\_ami) | AMI configuration for the action runner instances. This object allows you to specify all AMI-related settings in one place.

Parameters:
- `filter`: Map of lists to filter AMIs by various criteria (e.g., { name = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-*"], state = ["available"] })
- `owners`: List of AMI owners to limit the search. Common values: ["amazon"], ["self"], or specific AWS account IDs
- `id_ssm_parameter_arn`: ARN of an SSM parameter containing the AMI ID. If specified, this overrides both AMI filter and parameter name
- `kms_key_arn`: Optional KMS key ARN if the AMI is encrypted with a customer managed key

Defaults to null, in which case the module falls back to individual AMI variables (deprecated). |
object({
filter = optional(map(list(string)), { state = ["available"] })
owners = optional(list(string), ["amazon"])
id_ssm_parameter_arn = optional(string, null)
kms_key_arn = optional(string, null)
})
| `null` | no | | [ami\_housekeeper\_cleanup\_config](#input\_ami\_housekeeper\_cleanup\_config) | Configuration for AMI cleanup.

`amiFilters` - Filters to use when searching for AMIs to clean up. The default filter selects images that are owned by the account and are available.
`dryRun` - If true, no AMIs will be deregistered. Default false.
`launchTemplateNames` - Launch template names to use when searching for AMIs to clean up. Default no launch templates.
`maxItems` - The maximum number of AMIs that will be queried for cleanup. Default no maximum.
`minimumDaysOld` - Minimum number of days old an AMI must be to be considered for cleanup. Default 30.
`ssmParameterNames` - SSM parameter names to use when searching for AMIs to clean up. This parameter should be set when using SSM to configure the AMI to use. Default no SSM parameters. |
object({
amiFilters = optional(list(object({
Name = string
Values = list(string)
})),
[{
Name : "state",
Values : ["available"],
},
{
Name : "image-type",
Values : ["machine"],
}]
)
dryRun = optional(bool, false)
launchTemplateNames = optional(list(string))
maxItems = optional(number)
minimumDaysOld = optional(number, 30)
ssmParameterNames = optional(list(string))
})
| `{}` | no | | [ami\_housekeeper\_lambda\_s3\_key](#input\_ami\_housekeeper\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | diff --git a/examples/multi-runner/README.md b/examples/multi-runner/README.md index 8f14b48503..e7609a8c77 100644 --- a/examples/multi-runner/README.md +++ b/examples/multi-runner/README.md @@ -16,6 +16,10 @@ For exact match, all the labels defined in the workflow should be present in the For the list of provided runner configurations, there will be a single webhook and only a single GitHub App to receive the notifications for all types of workflow triggers. +## Multiple GitHub Apps (rate limit distribution) + +This example also shows how to optionally configure multiple GitHub Apps via the `additional_github_apps` variable. When configured, the control-plane lambdas (scale-up, scale-down, pool, job-retry) randomly select an app for each GitHub API call, spreading the rate limit usage across all apps. Only the primary app needs a webhook URL configured in GitHub. + ## Lambda distribution Per combination of OS and architecture a lambda distribution syncer will be created. For this example there will be three instances (windows X64, linux X64, linux ARM). diff --git a/examples/multi-runner/main.tf b/examples/multi-runner/main.tf index 0524a48859..f2cbcab741 100644 --- a/examples/multi-runner/main.tf +++ b/examples/multi-runner/main.tf @@ -117,6 +117,17 @@ module "runners" { webhook_secret = random_id.random.hex } + # Uncomment to distribute GitHub API rate limit usage across multiple GitHub Apps. + # Each additional app must be installed on the same repos/orgs as the primary app. + # The control-plane lambdas will randomly select an app for each API call. 
+ # additional_github_apps = [ + # { + # key_base64 = var.additional_github_app_0.key_base64 + # id = var.additional_github_app_0.id + # installation_id = var.additional_github_app_0.installation_id # optional, avoids an API call + # }, + # ] + # Deploy webhook using the EventBridge eventbridge = { enable = true diff --git a/lambdas/functions/control-plane/src/github/auth.test.ts b/lambdas/functions/control-plane/src/github/auth.test.ts index 274496ea20..4e3535a609 100644 --- a/lambdas/functions/control-plane/src/github/auth.test.ts +++ b/lambdas/functions/control-plane/src/github/auth.test.ts @@ -6,7 +6,7 @@ import { getParameters } from '@aws-github-runner/aws-ssm-util'; import { generateKeyPairSync } from 'node:crypto'; import * as nock from 'nock'; -import { createGithubAppAuth, createOctokitClient } from './auth'; +import { createGithubAppAuth, createOctokitClient, getStoredInstallationId, resetAppCredentialsCache } from './auth'; import { describe, it, expect, beforeEach, vi } from 'vitest'; type MockProxy = T & { @@ -32,6 +32,7 @@ const mockedGetParameters = vi.mocked(getParameters); beforeEach(() => { vi.resetModules(); vi.clearAllMocks(); + resetAppCredentialsCache(); process.env = { ...cleanEnv }; process.env.PARAMETER_GITHUB_APP_ID_NAME = PARAMETER_GITHUB_APP_ID_NAME; process.env.PARAMETER_GITHUB_APP_KEY_BASE64_NAME = PARAMETER_GITHUB_APP_KEY_BASE64_NAME; @@ -297,3 +298,99 @@ describe('Test createGithubAppAuth', () => { expect(result.token).toBe(token); }); }); + +describe('Test getStoredInstallationId', () => { + const decryptedValue = 'decryptedValue'; + const b64 = Buffer.from(decryptedValue, 'binary').toString('base64'); + + beforeEach(() => { + const mockedAuth = vi.fn(); + mockedAuth.mockResolvedValue({ token: 'token' }); + const mockWithHook = Object.assign(mockedAuth, { hook: vi.fn() }); + vi.mocked(createAppAuth).mockReturnValue(mockWithHook); + }); + + it('returns stored installation ID when configured', async () => { + const 
installationIdParam = `/actions-runner/${ENVIRONMENT}/github_app_installation_id`; + process.env.PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME = installationIdParam; + mockedGetParameters.mockResolvedValueOnce( + new Map([ + [PARAMETER_GITHUB_APP_ID_NAME, GITHUB_APP_ID], + [PARAMETER_GITHUB_APP_KEY_BASE64_NAME, b64], + [installationIdParam, '12345'], + ]), + ); + + const result = await getStoredInstallationId(0); + expect(result).toBe(12345); + }); + + it('returns undefined when installation ID param is empty', async () => { + process.env.PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME = ''; + mockedGetParameters.mockResolvedValueOnce( + new Map([ + [PARAMETER_GITHUB_APP_ID_NAME, GITHUB_APP_ID], + [PARAMETER_GITHUB_APP_KEY_BASE64_NAME, b64], + ]), + ); + + const result = await getStoredInstallationId(0); + expect(result).toBeUndefined(); + }); + + it('returns undefined when env var is not set', async () => { + delete process.env.PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME; + mockedGetParameters.mockResolvedValueOnce( + new Map([ + [PARAMETER_GITHUB_APP_ID_NAME, GITHUB_APP_ID], + [PARAMETER_GITHUB_APP_KEY_BASE64_NAME, b64], + ]), + ); + + const result = await getStoredInstallationId(0); + expect(result).toBeUndefined(); + }); + + it('returns undefined for out-of-bounds appIndex', async () => { + process.env.PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME = ''; + mockedGetParameters.mockResolvedValueOnce( + new Map([ + [PARAMETER_GITHUB_APP_ID_NAME, GITHUB_APP_ID], + [PARAMETER_GITHUB_APP_KEY_BASE64_NAME, b64], + ]), + ); + + const result = await getStoredInstallationId(99); + expect(result).toBeUndefined(); + }); + + it('loads installation IDs for multi-app setup', async () => { + const app1IdParam = `/actions-runner/${ENVIRONMENT}/github_app_id`; + const app2IdParam = `/actions-runner/${ENVIRONMENT}/additional_github_app_0_id`; + const app1KeyParam = `/actions-runner/${ENVIRONMENT}/github_app_key_base64`; + const app2KeyParam = 
`/actions-runner/${ENVIRONMENT}/additional_github_app_0_key_base64`; + const app2InstallParam = `/actions-runner/${ENVIRONMENT}/additional_github_app_0_installation_id`; + + process.env.PARAMETER_GITHUB_APP_ID_NAME = `${app1IdParam}:${app2IdParam}`; + process.env.PARAMETER_GITHUB_APP_KEY_BASE64_NAME = `${app1KeyParam}:${app2KeyParam}`; + process.env.PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME = `:${app2InstallParam}`; + + mockedGetParameters.mockResolvedValueOnce( + new Map([ + [app1IdParam, '1'], + [app1KeyParam, b64], + [app2IdParam, '2'], + [app2KeyParam, b64], + [app2InstallParam, '67890'], + ]), + ); + + // Primary app (index 0) has no stored installation ID + const result0 = await getStoredInstallationId(0); + expect(result0).toBeUndefined(); + + // Additional app (index 1) has stored installation ID + const result1 = await getStoredInstallationId(1); + expect(result1).toBe(67890); + }); +}); diff --git a/lambdas/functions/control-plane/src/github/auth.ts b/lambdas/functions/control-plane/src/github/auth.ts index 9a572c48a8..9a8a59dcbb 100644 --- a/lambdas/functions/control-plane/src/github/auth.ts +++ b/lambdas/functions/control-plane/src/github/auth.ts @@ -27,6 +27,75 @@ import { EndpointDefaults } from '@octokit/types'; const logger = createChildLogger('gh-auth'); +interface GitHubAppCredential { + appId: number; + privateKey: string; + installationId?: number; +} + +let appCredentialsPromise: Promise | null = null; + +async function loadAppCredentials(): Promise { + if (!process.env.PARAMETER_GITHUB_APP_ID_NAME) { + throw new Error('Environment variable PARAMETER_GITHUB_APP_ID_NAME is not set'); + } + if (!process.env.PARAMETER_GITHUB_APP_KEY_BASE64_NAME) { + throw new Error('Environment variable PARAMETER_GITHUB_APP_KEY_BASE64_NAME is not set'); + } + const idParams = process.env.PARAMETER_GITHUB_APP_ID_NAME.split(':').filter(Boolean); + const keyParams = process.env.PARAMETER_GITHUB_APP_KEY_BASE64_NAME.split(':').filter(Boolean); + const 
installationIdParams = (process.env.PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME || '').split(':'); + if (idParams.length !== keyParams.length) { + throw new Error(`GitHub App parameter count mismatch: ${idParams.length} IDs vs ${keyParams.length} keys`); + } + // Batch fetch all SSM parameters in a single call to reduce API calls + const allParamNames = [...idParams, ...keyParams, ...installationIdParams.filter((p) => p.length > 0)]; + const params = await getParameters(allParamNames); + + const credentials: GitHubAppCredential[] = []; + for (let i = 0; i < idParams.length; i++) { + const appIdValue = params.get(idParams[i]); + if (!appIdValue) { + throw new Error(`Parameter ${idParams[i]} not found`); + } + const appId = parseInt(appIdValue); + const privateKeyBase64 = params.get(keyParams[i]); + if (!privateKeyBase64) { + throw new Error(`Parameter ${keyParams[i]} not found`); + } + // replace literal \n characters with new lines to allow the key to be stored as a + // single line variable. This logic should match how the GitHub Terraform provider + // processes private keys to retain compatibility between the projects + const privateKey = Buffer.from(privateKeyBase64, 'base64').toString().replace('/[\\n]/g', String.fromCharCode(10)); + const installationIdParam = installationIdParams[i]; + const installationId = + installationIdParam && installationIdParam.length > 0 + ? 
parseInt(params.get(installationIdParam) || '') + : undefined; + credentials.push({ appId, privateKey, installationId }); + } + logger.info(`Loaded ${credentials.length} GitHub App credential(s)`); + return credentials; +} + +function getAppCredentials(): Promise { + if (!appCredentialsPromise) appCredentialsPromise = loadAppCredentials(); + return appCredentialsPromise; +} + +export async function getAppCount(): Promise { + return (await getAppCredentials()).length; +} + +export function resetAppCredentialsCache(): void { + appCredentialsPromise = null; +} + +export async function getStoredInstallationId(appIndex: number): Promise { + const credentials = await getAppCredentials(); + return credentials[appIndex]?.installationId; +} + export async function createOctokitClient(token: string, ghesApiUrl = ''): Promise { const CustomOctokit = Octokit.plugin(retry, throttling); const ocktokitOptions: OctokitOptions = { @@ -67,19 +136,24 @@ export async function createOctokitClient(token: string, ghesApiUrl = ''): Promi export async function createGithubAppAuth( installationId: number | undefined, ghesApiUrl = '', -): Promise { - const auth = await createAuth(installationId, ghesApiUrl); - const appAuthOptions: AppAuthOptions = { type: 'app' }; - return auth(appAuthOptions); + appIndex?: number, +): Promise { + const credentials = await getAppCredentials(); + const idx = appIndex ?? 
Math.floor(Math.random() * credentials.length); + const auth = await createAuth(installationId, ghesApiUrl, idx); + const result = await auth({ type: 'app' }); + return { ...result, appIndex: idx }; } export async function createGithubInstallationAuth( installationId: number | undefined, ghesApiUrl = '', + appIndex?: number, ): Promise { - const auth = await createAuth(installationId, ghesApiUrl); - const installationAuthOptions: InstallationAuthOptions = { type: 'installation', installationId }; - return auth(installationAuthOptions); + const credentials = await getAppCredentials(); + const idx = appIndex ?? Math.floor(Math.random() * credentials.length); + const auth = await createAuth(installationId, ghesApiUrl, idx); + return auth({ type: 'installation', installationId }); } function signJwt(payload: Record, privateKey: string): string { @@ -90,33 +164,16 @@ function signJwt(payload: Record, privateKey: string): string { return `${message}.${signature}`; } -async function createAuth(installationId: number | undefined, ghesApiUrl: string): Promise { - const appIdParamName = process.env.PARAMETER_GITHUB_APP_ID_NAME; - const appKeyParamName = process.env.PARAMETER_GITHUB_APP_KEY_BASE64_NAME; - if (!appIdParamName) { - throw new Error('Environment variable PARAMETER_GITHUB_APP_ID_NAME is not set'); - } - if (!appKeyParamName) { - throw new Error('Environment variable PARAMETER_GITHUB_APP_KEY_BASE64_NAME is not set'); - } - - // Batch fetch both App ID and Private Key in a single SSM API call - const paramNames = [appIdParamName, appKeyParamName]; - const params = await getParameters(paramNames); - const appIdValue = params.get(appIdParamName); - const privateKeyBase64 = params.get(appKeyParamName); - if (!appIdValue) { - throw new Error(`Parameter ${appIdParamName} not found`); - } - if (!privateKeyBase64) { - throw new Error(`Parameter ${appKeyParamName} not found`); - } +async function createAuth( + installationId: number | undefined, + ghesApiUrl: string, + 
appIndex?: number, +): Promise { + const credentials = await getAppCredentials(); + const selected = + appIndex !== undefined ? credentials[appIndex] : credentials[Math.floor(Math.random() * credentials.length)]; - const appId = parseInt(appIdValue); - // replace literal \n characters with new lines to allow the key to be stored as a - // single line variable. This logic should match how the GitHub Terraform provider - // processes private keys to retain compatibility between the projects - const privateKey = Buffer.from(privateKeyBase64, 'base64').toString().replace('/[\\n]/g', String.fromCharCode(10)); + logger.debug(`Selected GitHub App ${selected.appId} for authentication`); // Use a custom createJwt callback to include a jti (JWT ID) claim in every token. // Without this, concurrent Lambda invocations generating JWTs within the same second @@ -126,11 +183,11 @@ async function createAuth(installationId: number | undefined, ghesApiUrl: string const now = Math.floor(Date.now() / 1000) + (timeDifference ?? 
0); const iat = now - 30; const exp = iat + 600; - const jwt = signJwt({ iat, exp, iss: appId, jti: randomUUID() }, privateKey); + const jwt = signJwt({ iat, exp, iss: appId, jti: randomUUID() }, selected.privateKey); return { jwt, expiresAt: new Date(exp * 1000).toISOString() }; }; - let authOptions: StrategyOptions = { appId, createJwt }; + let authOptions: StrategyOptions = { appId: selected.appId, createJwt }; if (installationId) authOptions = { ...authOptions, installationId }; logger.debug(`GHES API URL: ${ghesApiUrl}`); diff --git a/lambdas/functions/control-plane/src/github/octokit.test.ts b/lambdas/functions/control-plane/src/github/octokit.test.ts index 3e37d64757..653594204a 100644 --- a/lambdas/functions/control-plane/src/github/octokit.test.ts +++ b/lambdas/functions/control-plane/src/github/octokit.test.ts @@ -15,7 +15,9 @@ vi.mock('../github/auth', async () => ({ return { token: 'token', type: 'installation', installationId: installationId }; }), createOctokitClient: vi.fn().mockImplementation(() => new Octokit()), - createGithubAppAuth: vi.fn().mockResolvedValue({ token: 'token' }), + createGithubAppAuth: vi.fn().mockResolvedValue({ token: 'token', appIndex: 0 }), + getAppCount: vi.fn().mockResolvedValue(1), + getStoredInstallationId: vi.fn().mockResolvedValue(undefined), })); vi.mock('@octokit/rest', async () => ({ diff --git a/lambdas/functions/control-plane/src/github/octokit.ts b/lambdas/functions/control-plane/src/github/octokit.ts index a2cce5f55d..fb646c5bb2 100644 --- a/lambdas/functions/control-plane/src/github/octokit.ts +++ b/lambdas/functions/control-plane/src/github/octokit.ts @@ -1,17 +1,32 @@ import { Octokit } from '@octokit/rest'; import { ActionRequestMessage } from '../scale-runners/scale-up'; -import { createGithubAppAuth, createGithubInstallationAuth, createOctokitClient } from './auth'; +import { + createGithubAppAuth, + createGithubInstallationAuth, + createOctokitClient, + getAppCount, + getStoredInstallationId, +} from 
'./auth'; export async function getInstallationId( ghesApiUrl: string, enableOrgLevel: boolean, payload: ActionRequestMessage, + appIndex?: number, ): Promise { - if (payload.installationId !== 0) { + // Use pre-stored installation ID when available (avoids an API call) + if (appIndex !== undefined) { + const storedId = await getStoredInstallationId(appIndex); + if (storedId !== undefined) return storedId; + } + + const multiApp = (await getAppCount()) > 1; + + if (!multiApp && payload.installationId !== 0) { return payload.installationId; } - const ghAuth = await createGithubAppAuth(undefined, ghesApiUrl); + const ghAuth = await createGithubAppAuth(undefined, ghesApiUrl, appIndex); const githubClient = await createOctokitClient(ghAuth.token, ghesApiUrl); return enableOrgLevel ? ( @@ -40,7 +55,11 @@ export async function getOctokit( enableOrgLevel: boolean, payload: ActionRequestMessage, ): Promise { - const installationId = await getInstallationId(ghesApiUrl, enableOrgLevel, payload); - const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); - return await createOctokitClient(ghAuth.token, ghesApiUrl); + // Select one app for this entire auth flow + const ghAuth = await createGithubAppAuth(undefined, ghesApiUrl); + const appIdx = ghAuth.appIndex; + + const installationId = await getInstallationId(ghesApiUrl, enableOrgLevel, payload, appIdx); + const installationAuth = await createGithubInstallationAuth(installationId, ghesApiUrl, appIdx); + return await createOctokitClient(installationAuth.token, ghesApiUrl); } diff --git a/lambdas/functions/control-plane/src/github/rate-limit.ts b/lambdas/functions/control-plane/src/github/rate-limit.ts index 8ebb8e3f84..b41aa5fe81 100644 --- a/lambdas/functions/control-plane/src/github/rate-limit.ts +++ b/lambdas/functions/control-plane/src/github/rate-limit.ts @@ -9,7 +9,8 @@ let appIdPromise: Promise | null = null; async function getAppId(): Promise { if (!appIdPromise) { - appIdPromise = 
getParameter(process.env.PARAMETER_GITHUB_APP_ID_NAME); + const paramName = process.env.PARAMETER_GITHUB_APP_ID_NAME.split(':')[0]; + appIdPromise = getParameter(paramName); } return appIdPromise; } diff --git a/lambdas/functions/control-plane/src/pool/pool.test.ts b/lambdas/functions/control-plane/src/pool/pool.test.ts index ee4e36a463..fee106a861 100644 --- a/lambdas/functions/control-plane/src/pool/pool.test.ts +++ b/lambdas/functions/control-plane/src/pool/pool.test.ts @@ -34,6 +34,7 @@ vi.mock('./../github/auth', async () => ({ createGithubAppAuth: vi.fn(), createGithubInstallationAuth: vi.fn(), createOctokitClient: vi.fn(), + getStoredInstallationId: vi.fn().mockResolvedValue(undefined), })); vi.mock('../scale-runners/scale-up', async () => ({ @@ -166,6 +167,7 @@ beforeEach(() => { token: 'token', appId: 1, expiresAt: 'some-date', + appIndex: 0, }); mockedInstallationAuth.mockResolvedValue({ type: 'token', @@ -360,4 +362,46 @@ describe('Test simple pool.', () => { ); }); }); + + describe('Multi-app round-robin', () => { + beforeEach(() => { + (getGitHubEnterpriseApiUrl as ReturnType).mockReturnValue({ + ghesApiUrl: '', + ghesBaseUrl: '', + }); + }); + + it('passes the same appIndex to createGithubInstallationAuth', async () => { + mockedAppAuth.mockResolvedValue({ + type: 'app', + token: 'token', + appId: 42, + expiresAt: 'some-date', + appIndex: 1, + }); + + await adjust({ poolSize: 3 }); + + expect(mockedInstallationAuth).toHaveBeenCalledWith( + expect.any(Number), + expect.any(String), + 1, // appIndex must match the one from createGithubAppAuth + ); + }); + + it('looks up installationId using the selected app JWT', async () => { + mockedAppAuth.mockResolvedValue({ + type: 'app', + token: 'app-token-for-selected-app', + appId: 42, + expiresAt: 'some-date', + appIndex: 1, + }); + + await adjust({ poolSize: 3 }); + + // Should look up installationId via the API + expect(mockOctokit.apps.getOrgInstallation).toHaveBeenCalledWith({ org: ORG }); + }); + }); }); 
diff --git a/lambdas/functions/control-plane/src/pool/pool.ts b/lambdas/functions/control-plane/src/pool/pool.ts index cece8d9951..2bff4fe72c 100644 --- a/lambdas/functions/control-plane/src/pool/pool.ts +++ b/lambdas/functions/control-plane/src/pool/pool.ts @@ -4,7 +4,12 @@ import yn from 'yn'; import { bootTimeExceeded, listEC2Runners } from '../aws/runners'; import { RunnerList } from '../aws/runners.d'; -import { createGithubAppAuth, createGithubInstallationAuth, createOctokitClient } from '../github/auth'; +import { + createGithubAppAuth, + createGithubInstallationAuth, + createOctokitClient, + getStoredInstallationId, +} from '../github/auth'; import { createRunners, getGitHubEnterpriseApiUrl } from '../scale-runners/scale-up'; import { validateSsmParameterStoreTags } from '../scale-runners/scale-up'; @@ -50,8 +55,16 @@ export async function adjust(event: PoolEvent): Promise { const { ghesApiUrl, ghesBaseUrl } = getGitHubEnterpriseApiUrl(); - const installationId = await getInstallationId(ghesApiUrl, runnerOwner); - const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); + const ghAppAuth = await createGithubAppAuth(undefined, ghesApiUrl); + const appIdx = ghAppAuth.appIndex; + + // Use pre-stored installation ID when available (avoids an API call) + let installationId = await getStoredInstallationId(appIdx); + if (installationId === undefined) { + const githubAppClient = await createOctokitClient(ghAppAuth.token, ghesApiUrl); + installationId = (await githubAppClient.apps.getOrgInstallation({ org: runnerOwner })).data.id; + } + const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl, appIdx); const githubInstallationClient = await createOctokitClient(ghAuth.token, ghesApiUrl); // Get statuses of runners registered in GitHub @@ -113,17 +126,6 @@ export async function adjust(event: PoolEvent): Promise { } } -async function getInstallationId(ghesApiUrl: string, org: string): Promise { - const ghAuth = await 
createGithubAppAuth(undefined, ghesApiUrl); - const githubClient = await createOctokitClient(ghAuth.token, ghesApiUrl); - - return ( - await githubClient.apps.getOrgInstallation({ - org, - }) - ).data.id; -} - function calculatePooSize(ec2runners: RunnerList[], runnerStatus: Map): number { // Runner should be considered idle if it is still booting, or is idle in GitHub let numberOfRunnersInPool = 0; diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts b/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts index 2dfb190a38..42fd442a3f 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts @@ -45,6 +45,7 @@ vi.mock('./../github/auth', async () => ({ createGithubAppAuth: vi.fn(), createGithubInstallationAuth: vi.fn(), createOctokitClient: vi.fn(), + getStoredInstallationId: vi.fn().mockResolvedValue(undefined), })); vi.mock('./cache', async () => ({ @@ -169,6 +170,7 @@ describe('Scale down runners', () => { token: 'token', appId: 1, expiresAt: 'some-date', + appIndex: 0, }); mockedInstallationAuth.mockResolvedValue({ type: 'token', @@ -773,6 +775,30 @@ describe('Scale down runners', () => { expect(runnersTest[2].launchTime).not.toBeDefined(); }); }); + + describe('Multi-app round-robin', () => { + it('passes the same appIndex to createGithubInstallationAuth', async () => { + mockedAppAuth.mockResolvedValue({ + type: 'app', + token: 'token', + appId: 42, + expiresAt: 'some-date', + appIndex: 1, + }); + + const runners = [createRunnerTestData('idle-1', 'Org', MINIMUM_TIME_RUNNING_IN_MINUTES + 1, true, false, true)]; + mockGitHubRunners(runners); + mockAwsRunners(runners); + + await scaleDown(); + + expect(mockedInstallationAuth).toHaveBeenCalledWith( + expect.anything(), + expect.any(String), + 1, // appIndex must match the one from createGithubAppAuth + ); + }); + }); }); function mockAwsRunners(runners: RunnerTestItem[]) { 
diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-down.ts b/lambdas/functions/control-plane/src/scale-runners/scale-down.ts index 6086af7714..c92dddfca9 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-down.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-down.ts @@ -4,7 +4,12 @@ import { RequestError } from '@octokit/request-error'; import { createChildLogger } from '@aws-github-runner/aws-powertools-util'; import moment from 'moment'; -import { createGithubAppAuth, createGithubInstallationAuth, createOctokitClient } from '../github/auth'; +import { + createGithubAppAuth, + createGithubInstallationAuth, + createOctokitClient, + getStoredInstallationId, +} from '../github/auth'; import { bootTimeExceeded, listEC2Runners, tag, untag, terminateRunner } from './../aws/runners'; import { RunnerInfo, RunnerList } from './../aws/runners.d'; import { GhRunners, githubCache } from './cache'; @@ -30,22 +35,27 @@ async function getOrCreateOctokit(runner: RunnerInfo): Promise { logger.debug(`[createGitHubClientForRunner] Cache miss for ${key}`); const { ghesApiUrl } = getGitHubEnterpriseApiUrl(); const ghAuthPre = await createGithubAppAuth(undefined, ghesApiUrl); - const githubClientPre = await createOctokitClient(ghAuthPre.token, ghesApiUrl); - - const installationId = - runner.type === 'Org' - ? 
( - await githubClientPre.apps.getOrgInstallation({ - org: runner.owner, - }) - ).data.id - : ( - await githubClientPre.apps.getRepoInstallation({ - owner: runner.owner.split('/')[0], - repo: runner.owner.split('/')[1], - }) - ).data.id; - const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); + const appIdx = ghAuthPre.appIndex; + + // Use pre-stored installation ID when available (avoids an API call) + let installationId = await getStoredInstallationId(appIdx); + if (installationId === undefined) { + const githubClientPre = await createOctokitClient(ghAuthPre.token, ghesApiUrl); + installationId = + runner.type === 'Org' + ? ( + await githubClientPre.apps.getOrgInstallation({ + org: runner.owner, + }) + ).data.id + : ( + await githubClientPre.apps.getRepoInstallation({ + owner: runner.owner.split('/')[0], + repo: runner.owner.split('/')[1], + }) + ).data.id; + } + const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl, appIdx); const octokit = await createOctokitClient(ghAuth.token, ghesApiUrl); githubCache.clients.set(key, octokit); diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts index 8ac2c14489..52f6a6973f 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts @@ -52,6 +52,8 @@ vi.mock('./../github/auth', async () => ({ createGithubAppAuth: vi.fn(), createGithubInstallationAuth: vi.fn(), createOctokitClient: vi.fn(), + getAppCount: vi.fn().mockResolvedValue(1), + getStoredInstallationId: vi.fn().mockResolvedValue(undefined), })); vi.mock('@aws-github-runner/aws-ssm-util', async () => { @@ -161,6 +163,7 @@ beforeEach(() => { token: 'token', appId: TEST_DATA_SINGLE.installationId, expiresAt: 'some-date', + appIndex: 0, }); mockedInstallationAuth.mockResolvedValue({ type: 'token', @@ -820,8 +823,8 @@ describe('scaleUp with 
GHES', () => { await scaleUpModule.scaleUp(messages); expect(mockCreateClient).toHaveBeenCalledTimes(3); // 1 app client, 2 repo installation clients - expect(mockedInstallationAuth).toHaveBeenCalledWith(100, 'https://github.enterprise.something/api/v3'); - expect(mockedInstallationAuth).toHaveBeenCalledWith(200, 'https://github.enterprise.something/api/v3'); + expect(mockedInstallationAuth).toHaveBeenCalledWith(100, 'https://github.enterprise.something/api/v3', 0); + expect(mockedInstallationAuth).toHaveBeenCalledWith(200, 'https://github.enterprise.something/api/v3', 0); }); it('Should reuse GitHub clients for same installation', async () => { @@ -1281,8 +1284,8 @@ describe('scaleUp with public GH', () => { await scaleUpModule.scaleUp(messages); expect(mockCreateClient).toHaveBeenCalledTimes(3); // 1 app client, 2 repo installation clients - expect(mockedInstallationAuth).toHaveBeenCalledWith(100, ''); - expect(mockedInstallationAuth).toHaveBeenCalledWith(200, ''); + expect(mockedInstallationAuth).toHaveBeenCalledWith(100, '', 0); + expect(mockedInstallationAuth).toHaveBeenCalledWith(200, '', 0); }); it('Should reuse GitHub clients for same installation', async () => { @@ -1808,8 +1811,8 @@ describe('scaleUp with Github Data Residency', () => { await scaleUpModule.scaleUp(messages); expect(mockCreateClient).toHaveBeenCalledTimes(3); // 1 app client, 2 repo installation clients - expect(mockedInstallationAuth).toHaveBeenCalledWith(100, ''); - expect(mockedInstallationAuth).toHaveBeenCalledWith(200, ''); + expect(mockedInstallationAuth).toHaveBeenCalledWith(100, '', 0); + expect(mockedInstallationAuth).toHaveBeenCalledWith(200, '', 0); }); it('Should reuse GitHub clients for same installation', async () => { @@ -2019,6 +2022,85 @@ describe('Retry mechanism tests', () => { }); }); +describe('Multi-app round-robin', () => { + const mockedGetAppCount = vi.mocked(ghAuth.getAppCount); + + beforeEach(() => { + process.env.ENABLE_ORGANIZATION_RUNNERS = 'true'; + 
process.env.ENABLE_EPHEMERAL_RUNNERS = 'true'; + process.env.ENABLE_JOB_QUEUED_CHECK = 'false'; + process.env.RUNNERS_MAXIMUM_COUNT = '10'; + process.env.RUNNER_NAME_PREFIX = 'unit-test-'; + process.env.RUNNER_GROUP_NAME = 'Default'; + process.env.SSM_CONFIG_PATH = '/github-action-runners/default/runners/config'; + process.env.SSM_TOKEN_PATH = '/github-action-runners/default/runners/config'; + process.env.RUNNER_LABELS = 'label1,label2'; + expectedRunnerParams = { ...EXPECTED_RUNNER_PARAMS }; + mockSSMClient.reset(); + }); + + it('passes the same appIndex to createGithubInstallationAuth when multi-app is active', async () => { + mockedGetAppCount.mockResolvedValue(2); + mockedAppAuth.mockResolvedValue({ + type: 'app', + token: 'token', + appId: 42, + expiresAt: 'some-date', + appIndex: 1, + }); + + await scaleUpModule.scaleUp([{ ...TEST_DATA_SINGLE, installationId: 0 }]); + + expect(mockedInstallationAuth).toHaveBeenCalledWith( + expect.any(Number), + expect.any(String), + 1, // appIndex must match the one from createGithubAppAuth + ); + }); + + it('looks up installationId via API when multi-app, even if webhook has installationId', async () => { + mockedGetAppCount.mockResolvedValue(2); + mockedAppAuth.mockResolvedValue({ + type: 'app', + token: 'token', + appId: 42, + expiresAt: 'some-date', + appIndex: 1, + }); + + // webhook payload has installationId = 999 (belongs to primary app) + await scaleUpModule.scaleUp([{ ...TEST_DATA_SINGLE, installationId: 999 }]); + + // Should NOT use 999 from webhook — should look up via API instead + expect(mockOctokit.apps.getOrgInstallation).toHaveBeenCalledWith({ + org: TEST_DATA_SINGLE.repositoryOwner, + }); + // installationId passed to createGithubInstallationAuth should come from API (2), not webhook (999) + expect(mockedInstallationAuth).toHaveBeenCalledWith( + TEST_DATA_SINGLE.installationId, // from mockOctokit.apps.getOrgInstallation mock + expect.any(String), + 1, + ); + }); + + it('uses webhook installationId when 
single-app (no API lookup needed)', async () => { + mockedGetAppCount.mockResolvedValue(1); + mockedAppAuth.mockResolvedValue({ + type: 'app', + token: 'token', + appId: 42, + expiresAt: 'some-date', + appIndex: 0, + }); + + await scaleUpModule.scaleUp([{ ...TEST_DATA_SINGLE, installationId: 999 }]); + + // Should use 999 from webhook directly — no API lookup + expect(mockOctokit.apps.getOrgInstallation).not.toHaveBeenCalled(); + expect(mockedInstallationAuth).toHaveBeenCalledWith(999, expect.any(String), 0); + }); +}); + function defaultOctokitMockImpl() { mockOctokit.actions.getJobForWorkflowRun.mockImplementation(() => ({ data: { diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts index 395c87e8f8..ba5ffaa907 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts @@ -3,7 +3,13 @@ import { addPersistentContextToChildLogger, createChildLogger } from '@aws-githu import { getParameter, putParameter } from '@aws-github-runner/aws-ssm-util'; import yn from 'yn'; -import { createGithubAppAuth, createGithubInstallationAuth, createOctokitClient } from '../github/auth'; +import { + createGithubAppAuth, + createGithubInstallationAuth, + createOctokitClient, + getAppCount, + getStoredInstallationId, +} from '../github/auth'; import { createRunner, listEC2Runners, tag, terminateRunner } from './../aws/runners'; import { RunnerInputParameters } from './../aws/runners.d'; import { metricGitHubAppRateLimit } from '../github/rate-limit'; @@ -153,8 +159,16 @@ export async function getInstallationId( githubAppClient: Octokit, enableOrgLevel: boolean, payload: ActionRequestMessage, + multiApp = false, + appIndex?: number, ): Promise { - if (payload.installationId !== 0) { + // Use pre-stored installation ID when available (avoids an API call) + if (appIndex !== undefined) { + const storedId = await 
getStoredInstallationId(appIndex); + if (storedId !== undefined) return storedId; + } + + if (!multiApp && payload.installationId !== 0) { return payload.installationId; } @@ -325,7 +339,9 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise 1; // A map of either owner or owner/repo name to Octokit client, so we use a // single client per installation (set of messages), depending on how the app @@ -373,9 +389,9 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise This module replaces the top-level module to make it easy to create with one deployment multiple type of runners. -This module creates many runners with a single GitHub app. The module utilizes the internal modules and deploys parts of the stack for each runner defined. +This module creates many runners with one or more GitHub Apps. The module utilizes the internal modules and deploys parts of the stack for each runner defined. + +### GitHub App round-robin + +To distribute GitHub API rate limit usage, this module supports configuring multiple GitHub Apps via the `additional_github_apps` variable. The control-plane lambdas (scale-up, scale-down, pool, job-retry) randomly select an app for each API call, spreading the load across all configured apps. + +The **primary app** (`github_app`) is special: +- Its **webhook secret** is used to validate incoming GitHub webhook payloads. Only the primary app needs a webhook URL configured in GitHub. +- Its **app ID and private key** are included in the round-robin pool alongside the additional apps. + +Additional apps only need `id` and `key_base64` credentials (no webhook secret). They must be installed on the same repositories/organizations as the primary app. + +The **webhook lambda** does not participate in round-robin: it only validates incoming webhook signatures using the primary app's webhook secret and never calls the GitHub API. The module takes a configuration as input containing a matcher for the labels. 
The [webhook](https://github-aws-runners.github.io/terraform-aws-github-runner/modules/internal/webhook/) lambda is using the configuration to delegate events based on the labels in the workflow job and sent them to a dedicated queue based on the configuration. Events on each queue are processed by a dedicated lambda per configuration to scale runners. @@ -37,6 +49,15 @@ module "multi-runner" { # app details } + # Optional: distribute GitHub API rate limit across multiple apps + # additional_github_apps = [ + # { + # key_base64 = "base64-encoded-private-key" + # id = "123456" + # installation_id = "789" # optional, avoids an API call per invocation + # }, + # ] + multi_runner_config = { "linux-arm" = { matcherConfig : { @@ -115,6 +136,7 @@ module "multi-runner" { | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [additional\_github\_apps](#input\_additional\_github\_apps) | Additional GitHub Apps for round-robin API rate limit distribution.

The primary app (var.github\_app) is always included and is the one whose
webhook secret is used for incoming webhook signature validation. Only the
primary app needs a webhook configured in GitHub.

Additional apps listed here are used exclusively by the control-plane
lambdas (scale-up, scale-down, pool, job-retry), which randomly select an
app for each GitHub API call. Each additional app must be installed on the
same repositories/organizations as the primary app. |
list(object({
key_base64 = optional(string)
key_base64_ssm = optional(object({ arn = string, name = string }))
id = optional(string)
id_ssm = optional(object({ arn = string, name = string }))
installation_id = optional(string)
installation_id_ssm = optional(object({ arn = string, name = string }))
}))
| `[]` | no | | [ami\_housekeeper\_cleanup\_config](#input\_ami\_housekeeper\_cleanup\_config) | Configuration for AMI cleanup. |
object({
maxItems = optional(number)
minimumDaysOld = optional(number)
amiFilters = optional(list(object({
Name = string
Values = list(string)
})))
launchTemplateNames = optional(list(string))
ssmParameterNames = optional(list(string))
dryRun = optional(bool)
})
| `{}` | no | | [ami\_housekeeper\_lambda\_memory\_size](#input\_ami\_housekeeper\_lambda\_memory\_size) | Memory size limit in MB of the lambda. | `number` | `256` | no | | [ami\_housekeeper\_lambda\_s3\_key](#input\_ami\_housekeeper\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | diff --git a/modules/multi-runner/main.tf b/modules/multi-runner/main.tf index 905cc7f793..debc4a378b 100644 --- a/modules/multi-runner/main.tf +++ b/modules/multi-runner/main.tf @@ -3,9 +3,22 @@ locals { "ghr:environment" = var.prefix }) + primary_app_id = coalesce(var.github_app.id_ssm, module.ssm.parameters.github_app_id) + primary_app_key_base64 = coalesce(var.github_app.key_base64_ssm, module.ssm.parameters.github_app_key_base64) + github_app_parameters = { - id = coalesce(var.github_app.id_ssm, module.ssm.parameters.github_app_id) - key_base64 = coalesce(var.github_app.key_base64_ssm, module.ssm.parameters.github_app_key_base64) + id = concat( + [local.primary_app_id], + [for p in module.ssm.additional_app_parameters : p.id] + ) + key_base64 = concat( + [local.primary_app_key_base64], + [for p in module.ssm.additional_app_parameters : p.key_base64] + ) + installation_id = concat( + [null], + [for p in module.ssm.additional_app_parameters : p.installation_id] + ) webhook_secret = coalesce(var.github_app.webhook_secret_ssm, module.ssm.parameters.github_app_webhook_secret) } diff --git a/modules/multi-runner/outputs.tf b/modules/multi-runner/outputs.tf index 7ce7171faf..b22278accf 100644 --- a/modules/multi-runner/outputs.tf +++ b/modules/multi-runner/outputs.tf @@ -45,11 +45,20 @@ output "webhook" { } output "ssm_parameters" { - value = { for k, v in local.github_app_parameters : k => { - name = v.name - arn = v.arn - } - } + value = merge( + { for idx, v in local.github_app_parameters.id : "github_app_id_${idx}" => { + name = v.name + arn = v.arn + } }, + { for idx, v in 
local.github_app_parameters.key_base64 : "github_app_key_base64_${idx}" => { + name = v.name + arn = v.arn + } }, + { "github_app_webhook_secret" = { + name = local.github_app_parameters.webhook_secret.name + arn = local.github_app_parameters.webhook_secret.arn + } }, + ) } output "instance_termination_watcher" { diff --git a/modules/multi-runner/ssm.tf b/modules/multi-runner/ssm.tf index 6a3a234e6f..3e4b740fdd 100644 --- a/modules/multi-runner/ssm.tf +++ b/modules/multi-runner/ssm.tf @@ -1,7 +1,8 @@ module "ssm" { - source = "../ssm" - kms_key_arn = var.kms_key_arn - path_prefix = "${local.ssm_root_path}/${var.ssm_paths.app}" - github_app = var.github_app - tags = local.tags + source = "../ssm" + kms_key_arn = var.kms_key_arn + path_prefix = "${local.ssm_root_path}/${var.ssm_paths.app}" + github_app = var.github_app + additional_github_apps = var.additional_github_apps + tags = local.tags } diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index 613cf8b2ce..4834abc28f 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -36,6 +36,38 @@ variable "github_app" { } +variable "additional_github_apps" { + description = <<-EOF + Additional GitHub Apps for round-robin API rate limit distribution. + + The primary app (var.github_app) is always included and is the one whose + webhook secret is used for incoming webhook signature validation. Only the + primary app needs a webhook configured in GitHub. + + Additional apps listed here are used exclusively by the control-plane + lambdas (scale-up, scale-down, pool, job-retry) which randomly select an + app for each GitHub API call. Each additional app must be installed on the + same repositories/organizations as the primary app. 
+ EOF + type = list(object({ + key_base64 = optional(string) + key_base64_ssm = optional(object({ arn = string, name = string })) + id = optional(string) + id_ssm = optional(object({ arn = string, name = string })) + installation_id = optional(string) + installation_id_ssm = optional(object({ arn = string, name = string })) + })) + default = [] + validation { + condition = alltrue([ + for app in var.additional_github_apps : + (app.key_base64 != null || app.key_base64_ssm != null) && + (app.id != null || app.id_ssm != null) + ]) + error_message = "Each additional GitHub app must provide either key_base64 or key_base64_ssm, and either id or id_ssm." + } +} + variable "prefix" { description = "The prefix used for naming resources" type = string diff --git a/modules/runners/README.md b/modules/runners/README.md index 6a27276624..2c16544a74 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -162,7 +162,7 @@ yarn run dist | [enable\_userdata](#input\_enable\_userdata) | Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI | `bool` | `true` | no | | [ghes\_ssl\_verify](#input\_ghes\_ssl\_verify) | GitHub Enterprise SSL verification. Set to 'false' when custom certificate (chains) is used for GitHub Enterprise Server (insecure). | `bool` | `true` | no | | [ghes\_url](#input\_ghes\_url) | GitHub Enterprise Server URL. DO NOT SET IF USING PUBLIC GITHUB..However if you are using GitHub Enterprise Cloud with data-residency (ghe.com), set the endpoint here. Example - https://companyname.ghe.com\| | `string` | `null` | no | -| [github\_app\_parameters](#input\_github\_app\_parameters) | Parameter Store for GitHub App Parameters. |
object({
key_base64 = map(string)
id = map(string)
})
| n/a | yes | +| [github\_app\_parameters](#input\_github\_app\_parameters) | Parameter Store for GitHub App Parameters.

Supports multiple GitHub Apps for distributing API rate limit usage.
Each list element corresponds to one GitHub App and is a map containing
`name` and `arn` keys referencing SSM parameters. The first element is the
primary app (the one whose webhook secret is used for incoming webhook
validation). All apps must be installed on the same repositories/organizations.

The control-plane lambdas (scale-up, scale-down, pool, job-retry) randomly
select an app from the list for each GitHub API call, distributing rate
limit consumption across all configured apps. |
object({
key_base64 = list(map(string))
id = list(map(string))
installation_id = list(object({ name = string, arn = string }))
})
| n/a | yes | | [idle\_config](#input\_idle\_config) | List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle. |
list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
}))
| `[]` | no | | [instance\_allocation\_strategy](#input\_instance\_allocation\_strategy) | The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`. | `string` | `"lowest-price"` | no | | [instance\_max\_spot\_price](#input\_instance\_max\_spot\_price) | Max price price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet. | `string` | `null` | no | diff --git a/modules/runners/job-retry/main.tf b/modules/runners/job-retry/main.tf index eba478b214..d5455951d0 100644 --- a/modules/runners/job-retry/main.tf +++ b/modules/runners/job-retry/main.tf @@ -3,14 +3,15 @@ locals { name = "job-retry" environment_variables = { - ENABLE_ORGANIZATION_RUNNERS = var.config.enable_organization_runners - ENABLE_METRIC_JOB_RETRY = var.config.metrics.enable && var.config.metrics.metric.enable_job_retry - ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.config.metrics.enable && var.config.metrics.metric.enable_github_app_rate_limit - GHES_URL = var.config.ghes_url - USER_AGENT = var.config.user_agent - JOB_QUEUE_SCALE_UP_URL = var.config.sqs_build_queue.url - PARAMETER_GITHUB_APP_ID_NAME = var.config.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.config.github_app_parameters.key_base64.name + ENABLE_ORGANIZATION_RUNNERS = var.config.enable_organization_runners + ENABLE_METRIC_JOB_RETRY = var.config.metrics.enable && var.config.metrics.metric.enable_job_retry + ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.config.metrics.enable && var.config.metrics.metric.enable_github_app_rate_limit + GHES_URL = var.config.ghes_url + USER_AGENT = var.config.user_agent + JOB_QUEUE_SCALE_UP_URL = var.config.sqs_build_queue.url + PARAMETER_GITHUB_APP_ID_NAME = join(":", [for p in var.config.github_app_parameters.id : p.name]) + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = join(":", [for p in var.config.github_app_parameters.key_base64 : p.name]) + 
PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME = join(":", [for p in var.config.github_app_parameters.installation_id : p != null ? p.name : ""]) } config = merge(var.config, { @@ -62,11 +63,14 @@ resource "aws_iam_role_policy" "job_retry" { name = "job_retry-policy" role = module.job_retry.lambda.role.name policy = templatefile("${path.module}/policies/lambda.json", { - kms_key_arn = var.config.kms_key_arn != null ? var.config.kms_key_arn : "" - sqs_build_queue_arn = var.config.sqs_build_queue.arn - sqs_job_retry_queue_arn = aws_sqs_queue.job_retry_check_queue.arn - github_app_id_arn = var.config.github_app_parameters.id.arn - github_app_key_base64_arn = var.config.github_app_parameters.key_base64.arn + kms_key_arn = var.config.kms_key_arn != null ? var.config.kms_key_arn : "" + sqs_build_queue_arn = var.config.sqs_build_queue.arn + sqs_job_retry_queue_arn = aws_sqs_queue.job_retry_check_queue.arn + github_app_parameter_arns = jsonencode(concat( + [for p in var.config.github_app_parameters.id : p.arn], + [for p in var.config.github_app_parameters.key_base64 : p.arn], + [for p in var.config.github_app_parameters.installation_id : p.arn if p != null], + )) }) } diff --git a/modules/runners/job-retry/policies/lambda.json b/modules/runners/job-retry/policies/lambda.json index f1c9efd569..e1be5c6707 100644 --- a/modules/runners/job-retry/policies/lambda.json +++ b/modules/runners/job-retry/policies/lambda.json @@ -7,10 +7,7 @@ "ssm:GetParameter", "ssm:GetParameters" ], - "Resource": [ - "${github_app_key_base64_arn}", - "${github_app_id_arn}" - ] + "Resource": ${github_app_parameter_arns} }, { "Effect": "Allow", diff --git a/modules/runners/job-retry/variables.tf b/modules/runners/job-retry/variables.tf index 7ccfdf63b3..cb010d7552 100644 --- a/modules/runners/job-retry/variables.tf +++ b/modules/runners/job-retry/variables.tf @@ -44,8 +44,9 @@ variable "config" { ghes_url = optional(string, null) user_agent = optional(string, null) github_app_parameters = object({ - 
key_base64 = map(string) - id = map(string) + key_base64 = list(map(string)) + id = list(map(string)) + installation_id = list(object({ name = string, arn = string })) }) kms_key_arn = optional(string, null) lambda_event_source_mapping_batch_size = optional(number, 10) diff --git a/modules/runners/policies/lambda-scale-down.json b/modules/runners/policies/lambda-scale-down.json index 067a747c81..ee7b1db952 100644 --- a/modules/runners/policies/lambda-scale-down.json +++ b/modules/runners/policies/lambda-scale-down.json @@ -49,10 +49,7 @@ "ssm:GetParameter", "ssm:GetParameters" ], - "Resource": [ - "${github_app_key_base64_arn}", - "${github_app_id_arn}" - ] + "Resource": ${github_app_parameter_arns} %{ if kms_key_arn != "" ~} }, { diff --git a/modules/runners/policies/lambda-scale-up.json b/modules/runners/policies/lambda-scale-up.json index 93faf506a3..271754c005 100644 --- a/modules/runners/policies/lambda-scale-up.json +++ b/modules/runners/policies/lambda-scale-up.json @@ -33,12 +33,7 @@ "ssm:GetParameter", "ssm:GetParameters" ], - "Resource": [ - "${github_app_key_base64_arn}", - "${github_app_id_arn}", - "${ssm_config_path}/*", - "${ssm_ami_id_parameter_arn}" - ] + "Resource": ${github_app_parameter_arns} }, { "Effect": "Allow", diff --git a/modules/runners/pool/main.tf b/modules/runners/pool/main.tf index 5363f3c3fb..4cf8661686 100644 --- a/modules/runners/pool/main.tf +++ b/modules/runners/pool/main.tf @@ -17,38 +17,39 @@ resource "aws_lambda_function" "pool" { environment { variables = { - AMI_ID_SSM_PARAMETER_NAME = var.config.ami_id_ssm_parameter_name - DISABLE_RUNNER_AUTOUPDATE = var.config.runner.disable_runner_autoupdate - ENABLE_EPHEMERAL_RUNNERS = var.config.runner.ephemeral - ENABLE_JIT_CONFIG = var.config.runner.enable_jit_config - ENVIRONMENT = var.config.prefix - GHES_URL = var.config.ghes.url - USER_AGENT = var.config.user_agent - INSTANCE_ALLOCATION_STRATEGY = var.config.instance_allocation_strategy - INSTANCE_MAX_SPOT_PRICE = 
var.config.instance_max_spot_price - INSTANCE_TARGET_CAPACITY_TYPE = var.config.instance_target_capacity_type - INSTANCE_TYPES = join(",", var.config.instance_types) - LAUNCH_TEMPLATE_NAME = var.config.runner.launch_template.name - LOG_LEVEL = var.config.lambda.log_level - NODE_TLS_REJECT_UNAUTHORIZED = var.config.ghes.url != null && !var.config.ghes.ssl_verify ? 0 : 1 - PARAMETER_GITHUB_APP_ID_NAME = var.config.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.config.github_app_parameters.key_base64.name - POWERTOOLS_LOGGER_LOG_EVENT = var.config.lambda.log_level == "debug" ? "true" : "false" - RUNNER_BOOT_TIME_IN_MINUTES = var.config.runner.boot_time_in_minutes - RUNNER_LABELS = lower(join(",", var.config.runner.labels)) - RUNNER_GROUP_NAME = var.config.runner.group_name - RUNNER_NAME_PREFIX = var.config.runner.name_prefix - RUNNER_OWNER = var.config.runner.pool_owner - SSM_TOKEN_PATH = var.config.ssm_token_path - SSM_CONFIG_PATH = var.config.ssm_config_path - SUBNET_IDS = join(",", var.config.subnet_ids) - POWERTOOLS_SERVICE_NAME = "${var.config.prefix}-pool" - POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false - POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests - POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error - ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS = jsonencode(var.config.runner.enable_on_demand_failover_for_errors) - SSM_PARAMETER_STORE_TAGS = var.config.lambda.parameter_store_tags - SCALE_ERRORS = jsonencode(var.config.runner.scale_errors) + AMI_ID_SSM_PARAMETER_NAME = var.config.ami_id_ssm_parameter_name + DISABLE_RUNNER_AUTOUPDATE = var.config.runner.disable_runner_autoupdate + ENABLE_EPHEMERAL_RUNNERS = var.config.runner.ephemeral + ENABLE_JIT_CONFIG = var.config.runner.enable_jit_config + ENVIRONMENT = var.config.prefix + GHES_URL = var.config.ghes.url + USER_AGENT = var.config.user_agent + INSTANCE_ALLOCATION_STRATEGY = var.config.instance_allocation_strategy + INSTANCE_MAX_SPOT_PRICE = var.config.instance_max_spot_price + INSTANCE_TARGET_CAPACITY_TYPE = var.config.instance_target_capacity_type + INSTANCE_TYPES = join(",", var.config.instance_types) + LAUNCH_TEMPLATE_NAME = var.config.runner.launch_template.name + LOG_LEVEL = var.config.lambda.log_level + NODE_TLS_REJECT_UNAUTHORIZED = var.config.ghes.url != null && !var.config.ghes.ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = join(":", [for p in var.config.github_app_parameters.id : p.name]) + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = join(":", [for p in var.config.github_app_parameters.key_base64 : p.name]) + PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME = join(":", [for p in var.config.github_app_parameters.installation_id : p != null ? p.name : ""]) + POWERTOOLS_LOGGER_LOG_EVENT = var.config.lambda.log_level == "debug" ? 
"true" : "false" + RUNNER_BOOT_TIME_IN_MINUTES = var.config.runner.boot_time_in_minutes + RUNNER_LABELS = lower(join(",", var.config.runner.labels)) + RUNNER_GROUP_NAME = var.config.runner.group_name + RUNNER_NAME_PREFIX = var.config.runner.name_prefix + RUNNER_OWNER = var.config.runner.pool_owner + SSM_TOKEN_PATH = var.config.ssm_token_path + SSM_CONFIG_PATH = var.config.ssm_config_path + SUBNET_IDS = join(",", var.config.subnet_ids) + POWERTOOLS_SERVICE_NAME = "${var.config.prefix}-pool" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS = jsonencode(var.config.runner.enable_on_demand_failover_for_errors) + SSM_PARAMETER_STORE_TAGS = var.config.lambda.parameter_store_tags + SCALE_ERRORS = jsonencode(var.config.runner.scale_errors) } } @@ -90,11 +91,14 @@ resource "aws_iam_role_policy" "pool" { policy = templatefile("${path.module}/policies/lambda-pool.json", { arn_ssm_parameters_path_config = var.config.arn_ssm_parameters_path_config arn_runner_instance_role = var.config.runner.role.arn - github_app_id_arn = var.config.github_app_parameters.id.arn - github_app_key_base64_arn = var.config.github_app_parameters.key_base64.arn - kms_key_arn = var.config.kms_key_arn - ami_kms_key_arn = var.config.ami_kms_key_arn - ssm_ami_id_parameter_arn = var.config.ami_id_ssm_parameter_arn + github_app_parameter_arns = jsonencode(concat( + [for p in var.config.github_app_parameters.id : p.arn], + [for p in var.config.github_app_parameters.key_base64 : p.arn], + [for p in var.config.github_app_parameters.installation_id : p.arn if p != null], + )) + kms_key_arn = var.config.kms_key_arn + ami_kms_key_arn = var.config.ami_kms_key_arn + ssm_ami_id_parameter_arn = var.config.ami_id_ssm_parameter_arn }) } diff --git a/modules/runners/pool/policies/lambda-pool.json 
b/modules/runners/pool/policies/lambda-pool.json index 91c9997ce4..51afd73b50 100644 --- a/modules/runners/pool/policies/lambda-pool.json +++ b/modules/runners/pool/policies/lambda-pool.json @@ -54,10 +54,7 @@ "ssm:GetParameter", "ssm:GetParameters" ], - "Resource": [ - "${github_app_key_base64_arn}", - "${github_app_id_arn}" - ] + "Resource": ${github_app_parameter_arns} %{ if kms_key_arn != "" ~} }, { diff --git a/modules/runners/pool/variables.tf b/modules/runners/pool/variables.tf index 4bfdd68010..a5b5a8d461 100644 --- a/modules/runners/pool/variables.tf +++ b/modules/runners/pool/variables.tf @@ -25,8 +25,9 @@ variable "config" { ssl_verify = string }) github_app_parameters = object({ - key_base64 = map(string) - id = map(string) + key_base64 = list(map(string)) + id = list(map(string)) + installation_id = list(object({ name = string, arn = string })) }) subnet_ids = list(string) runner = object({ diff --git a/modules/runners/scale-down.tf b/modules/runners/scale-down.tf index b304e8066e..a057a8fd92 100644 --- a/modules/runners/scale-down.tf +++ b/modules/runners/scale-down.tf @@ -22,23 +22,24 @@ resource "aws_lambda_function" "scale_down" { environment { variables = { - ENVIRONMENT = var.prefix - ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.metrics.enable && var.metrics.metric.enable_github_app_rate_limit - GHES_URL = var.ghes_url - USER_AGENT = var.user_agent - LOG_LEVEL = var.log_level - MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) - NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 - PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? 
"true" : "false" - RUNNER_BOOT_TIME_IN_MINUTES = var.runner_boot_time_in_minutes - SCALE_DOWN_CONFIG = jsonencode(var.idle_config) - POWERTOOLS_SERVICE_NAME = "${var.prefix}-scale-down" - POWERTOOLS_METRICS_NAMESPACE = var.metrics.namespace - POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false - POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests - POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + ENVIRONMENT = var.prefix + ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.metrics.enable && var.metrics.metric.enable_github_app_rate_limit + GHES_URL = var.ghes_url + USER_AGENT = var.user_agent + LOG_LEVEL = var.log_level + MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) + NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = join(":", [for p in var.github_app_parameters.id : p.name]) + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = join(":", [for p in var.github_app_parameters.key_base64 : p.name]) + PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME = join(":", [for p in var.github_app_parameters.installation_id : p != null ? p.name : ""]) + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + RUNNER_BOOT_TIME_IN_MINUTES = var.runner_boot_time_in_minutes + SCALE_DOWN_CONFIG = jsonencode(var.idle_config) + POWERTOOLS_SERVICE_NAME = "${var.prefix}-scale-down" + POWERTOOLS_METRICS_NAMESPACE = var.metrics.namespace + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error } } @@ -97,10 +98,13 @@ resource "aws_iam_role_policy" "scale_down" { name = "scale-down-policy" role = aws_iam_role.scale_down.name policy = templatefile("${path.module}/policies/lambda-scale-down.json", { - environment = var.prefix - github_app_id_arn = var.github_app_parameters.id.arn - github_app_key_base64_arn = var.github_app_parameters.key_base64.arn - kms_key_arn = local.kms_key_arn + environment = var.prefix + github_app_parameter_arns = jsonencode(concat( + [for p in var.github_app_parameters.id : p.arn], + [for p in var.github_app_parameters.key_base64 : p.arn], + [for p in var.github_app_parameters.installation_id : p.arn if p != null], + )) + kms_key_arn = local.kms_key_arn }) } diff --git a/modules/runners/scale-up.tf b/modules/runners/scale-up.tf index c5503f6394..7e067c310b 100644 --- a/modules/runners/scale-up.tf +++ b/modules/runners/scale-up.tf @@ -25,43 +25,44 @@ resource "aws_lambda_function" "scale_up" { architectures = [var.lambda_architecture] environment { variables = { - AMI_ID_SSM_PARAMETER_NAME = local.ami_id_ssm_parameter_name - DISABLE_RUNNER_AUTOUPDATE = var.disable_runner_autoupdate - ENABLE_EPHEMERAL_RUNNERS = var.enable_ephemeral_runners - ENABLE_JIT_CONFIG = var.enable_jit_config - ENABLE_JOB_QUEUED_CHECK = local.enable_job_queued_check - ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.metrics.enable && var.metrics.metric.enable_github_app_rate_limit - ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners - ENVIRONMENT = var.prefix - GHES_URL = var.ghes_url - USER_AGENT = var.user_agent - INSTANCE_ALLOCATION_STRATEGY = var.instance_allocation_strategy - INSTANCE_MAX_SPOT_PRICE = var.instance_max_spot_price - INSTANCE_TARGET_CAPACITY_TYPE = var.instance_target_capacity_type - INSTANCE_TYPES = join(",", var.instance_types) - LAUNCH_TEMPLATE_NAME = 
aws_launch_template.runner.name - LOG_LEVEL = var.log_level - MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) - NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 - PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" - POWERTOOLS_METRICS_NAMESPACE = var.metrics.namespace - POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false - POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests - POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error - RUNNER_LABELS = lower(join(",", var.runner_labels)) - RUNNER_GROUP_NAME = var.runner_group_name - RUNNER_NAME_PREFIX = var.runner_name_prefix - RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count - POWERTOOLS_SERVICE_NAME = "${var.prefix}-scale-up" - SSM_TOKEN_PATH = local.token_path - SSM_CONFIG_PATH = "${var.ssm_paths.root}/${var.ssm_paths.config}" - SSM_PARAMETER_STORE_TAGS = local.parameter_store_tags - SUBNET_IDS = join(",", var.subnet_ids) - ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS = jsonencode(var.enable_on_demand_failover_for_errors) - SCALE_ERRORS = jsonencode(var.scale_errors) - JOB_RETRY_CONFIG = jsonencode(local.job_retry_config) + AMI_ID_SSM_PARAMETER_NAME = local.ami_id_ssm_parameter_name + DISABLE_RUNNER_AUTOUPDATE = var.disable_runner_autoupdate + ENABLE_EPHEMERAL_RUNNERS = var.enable_ephemeral_runners + ENABLE_JIT_CONFIG = var.enable_jit_config + ENABLE_JOB_QUEUED_CHECK = local.enable_job_queued_check + ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.metrics.enable && var.metrics.metric.enable_github_app_rate_limit + ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners + ENVIRONMENT = var.prefix + GHES_URL = var.ghes_url + USER_AGENT = var.user_agent + INSTANCE_ALLOCATION_STRATEGY 
= var.instance_allocation_strategy + INSTANCE_MAX_SPOT_PRICE = var.instance_max_spot_price + INSTANCE_TARGET_CAPACITY_TYPE = var.instance_target_capacity_type + INSTANCE_TYPES = join(",", var.instance_types) + LAUNCH_TEMPLATE_NAME = aws_launch_template.runner.name + LOG_LEVEL = var.log_level + MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) + NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = join(":", [for p in var.github_app_parameters.id : p.name]) + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = join(":", [for p in var.github_app_parameters.key_base64 : p.name]) + PARAMETER_GITHUB_APP_INSTALLATION_ID_NAME = join(":", [for p in var.github_app_parameters.installation_id : p != null ? p.name : ""]) + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + POWERTOOLS_METRICS_NAMESPACE = var.metrics.namespace + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + RUNNER_LABELS = lower(join(",", var.runner_labels)) + RUNNER_GROUP_NAME = var.runner_group_name + RUNNER_NAME_PREFIX = var.runner_name_prefix + RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count + POWERTOOLS_SERVICE_NAME = "${var.prefix}-scale-up" + SSM_TOKEN_PATH = local.token_path + SSM_CONFIG_PATH = "${var.ssm_paths.root}/${var.ssm_paths.config}" + SSM_PARAMETER_STORE_TAGS = local.parameter_store_tags + SUBNET_IDS = join(",", var.subnet_ids) + ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS = jsonencode(var.enable_on_demand_failover_for_errors) + SCALE_ERRORS = jsonencode(var.scale_errors) + JOB_RETRY_CONFIG = jsonencode(local.job_retry_config) } } @@ -118,14 +119,17 @@ resource "aws_iam_role_policy" "scale_up" { name = "scale-up-policy" role = aws_iam_role.scale_up.name policy = templatefile("${path.module}/policies/lambda-scale-up.json", { - arn_runner_instance_role = aws_iam_role.runner.arn - sqs_arn = var.sqs_build_queue.arn - github_app_id_arn = var.github_app_parameters.id.arn - github_app_key_base64_arn = var.github_app_parameters.key_base64.arn - ssm_config_path = "arn:${var.aws_partition}:ssm:${var.aws_region}:${data.aws_caller_identity.current.account_id}:parameter${var.ssm_paths.root}/${var.ssm_paths.config}" - kms_key_arn = local.kms_key_arn - ami_kms_key_arn = local.ami_kms_key_arn - ssm_ami_id_parameter_arn = local.ami_id_ssm_module_managed ? 
aws_ssm_parameter.runner_ami_id[0].arn : var.ami.id_ssm_parameter_arn + arn_runner_instance_role = aws_iam_role.runner.arn + sqs_arn = var.sqs_build_queue.arn + github_app_parameter_arns = jsonencode(concat( + [for p in var.github_app_parameters.id : p.arn], + [for p in var.github_app_parameters.key_base64 : p.arn], + [for p in var.github_app_parameters.installation_id : p.arn if p != null], + ["arn:${var.aws_partition}:ssm:${var.aws_region}:${data.aws_caller_identity.current.account_id}:parameter${var.ssm_paths.root}/${var.ssm_paths.config}/*"] + )) + kms_key_arn = local.kms_key_arn + ami_kms_key_arn = local.ami_kms_key_arn + ssm_ami_id_parameter_arn = local.ami_id_ssm_module_managed ? aws_ssm_parameter.runner_ami_id[0].arn : var.ami.id_ssm_parameter_arn }) } diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index e2a33280b9..6c27868245 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -192,10 +192,23 @@ variable "enable_organization_runners" { } variable "github_app_parameters" { - description = "Parameter Store for GitHub App Parameters." + description = <<-EOF + Parameter Store for GitHub App Parameters. + + Supports multiple GitHub Apps for randomized API rate limit distribution. + Each list element corresponds to one GitHub App and is a map containing + `name` and `arn` keys referencing SSM parameters. The first element is the + primary app (the one whose webhook secret is used for incoming webhook + validation). All apps must be installed on the same repositories/organizations. + + The control-plane lambdas (scale-up, scale-down, pool, job-retry) randomly + select an app from the list for each GitHub API call, distributing rate + limit consumption across all configured apps.
+ EOF type = object({ - key_base64 = map(string) - id = map(string) + key_base64 = list(map(string)) + id = list(map(string)) + installation_id = list(object({ name = string, arn = string })) }) } diff --git a/modules/ssm/outputs.tf b/modules/ssm/outputs.tf index 4017f6ab3d..462c7265be 100644 --- a/modules/ssm/outputs.tf +++ b/modules/ssm/outputs.tf @@ -14,3 +14,22 @@ output "parameters" { } } } + +output "additional_app_parameters" { + value = [ + for idx, app in var.additional_github_apps : { + id = { + name = app.id_ssm != null ? app.id_ssm.name : aws_ssm_parameter.additional_github_app_id[idx].name + arn = app.id_ssm != null ? app.id_ssm.arn : aws_ssm_parameter.additional_github_app_id[idx].arn + } + key_base64 = { + name = app.key_base64_ssm != null ? app.key_base64_ssm.name : aws_ssm_parameter.additional_github_app_key_base64[idx].name + arn = app.key_base64_ssm != null ? app.key_base64_ssm.arn : aws_ssm_parameter.additional_github_app_key_base64[idx].arn + } + installation_id = app.installation_id != null || app.installation_id_ssm != null ? { + name = app.installation_id_ssm != null ? app.installation_id_ssm.name : aws_ssm_parameter.additional_github_app_installation_id[idx].name + arn = app.installation_id_ssm != null ? 
app.installation_id_ssm.arn : aws_ssm_parameter.additional_github_app_installation_id[idx].arn + } : null + } + ] +} diff --git a/modules/ssm/ssm.tf b/modules/ssm/ssm.tf index 3f13333e68..f7002e3f1d 100644 --- a/modules/ssm/ssm.tf +++ b/modules/ssm/ssm.tf @@ -24,3 +24,30 @@ resource "aws_ssm_parameter" "github_app_webhook_secret" { key_id = local.kms_key_arn tags = var.tags } + +resource "aws_ssm_parameter" "additional_github_app_id" { + for_each = { for idx, app in var.additional_github_apps : idx => app if app.id_ssm == null } + name = "${var.path_prefix}/additional_github_app_${each.key}_id" + type = "SecureString" + value = each.value.id + key_id = local.kms_key_arn + tags = var.tags +} + +resource "aws_ssm_parameter" "additional_github_app_key_base64" { + for_each = { for idx, app in var.additional_github_apps : idx => app if app.key_base64_ssm == null } + name = "${var.path_prefix}/additional_github_app_${each.key}_key_base64" + type = "SecureString" + value = each.value.key_base64 + key_id = local.kms_key_arn + tags = var.tags +} + +resource "aws_ssm_parameter" "additional_github_app_installation_id" { + for_each = { for idx, app in var.additional_github_apps : idx => app if app.installation_id_ssm == null && nonsensitive(app.installation_id != null) } + name = "${var.path_prefix}/additional_github_app_${each.key}_installation_id" + type = "SecureString" + value = each.value.installation_id + key_id = local.kms_key_arn + tags = var.tags +} diff --git a/modules/ssm/variables.tf b/modules/ssm/variables.tf index 1eb796aea7..d7387ecc30 100644 --- a/modules/ssm/variables.tf +++ b/modules/ssm/variables.tf @@ -45,6 +45,19 @@ variable "kms_key_arn" { default = null } +variable "additional_github_apps" { + description = "Additional GitHub Apps for distributing API rate limit usage." 
+ type = list(object({ + key_base64 = optional(string) + key_base64_ssm = optional(object({ arn = string, name = string })) + id = optional(string) + id_ssm = optional(object({ arn = string, name = string })) + installation_id = optional(string) + installation_id_ssm = optional(object({ arn = string, name = string })) + })) + default = [] +} + variable "tags" { description = "Map of tags that will be added to created resources. By default resources will be tagged with name and environment." type = map(string) diff --git a/variables.tf b/variables.tf index d739e916fb..eb45799f5b 100644 --- a/variables.tf +++ b/variables.tf @@ -67,6 +67,27 @@ variable "github_app" { } } +variable "additional_github_apps" { + description = "Additional GitHub Apps for distributing API rate limit usage. Each must be installed on the same repos/orgs as the primary app." + type = list(object({ + key_base64 = optional(string) + key_base64_ssm = optional(object({ arn = string, name = string })) + id = optional(string) + id_ssm = optional(object({ arn = string, name = string })) + installation_id = optional(string) + installation_id_ssm = optional(object({ arn = string, name = string })) + })) + default = [] + validation { + condition = alltrue([ + for app in var.additional_github_apps : + (app.key_base64 != null || app.key_base64_ssm != null) && + (app.id != null || app.id_ssm != null) + ]) + error_message = "Each additional GitHub app must provide either key_base64 or key_base64_ssm, and either id or id_ssm." + } +} + variable "scale_down_schedule_expression" { description = "Scheduler expression to check every x for scale down." type = string