@@ -10,6 +10,7 @@ import { createRunner, listEC2Runners } from './../aws/runners';
1010import { RunnerInputParameters } from './../aws/runners.d' ;
1111import * as scaleUpModule from './scale-up' ;
1212import { getParameter } from '@aws-github-runner/aws-ssm-util' ;
13+ import { publishRetryMessage } from './job-retry' ;
1314import { describe , it , expect , beforeEach , vi } from 'vitest' ;
1415import type { Octokit } from '@octokit/rest' ;
1516
@@ -33,6 +34,7 @@ const mockCreateRunner = vi.mocked(createRunner);
3334const mockListRunners = vi . mocked ( listEC2Runners ) ;
3435const mockSSMClient = mockClient ( SSMClient ) ;
3536const mockSSMgetParameter = vi . mocked ( getParameter ) ;
37+ const mockPublishRetryMessage = vi . mocked ( publishRetryMessage ) ;
3638
3739vi . mock ( '@octokit/rest' , ( ) => ( {
3840 Octokit : vi . fn ( ) . mockImplementation ( function ( ) {
@@ -63,6 +65,11 @@ vi.mock('@aws-github-runner/aws-ssm-util', async () => {
6365 } ;
6466} ) ;
6567
// Swap the job-retry module for bare mock functions so the tests in this file
// can observe calls to its exports without running the real implementation.
vi.mock('./job-retry', () => {
  return {
    publishRetryMessage: vi.fn(),
    checkAndRetryJob: vi.fn(),
  };
});
72+
6673export type RunnerType = 'ephemeral' | 'non-ephemeral' ;
6774
6875// for ephemeral and non-ephemeral runners
@@ -1680,6 +1687,171 @@ describe('scaleUp with Github Data Residency', () => {
16801687 } ) ;
16811688} ) ;
16821689
/**
 * Checks when and with which payload `scaleUp` calls `publishRetryMessage`.
 *
 * `publishRetryMessage` is the mocked export of `./job-retry`; every test
 * drives `scaleUpModule.scaleUp` with messages derived from `TEST_DATA_SINGLE`
 * and then inspects the calls recorded on the mock.
 */
describe('Retry mechanism tests', () => {
  beforeEach(() => {
    // The assertions below rely on exact call counts, so start every test
    // with empty call history. This only clears recorded calls/results; mock
    // implementations installed elsewhere are left in place.
    vi.clearAllMocks();

    process.env.ENABLE_ORGANIZATION_RUNNERS = 'true';
    process.env.ENABLE_EPHEMERAL_RUNNERS = 'true';
    process.env.ENABLE_JOB_QUEUED_CHECK = 'true';
    process.env.RUNNERS_MAXIMUM_COUNT = '10';
    expectedRunnerParams = { ...EXPECTED_RUNNER_PARAMS };
    mockSSMClient.reset();
  });

  /**
   * Builds `count` messages based on `TEST_DATA_SINGLE`.
   *
   * Message `i` (zero-based) gets `id: i + 1` and `messageId: "message-<i + 1>"`.
   * `overrides[i]`, when present, is spread last and therefore wins over both
   * the base data and the generated fields.
   *
   * @param count - number of messages to create
   * @param overrides - optional per-index partial messages
   * @returns the generated messages, in index order
   */
  const createTestMessages = (
    count: number,
    overrides: Partial<scaleUpModule.ActionRequestMessageSQS>[] = [],
  ): scaleUpModule.ActionRequestMessageSQS[] => {
    return Array.from({ length: count }, (_, i) => ({
      ...TEST_DATA_SINGLE,
      id: i + 1,
      messageId: `message-${i + 1}`,
      ...overrides[i],
    }));
  };

  it('calls publishRetryMessage for each valid message when job is queued', async () => {
    const messages = createTestMessages(3);
    mockCreateRunner.mockResolvedValue(['i-12345', 'i-67890', 'i-abcdef']); // Create all requested runners

    await scaleUpModule.scaleUp(messages);

    expect(mockPublishRetryMessage).toHaveBeenCalledTimes(3);
    expect(mockPublishRetryMessage).toHaveBeenNthCalledWith(
      1,
      expect.objectContaining({
        id: 1,
        messageId: 'message-1',
      }),
    );
    expect(mockPublishRetryMessage).toHaveBeenNthCalledWith(
      2,
      expect.objectContaining({
        id: 2,
        messageId: 'message-2',
      }),
    );
    expect(mockPublishRetryMessage).toHaveBeenNthCalledWith(
      3,
      expect.objectContaining({
        id: 3,
        messageId: 'message-3',
      }),
    );
  });

  it('calls publishRetryMessage only for messages whose job is still queued', async () => {
    // NOTE(review): this override stays installed after the test; it is
    // presumably replaced by file-level setup before the next test - confirm.
    mockOctokit.actions.getJobForWorkflowRun.mockImplementation((params) => {
      const isQueued = params.job_id === 1; // Only job 1 is queued
      return {
        data: {
          status: isQueued ? 'queued' : 'completed',
        },
      };
    });

    const messages = createTestMessages(3);

    await scaleUpModule.scaleUp(messages);

    // Only message with id 1 should trigger retry
    expect(mockPublishRetryMessage).toHaveBeenCalledTimes(1);
    expect(mockPublishRetryMessage).toHaveBeenCalledWith(
      expect.objectContaining({
        id: 1,
        messageId: 'message-1',
      }),
    );
  });

  it('does not call publishRetryMessage when maximum runners is reached and messages are marked invalid', async () => {
    process.env.RUNNERS_MAXIMUM_COUNT = '0'; // No runners can be created

    const messages = createTestMessages(2);

    await scaleUpModule.scaleUp(messages);

    // Verify listEC2Runners is called to check current runner count
    expect(listEC2Runners).toHaveBeenCalledWith({
      environment: 'unit-test-environment',
      runnerType: 'Org',
      runnerOwner: TEST_DATA_SINGLE.repositoryOwner,
    });

    // publishRetryMessage should NOT be called because messages are marked as invalid
    // Invalid messages go back to the SQS queue and will be retried there
    expect(mockPublishRetryMessage).not.toHaveBeenCalled();
    expect(createRunner).not.toHaveBeenCalled();
  });

  it('calls publishRetryMessage with correct message structure including retry counter', async () => {
    const message = {
      ...TEST_DATA_SINGLE,
      messageId: 'test-message-id',
      retryCounter: 2,
    };

    await scaleUpModule.scaleUp([message]);

    expect(mockPublishRetryMessage).toHaveBeenCalledWith(
      expect.objectContaining({
        id: message.id,
        messageId: 'test-message-id',
        retryCounter: 2,
      }),
    );
  });

  it('calls publishRetryMessage when ENABLE_JOB_QUEUED_CHECK is false', async () => {
    process.env.ENABLE_JOB_QUEUED_CHECK = 'false';
    mockCreateRunner.mockResolvedValue(['i-12345', 'i-67890']); // Create all requested runners

    const messages = createTestMessages(2);

    await scaleUpModule.scaleUp(messages);

    // Should always call publishRetryMessage when queue check is disabled
    expect(mockPublishRetryMessage).toHaveBeenCalledTimes(2);
    expect(mockOctokit.actions.getJobForWorkflowRun).not.toHaveBeenCalled();
  });

  it('calls publishRetryMessage for each message in a multi-runner scenario', async () => {
    mockCreateRunner.mockResolvedValue(['i-12345', 'i-67890', 'i-abcdef', 'i-11111', 'i-22222']); // Create all requested runners
    const messages = createTestMessages(5);

    await scaleUpModule.scaleUp(messages);

    expect(mockPublishRetryMessage).toHaveBeenCalledTimes(5);
    messages.forEach((msg, index) => {
      expect(mockPublishRetryMessage).toHaveBeenNthCalledWith(
        index + 1,
        expect.objectContaining({
          id: msg.id,
          messageId: msg.messageId,
        }),
      );
    });
  });

  it('calls publishRetryMessage after runner creation', async () => {
    const messages = createTestMessages(1);

    // Both mocks append their name to a shared list so the relative order of
    // the two calls can be asserted.
    const callOrder: string[] = [];
    mockPublishRetryMessage.mockImplementation(() => {
      callOrder.push('publishRetryMessage');
      return Promise.resolve();
    });
    mockCreateRunner.mockImplementation(async () => {
      callOrder.push('createRunner');
      return ['i-12345']; // Create the requested runner
    });

    await scaleUpModule.scaleUp(messages);

    expect(callOrder).toEqual(['createRunner', 'publishRetryMessage']);
  });
});
1854+
16831855function defaultOctokitMockImpl ( ) {
16841856 mockOctokit . actions . getJobForWorkflowRun . mockImplementation ( ( ) => ( {
16851857 data : {
0 commit comments