@@ -10,6 +10,7 @@ import { createRunner, listEC2Runners } from './../aws/runners';
1010import { RunnerInputParameters } from './../aws/runners.d' ;
1111import * as scaleUpModule from './scale-up' ;
1212import { getParameter } from '@aws-github-runner/aws-ssm-util' ;
13+ import { publishRetryMessage } from './job-retry' ;
1314import { describe , it , expect , beforeEach , vi } from 'vitest' ;
1415import type { Octokit } from '@octokit/rest' ;
1516
@@ -33,6 +34,7 @@ const mockCreateRunner = vi.mocked(createRunner);
3334const mockListRunners = vi . mocked ( listEC2Runners ) ;
3435const mockSSMClient = mockClient ( SSMClient ) ;
3536const mockSSMgetParameter = vi . mocked ( getParameter ) ;
37+ const mockPublishRetryMessage = vi . mocked ( publishRetryMessage ) ;
3638
3739vi . mock ( '@octokit/rest' , ( ) => ( {
3840 Octokit : vi . fn ( ) . mockImplementation ( function ( ) {
@@ -63,6 +65,11 @@ vi.mock('@aws-github-runner/aws-ssm-util', async () => {
6365 } ;
6466} ) ;
6567
68+ vi . mock ( './job-retry' , ( ) => ( {
69+ publishRetryMessage : vi . fn ( ) ,
70+ checkAndRetryJob : vi . fn ( ) ,
71+ } ) ) ;
72+
6673export type RunnerType = 'ephemeral' | 'non-ephemeral' ;
6774
6875// for ephemeral and non-ephemeral runners
@@ -1680,6 +1687,159 @@ describe('scaleUp with Github Data Residency', () => {
16801687 } ) ;
16811688} ) ;
16821689
1690+ describe ( 'Retry mechanism tests' , ( ) => {
1691+ beforeEach ( ( ) => {
1692+ process . env . ENABLE_ORGANIZATION_RUNNERS = 'true' ;
1693+ process . env . ENABLE_EPHEMERAL_RUNNERS = 'true' ;
1694+ process . env . ENABLE_JOB_QUEUED_CHECK = 'true' ;
1695+ process . env . RUNNERS_MAXIMUM_COUNT = '10' ;
1696+ expectedRunnerParams = { ...EXPECTED_RUNNER_PARAMS } ;
1697+ mockSSMClient . reset ( ) ;
1698+ } ) ;
1699+
1700+ const createTestMessages = (
1701+ count : number ,
1702+ overrides : Partial < scaleUpModule . ActionRequestMessageSQS > [ ] = [ ] ,
1703+ ) : scaleUpModule . ActionRequestMessageSQS [ ] => {
1704+ return Array . from ( { length : count } , ( _ , i ) => ( {
1705+ ...TEST_DATA_SINGLE ,
1706+ id : i + 1 ,
1707+ messageId : `message-${ i + 1 } ` ,
1708+ ...overrides [ i ] ,
1709+ } ) ) ;
1710+ } ;
1711+
1712+ it ( 'calls publishRetryMessage for each valid message when job is queued' , async ( ) => {
1713+ const messages = createTestMessages ( 3 ) ;
1714+
1715+ await scaleUpModule . scaleUp ( messages ) ;
1716+
1717+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 3 ) ;
1718+ expect ( mockPublishRetryMessage ) . toHaveBeenNthCalledWith (
1719+ 1 ,
1720+ expect . objectContaining ( {
1721+ id : 1 ,
1722+ messageId : 'message-1' ,
1723+ } ) ,
1724+ ) ;
1725+ expect ( mockPublishRetryMessage ) . toHaveBeenNthCalledWith (
1726+ 2 ,
1727+ expect . objectContaining ( {
1728+ id : 2 ,
1729+ messageId : 'message-2' ,
1730+ } ) ,
1731+ ) ;
1732+ expect ( mockPublishRetryMessage ) . toHaveBeenNthCalledWith (
1733+ 3 ,
1734+ expect . objectContaining ( {
1735+ id : 3 ,
1736+ messageId : 'message-3' ,
1737+ } ) ,
1738+ ) ;
1739+ } ) ;
1740+
1741+ it ( 'does not call publishRetryMessage when job is not queued' , async ( ) => {
1742+ mockOctokit . actions . getJobForWorkflowRun . mockImplementation ( ( params ) => {
1743+ const isQueued = params . job_id === 1 ; // Only job 1 is queued
1744+ return {
1745+ data : {
1746+ status : isQueued ? 'queued' : 'completed' ,
1747+ } ,
1748+ } ;
1749+ } ) ;
1750+
1751+ const messages = createTestMessages ( 3 ) ;
1752+
1753+ await scaleUpModule . scaleUp ( messages ) ;
1754+
1755+ // Only message with id 1 should trigger retry
1756+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 1 ) ;
1757+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledWith (
1758+ expect . objectContaining ( {
1759+ id : 1 ,
1760+ messageId : 'message-1' ,
1761+ } ) ,
1762+ ) ;
1763+ } ) ;
1764+
1765+ it ( 'calls publishRetryMessage even when maximum runners is reached' , async ( ) => {
1766+ process . env . RUNNERS_MAXIMUM_COUNT = '0' ; // No runners can be created
1767+
1768+ const messages = createTestMessages ( 2 ) ;
1769+
1770+ await scaleUpModule . scaleUp ( messages ) ;
1771+
1772+ // publishRetryMessage should still be called even though no runners will be created
1773+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 2 ) ;
1774+ expect ( createRunner ) . not . toHaveBeenCalled ( ) ;
1775+ } ) ;
1776+
1777+ it ( 'calls publishRetryMessage with correct message structure including retry counter' , async ( ) => {
1778+ const message = {
1779+ ...TEST_DATA_SINGLE ,
1780+ messageId : 'test-message-id' ,
1781+ retryCounter : 2 ,
1782+ } ;
1783+
1784+ await scaleUpModule . scaleUp ( [ message ] ) ;
1785+
1786+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledWith (
1787+ expect . objectContaining ( {
1788+ id : message . id ,
1789+ messageId : 'test-message-id' ,
1790+ retryCounter : 2 ,
1791+ } ) ,
1792+ ) ;
1793+ } ) ;
1794+
1795+ it ( 'calls publishRetryMessage when ENABLE_JOB_QUEUED_CHECK is false' , async ( ) => {
1796+ process . env . ENABLE_JOB_QUEUED_CHECK = 'false' ;
1797+
1798+ const messages = createTestMessages ( 2 ) ;
1799+
1800+ await scaleUpModule . scaleUp ( messages ) ;
1801+
1802+ // Should always call publishRetryMessage when queue check is disabled
1803+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 2 ) ;
1804+ expect ( mockOctokit . actions . getJobForWorkflowRun ) . not . toHaveBeenCalled ( ) ;
1805+ } ) ;
1806+
1807+ it ( 'calls publishRetryMessage for each message in a multi-runner scenario' , async ( ) => {
1808+ const messages = createTestMessages ( 5 ) ;
1809+
1810+ await scaleUpModule . scaleUp ( messages ) ;
1811+
1812+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 5 ) ;
1813+ messages . forEach ( ( msg , index ) => {
1814+ expect ( mockPublishRetryMessage ) . toHaveBeenNthCalledWith (
1815+ index + 1 ,
1816+ expect . objectContaining ( {
1817+ id : msg . id ,
1818+ messageId : msg . messageId ,
1819+ } ) ,
1820+ ) ;
1821+ } ) ;
1822+ } ) ;
1823+
1824+ it ( 'calls publishRetryMessage before runner creation' , async ( ) => {
1825+ const messages = createTestMessages ( 1 ) ;
1826+
1827+ const callOrder : string [ ] = [ ] ;
1828+ mockPublishRetryMessage . mockImplementation ( ( ) => {
1829+ callOrder . push ( 'publishRetryMessage' ) ;
1830+ return Promise . resolve ( ) ;
1831+ } ) ;
1832+ mockCreateRunner . mockImplementation ( async ( ) => {
1833+ callOrder . push ( 'createRunner' ) ;
1834+ return [ 'i-12345' ] ;
1835+ } ) ;
1836+
1837+ await scaleUpModule . scaleUp ( messages ) ;
1838+
1839+ expect ( callOrder ) . toEqual ( [ 'publishRetryMessage' , 'createRunner' ] ) ;
1840+ } ) ;
1841+ } ) ;
1842+
16831843function defaultOctokitMockImpl ( ) {
16841844 mockOctokit . actions . getJobForWorkflowRun . mockImplementation ( ( ) => ( {
16851845 data : {
0 commit comments