From df08afe096e953bc0aa408e3ae3a364ca7be03fb Mon Sep 17 00:00:00 2001 From: Martin Kersner Date: Thu, 2 Jul 2026 16:48:46 +0900 Subject: [PATCH] fix: tolerate transient gateway 5xx lane-wide; de-mask live CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live keyed lane went red again on a transient prod 504 — this time test_cex_candle (ServerError 504 "upstream request timeout"). #133's per-endpoint live_call wrapping only covered TestPremium, so the same infra flakiness resurfaced on the next unwrapped call. The overall run still showed green: job-level continue-on-error masked the failed job. - conftest: replace per-call live_call helper with autouse fixture _tolerate_transient_gateway — monkeypatches API.send_request (the one method every endpoint inherits) to retry transient 502/503/504, then pytest.skip. Covers test_call.py + test_integration.py + future live tests. No-op without a key (keyless/mocked lanes untouched). - test_integration: revert the 19 live_call(lambda: ...) premium wraps to direct calls; drop now-dead import. - live-tests.yml: move continue-on-error from job to pytest step so the job conclusion stays honest (setup failures still red) while the push stays non-blocking; add a step that annotates + writes a run-summary warning when the live suite fails, so failures are visible, not silent. Keyless offline suite unchanged: 134 passed, 11 skipped. 
--- .github/workflows/live-tests.yml | 26 ++++++++++++--- tests/conftest.py | 56 +++++++++++++++++++++----------- tests/test_integration.py | 46 ++++++++++++-------------- 3 files changed, 78 insertions(+), 50 deletions(-) diff --git a/.github/workflows/live-tests.yml b/.github/workflows/live-tests.yml index 5ed5787..eae413d 100644 --- a/.github/workflows/live-tests.yml +++ b/.github/workflows/live-tests.yml @@ -20,11 +20,6 @@ permissions: jobs: live: runs-on: ubuntu-latest - # Non-blocking: transient prod-data flakiness (cold pods -> 500 "no data - # found", empty-page premium ValueError) must not fail a run. PRs never - # trigger this lane (main-only), so the required offline build stays the - # gating check. - continue-on-error: true steps: - uses: actions/checkout@v4 @@ -41,8 +36,29 @@ jobs: # Full keyed suite: integration + smoke lanes hit prod with the secret key. # DATAMAXI_TIMEOUT bumped past the conftest 30s default to tolerate slow # cold pods. + # + # Non-blocking by design: transient prod-data flakiness (cold pods -> 500 + # "no data found", empty-page premium ValueError; transient gateway 5xx are + # auto-skipped in conftest) must not fail the push. PRs never trigger this + # lane (main-only), so the required offline build stays the gating check. + # `continue-on-error` is scoped to THIS step (not the job) so setup failures + # still go red, and the job's own conclusion stays honest instead of a + # green run masking a failed job. `id` lets the next step read the outcome. - name: Run live keyed tests + id: pytest + continue-on-error: true env: DATAMAXI_API_KEY: ${{ secrets.DATAMAXI_API_KEY }} DATAMAXI_TIMEOUT: "60" run: python -m pytest tests/ -q + + # Surface the real result. `continue-on-error` above keeps the lane + # non-blocking (a red live suite never fails the push), but it also masks + # the step as green — so a genuine failure slips by silently. 
This step + # re-reads the pytest outcome and emits a warning annotation + a line in the + # run summary when it failed, giving a visible signal without gating. + - name: Flag live-suite failure (non-blocking) + if: steps.pytest.outcome == 'failure' + run: | + echo "::warning title=Live keyed tests failed::The non-blocking live lane failed against prod. Inspect the 'Run live keyed tests' step log (transient prod 5xx are auto-skipped, so this is likely a real regression)." + echo "⚠️ **Live keyed tests failed** (non-blocking lane). See the 'Run live keyed tests' step log — transient prod 5xx are auto-skipped, so a failure here is likely a real regression." >> "$GITHUB_STEP_SUMMARY" diff --git a/tests/conftest.py b/tests/conftest.py index 6196ccf..2293be5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,6 +10,7 @@ import pytest from datamaxi import Datamaxi, Telegram, Naver +from datamaxi.api import API from datamaxi.error import ServerError # Transient gateway statuses: the prod edge returns these when an upstream is @@ -27,27 +28,44 @@ TIMEOUT = int(os.getenv("DATAMAXI_TIMEOUT") or "30") -def live_call(fn, retries=3, backoff=1.0): - """Invoke a live-endpoint call, tolerating transient gateway errors. +@pytest.fixture(autouse=True) +def _tolerate_transient_gateway(monkeypatch): + """Retry transient gateway 5xx at the HTTP boundary for every live call. - Retries ``fn`` on a transient 5xx (``_TRANSIENT_STATUS``) with linear - backoff. If every attempt still hits a transient status the call is - ``pytest.skip``-ped rather than failed — the non-blocking live lane must - not go red on prod infra flakiness. Any other error (real 5xx, 4xx, - assertion) propagates unchanged. + The keyed live lane hits prod, whose edge intermittently returns a + ``_TRANSIENT_STATUS`` (502/503/504) under load — infra flakiness, not an SDK + bug. 
Wrapping ``API.send_request`` (the single method every endpoint object + inherits) makes EVERY live call across ``test_call.py`` and + ``test_integration.py`` retry with linear backoff, then ``pytest.skip`` if + still transient — so the non-blocking lane never goes red on a transient + prod 5xx, without per-test wrapping. Real 5xx/4xx and assertions propagate + on the first attempt. + + No-op without a key: the keyless/mocked lanes are skipped (``API_KEY`` + unset) or mock the transport, so they never raise a transient ``ServerError`` + and stay untouched. """ - for attempt in range(retries): - try: - return fn() - except ServerError as e: - if e.status_code not in _TRANSIENT_STATUS: - raise - if attempt == retries - 1: - pytest.skip( - "transient %s from live endpoint after %d attempts" - % (e.status_code, retries) - ) - time.sleep(backoff * (attempt + 1)) + if not API_KEY: + return + + retries, backoff = 3, 1.0 + original = API.send_request + + def retrying(self, *args, **kwargs): + for attempt in range(retries): + try: + return original(self, *args, **kwargs) + except ServerError as e: + if e.status_code not in _TRANSIENT_STATUS: + raise + if attempt == retries - 1: + pytest.skip( + "transient %s from live endpoint after %d attempts" + % (e.status_code, retries) + ) + time.sleep(backoff * (attempt + 1)) + + monkeypatch.setattr(API, "send_request", retrying) @pytest.fixture(scope="module") diff --git a/tests/test_integration.py b/tests/test_integration.py index 36ef706..4628d49 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -21,7 +21,7 @@ from datetime import datetime, timedelta from datamaxi.error import ParameterRequiredError -from tests.conftest import API_KEY, live_call +from tests.conftest import API_KEY # Live integration lane: exercises prod endpoints with every supported param. 
# Skipped without a key and deselected from the keyless CI lane via the @@ -587,106 +587,100 @@ def test_exchanges(self, datamaxi): def test_premium_basic(self, datamaxi): """Test basic premium data fetch.""" - result = live_call(lambda: datamaxi.premium()) + result = datamaxi.premium() assert isinstance(result, pd.DataFrame) assert len(result) > 0 def test_premium_with_pagination(self, datamaxi): """Test premium data with pagination.""" - result = live_call(lambda: datamaxi.premium(page=1, limit=10)) + result = datamaxi.premium(page=1, limit=10) assert isinstance(result, pd.DataFrame) assert len(result) <= 10 def test_premium_source_exchange(self, datamaxi): """Test premium data filtered by source_exchange.""" - result = live_call( - lambda: datamaxi.premium(source_exchange="binance", limit=10) - ) + result = datamaxi.premium(source_exchange="binance", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_target_exchange(self, datamaxi): """Test premium data filtered by target_exchange.""" - result = live_call(lambda: datamaxi.premium(target_exchange="upbit", limit=10)) + result = datamaxi.premium(target_exchange="upbit", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_with_asset(self, datamaxi): """Test premium data filtered by asset.""" - result = live_call(lambda: datamaxi.premium(asset="BTC", limit=10)) + result = datamaxi.premium(asset="BTC", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_source_quote(self, datamaxi): """Test premium data filtered by source_quote.""" - result = live_call(lambda: datamaxi.premium(source_quote="USDT", limit=10)) + result = datamaxi.premium(source_quote="USDT", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_target_quote(self, datamaxi): """Test premium data filtered by target_quote.""" - result = live_call(lambda: datamaxi.premium(target_quote="KRW", limit=10)) + result = datamaxi.premium(target_quote="KRW", limit=10) assert isinstance(result, pd.DataFrame) 
def test_premium_source_market(self, datamaxi): """Test premium data filtered by source_market.""" - result = live_call(lambda: datamaxi.premium(source_market="spot", limit=10)) + result = datamaxi.premium(source_market="spot", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_target_market(self, datamaxi): """Test premium data filtered by target_market.""" - result = live_call(lambda: datamaxi.premium(target_market="spot", limit=10)) + result = datamaxi.premium(target_market="spot", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_both_markets(self, datamaxi): """Test premium data filtered by both markets.""" - result = live_call( - lambda: datamaxi.premium( - source_market="spot", target_market="spot", limit=10 - ) - ) + result = datamaxi.premium(source_market="spot", target_market="spot", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_sort_asc(self, datamaxi): """Test premium data with sort=asc.""" - result = live_call(lambda: datamaxi.premium(sort="asc", key="pdp", limit=10)) + result = datamaxi.premium(sort="asc", key="pdp", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_sort_desc(self, datamaxi): """Test premium data with sort=desc.""" - result = live_call(lambda: datamaxi.premium(sort="desc", key="pdp", limit=10)) + result = datamaxi.premium(sort="desc", key="pdp", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_volume_filters(self, datamaxi): """Test premium data with volume filters.""" - result = live_call(lambda: datamaxi.premium(min_sv="100000", limit=10)) + result = datamaxi.premium(min_sv="100000", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_only_transferable(self, datamaxi): """Test premium data with only_transferable=True.""" - result = live_call(lambda: datamaxi.premium(only_transferable=True, limit=10)) + result = datamaxi.premium(only_transferable=True, limit=10) assert isinstance(result, pd.DataFrame) def 
test_premium_with_currency(self, datamaxi): """Test premium data with currency parameter.""" - result = live_call(lambda: datamaxi.premium(currency="KRW", limit=10)) + result = datamaxi.premium(currency="KRW", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_with_conversion_base(self, datamaxi): """Test premium data with conversion_base parameter (USD or USDT).""" - result = live_call(lambda: datamaxi.premium(conversion_base="USD", limit=10)) + result = datamaxi.premium(conversion_base="USD", limit=10) assert isinstance(result, pd.DataFrame) assert len(result) > 0 def test_premium_token_include(self, datamaxi): """Test premium data with token_include filter.""" - result = live_call(lambda: datamaxi.premium(token_include="bitcoin", limit=10)) + result = datamaxi.premium(token_include="bitcoin", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_token_exclude(self, datamaxi): """Test premium data with token_exclude filter.""" - result = live_call(lambda: datamaxi.premium(token_exclude="SHIB", limit=10)) + result = datamaxi.premium(token_exclude="SHIB", limit=10) assert isinstance(result, pd.DataFrame) def test_premium_pandas_false(self, datamaxi): """Test premium data with pandas=False.""" - result = live_call(lambda: datamaxi.premium(pandas=False, limit=10)) + result = datamaxi.premium(pandas=False, limit=10) assert isinstance(result, dict) assert "data" in result